blob: b232b0242e515c27159298fe66eabe53235847c9 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030014/*[clinic input]
15module _io
16class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18[clinic start generated code]*/
19/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20
21/*[python input]
22class io_ssize_t_converter(CConverter):
23 type = 'Py_ssize_t'
24 converter = '_PyIO_ConvertSsize_t'
25[python start generated code]*/
26/*[python end generated code: output=da39a3ee5e6b4b0d input=d0a811d3cbfd1b33]*/
27
/* Interned-name handles (PyId_<name>) for the attribute and method names
   this file looks up on other objects.  Kept in alphabetical order. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020048
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000049/* TextIOBase */
50
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000051PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000052 "Base class for text I/O.\n"
53 "\n"
54 "This class provides a character and line based interface to stream\n"
55 "I/O. There is no readinto method because Python's character strings\n"
56 "are immutable. There is no public constructor.\n"
57 );
58
59static PyObject *
60_unsupported(const char *message)
61{
Antoine Pitrou712cb732013-12-21 15:51:54 +010062 _PyIO_State *state = IO_STATE();
63 if (state != NULL)
64 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065 return NULL;
66}
67
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000068PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000069 "Separate the underlying buffer from the TextIOBase and return it.\n"
70 "\n"
71 "After the underlying buffer has been detached, the TextIO is in an\n"
72 "unusable state.\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000077{
78 return _unsupported("detach");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Read at most n characters from stream.\n"
83 "\n"
84 "Read from underlying buffer until we have n characters or we hit EOF.\n"
85 "If n is negative or omitted, read until EOF.\n"
86 );
87
88static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000089textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000090{
91 return _unsupported("read");
92}
93
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000094PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000095 "Read until newline or EOF.\n"
96 "\n"
97 "Returns an empty string if EOF is hit immediately.\n"
98 );
99
100static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000101textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000102{
103 return _unsupported("readline");
104}
105
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000106PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000107 "Write string to stream.\n"
108 "Returns the number of characters written (which is always equal to\n"
109 "the length of the string).\n"
110 );
111
112static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000113textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000114{
115 return _unsupported("write");
116}
117
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000118PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119 "Encoding of the text stream.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000131 "Line endings translated so far.\n"
132 "\n"
133 "Only line endings translated during reading are considered.\n"
134 "\n"
135 "Subclasses should override.\n"
136 );
137
138static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000139textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140{
141 Py_RETURN_NONE;
142}
143
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000144PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000145 "The error setting of the decoder or encoder.\n"
146 "\n"
147 "Subclasses should override.\n"
148 );
149
150static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000152{
153 Py_RETURN_NONE;
154}
155
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000157static PyMethodDef textiobase_methods[] = {
158 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
159 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
160 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
161 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000162 {NULL, NULL}
163};
164
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165static PyGetSetDef textiobase_getset[] = {
166 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
167 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
168 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000169 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000170};
171
172PyTypeObject PyTextIOBase_Type = {
173 PyVarObject_HEAD_INIT(NULL, 0)
174 "_io._TextIOBase", /*tp_name*/
175 0, /*tp_basicsize*/
176 0, /*tp_itemsize*/
177 0, /*tp_dealloc*/
178 0, /*tp_print*/
179 0, /*tp_getattr*/
180 0, /*tp_setattr*/
181 0, /*tp_compare */
182 0, /*tp_repr*/
183 0, /*tp_as_number*/
184 0, /*tp_as_sequence*/
185 0, /*tp_as_mapping*/
186 0, /*tp_hash */
187 0, /*tp_call*/
188 0, /*tp_str*/
189 0, /*tp_getattro*/
190 0, /*tp_setattro*/
191 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200192 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
193 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000194 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000195 0, /* tp_traverse */
196 0, /* tp_clear */
197 0, /* tp_richcompare */
198 0, /* tp_weaklistoffset */
199 0, /* tp_iter */
200 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000201 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000203 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000204 &PyIOBase_Type, /* tp_base */
205 0, /* tp_dict */
206 0, /* tp_descr_get */
207 0, /* tp_descr_set */
208 0, /* tp_dictoffset */
209 0, /* tp_init */
210 0, /* tp_alloc */
211 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200212 0, /* tp_free */
213 0, /* tp_is_gc */
214 0, /* tp_bases */
215 0, /* tp_mro */
216 0, /* tp_cache */
217 0, /* tp_subclasses */
218 0, /* tp_weaklist */
219 0, /* tp_del */
220 0, /* tp_version_tag */
221 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222};
223
224
225/* IncrementalNewlineDecoder */
226
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000227typedef struct {
228 PyObject_HEAD
229 PyObject *decoder;
230 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200231 unsigned int pendingcr: 1;
232 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000233 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000234} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000235
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300236/*[clinic input]
237_io.IncrementalNewlineDecoder.__init__
238 decoder: object
239 translate: int
240 errors: object(c_default="NULL") = "strict"
241
242Codec used when reading a file in universal newlines mode.
243
244It wraps another incremental decoder, translating \r\n and \r into \n.
245It also records the types of newlines encountered. When used with
246translate=False, it ensures that the newline sequence is returned in
247one piece. When used with decoder=None, it expects unicode strings as
248decode input and translates newlines without first invoking an external
249decoder.
250[clinic start generated code]*/
251
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300253_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
254 PyObject *decoder, int translate,
255 PyObject *errors)
256/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000258 self->decoder = decoder;
259 Py_INCREF(decoder);
260
261 if (errors == NULL) {
262 self->errors = PyUnicode_FromString("strict");
263 if (self->errors == NULL)
264 return -1;
265 }
266 else {
267 Py_INCREF(errors);
268 self->errors = errors;
269 }
270
271 self->translate = translate;
272 self->seennl = 0;
273 self->pendingcr = 0;
274
275 return 0;
276}
277
278static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000279incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000280{
281 Py_CLEAR(self->decoder);
282 Py_CLEAR(self->errors);
283 Py_TYPE(self)->tp_free((PyObject *)self);
284}
285
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200286static int
287check_decoded(PyObject *decoded)
288{
289 if (decoded == NULL)
290 return -1;
291 if (!PyUnicode_Check(decoded)) {
292 PyErr_Format(PyExc_TypeError,
293 "decoder should return a string result, not '%.200s'",
294 Py_TYPE(decoded)->tp_name);
295 Py_DECREF(decoded);
296 return -1;
297 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200298 if (PyUnicode_READY(decoded) < 0) {
299 Py_DECREF(decoded);
300 return -1;
301 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200302 return 0;
303}
304
/* Bit flags stored in nldecoder_object.seennl: which newline
   conventions have been encountered so far. */
#define SEEN_CR   0x1   /* a lone "\r" */
#define SEEN_LF   0x2   /* a lone "\n" */
#define SEEN_CRLF 0x4   /* the pair "\r\n" */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
309
310PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200311_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000312 PyObject *input, int final)
313{
314 PyObject *output;
315 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200316 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000317
318 if (self->decoder == NULL) {
319 PyErr_SetString(PyExc_ValueError,
320 "IncrementalNewlineDecoder.__init__ not called");
321 return NULL;
322 }
323
324 /* decode input (with the eventual \r from a previous pass) */
325 if (self->decoder != Py_None) {
326 output = PyObject_CallMethodObjArgs(self->decoder,
327 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
328 }
329 else {
330 output = input;
331 Py_INCREF(output);
332 }
333
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200334 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335 return NULL;
336
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200337 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000338 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200339 /* Prefix output with CR */
340 int kind;
341 PyObject *modified;
342 char *out;
343
344 modified = PyUnicode_New(output_len + 1,
345 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346 if (modified == NULL)
347 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 kind = PyUnicode_KIND(modified);
349 out = PyUnicode_DATA(modified);
350 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200351 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000352 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200353 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000354 self->pendingcr = 0;
355 output_len++;
356 }
357
358 /* retain last \r even when not translating data:
359 * then readline() is sure to get \r\n in one pass
360 */
361 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000362 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200363 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
364 {
365 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
366 if (modified == NULL)
367 goto error;
368 Py_DECREF(output);
369 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000370 self->pendingcr = 1;
371 }
372 }
373
374 /* Record which newlines are read and do newline translation if desired,
375 all in one pass. */
376 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000378 Py_ssize_t len;
379 int seennl = self->seennl;
380 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200381 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200383 in_str = PyUnicode_DATA(output);
384 len = PyUnicode_GET_LENGTH(output);
385 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000386
387 if (len == 0)
388 return output;
389
390 /* If, up to now, newlines are consistently \n, do a quick check
391 for the \r *byte* with the libc's optimized memchr.
392 */
393 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200394 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000395 }
396
Antoine Pitrou66913e22009-03-06 23:40:56 +0000397 if (only_lf) {
398 /* If not already seen, quick scan for a possible "\n" character.
399 (there's nothing else to be done, even when in translation mode)
400 */
401 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200402 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100403 if (kind == PyUnicode_1BYTE_KIND)
404 seennl |= SEEN_LF;
405 else {
406 Py_ssize_t i = 0;
407 for (;;) {
408 Py_UCS4 c;
409 /* Fast loop for non-control characters */
410 while (PyUnicode_READ(kind, in_str, i) > '\n')
411 i++;
412 c = PyUnicode_READ(kind, in_str, i++);
413 if (c == '\n') {
414 seennl |= SEEN_LF;
415 break;
416 }
417 if (i >= len)
418 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000419 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000420 }
421 }
422 /* Finished: we have scanned for newlines, and none of them
423 need translating */
424 }
425 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000427 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000428 if (seennl == SEEN_ALL)
429 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000432 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 while (PyUnicode_READ(kind, in_str, i) > '\r')
434 i++;
435 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 if (c == '\n')
437 seennl |= SEEN_LF;
438 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200439 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 }
443 else
444 seennl |= SEEN_CR;
445 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 break;
448 if (seennl == SEEN_ALL)
449 break;
450 }
451 endscan:
452 ;
453 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000454 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 void *translated;
456 int kind = PyUnicode_KIND(output);
457 void *in_str = PyUnicode_DATA(output);
458 Py_ssize_t in, out;
459 /* XXX: Previous in-place translation here is disabled as
460 resizing is not possible anymore */
461 /* We could try to optimize this so that we only do a copy
462 when there is something to translate. On the other hand,
463 we already know there is a \r byte, so chances are high
464 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200465 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 if (translated == NULL) {
467 PyErr_NoMemory();
468 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000469 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200470 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000471 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000473 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200474 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
475 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200477 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478 seennl |= SEEN_LF;
479 continue;
480 }
481 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483 in++;
484 seennl |= SEEN_CRLF;
485 }
486 else
487 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000489 continue;
490 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000492 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200493 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000494 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200495 Py_DECREF(output);
496 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100497 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200499 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000500 }
501 self->seennl |= seennl;
502 }
503
504 return output;
505
506 error:
507 Py_DECREF(output);
508 return NULL;
509}
510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511/*[clinic input]
512_io.IncrementalNewlineDecoder.decode
513 input: object
514 final: int(c_default="0") = False
515[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300517static PyObject *
518_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
519 PyObject *input, int final)
520/*[clinic end generated code: output=0d486755bb37a66e input=d65677385bfd6827]*/
521{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000522 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
523}
524
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300525/*[clinic input]
526_io.IncrementalNewlineDecoder.getstate
527[clinic start generated code]*/
528
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300530_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
531/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532{
533 PyObject *buffer;
534 unsigned PY_LONG_LONG flag;
535
536 if (self->decoder != Py_None) {
537 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
538 _PyIO_str_getstate, NULL);
539 if (state == NULL)
540 return NULL;
Serhiy Storchakabb72c472015-04-19 20:38:19 +0300541 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542 Py_DECREF(state);
543 return NULL;
544 }
545 Py_INCREF(buffer);
546 Py_DECREF(state);
547 }
548 else {
549 buffer = PyBytes_FromString("");
550 flag = 0;
551 }
552 flag <<= 1;
553 if (self->pendingcr)
554 flag |= 1;
555 return Py_BuildValue("NK", buffer, flag);
556}
557
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300558/*[clinic input]
559_io.IncrementalNewlineDecoder.setstate
560 state: object
561 /
562[clinic start generated code]*/
563
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000564static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300565_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
566 PyObject *state)
567/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000568{
569 PyObject *buffer;
570 unsigned PY_LONG_LONG flag;
571
Serhiy Storchakabb72c472015-04-19 20:38:19 +0300572 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573 return NULL;
574
Victor Stinner7d7e7752014-06-17 23:31:25 +0200575 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 flag >>= 1;
577
578 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200579 return _PyObject_CallMethodId(self->decoder,
580 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000581 else
582 Py_RETURN_NONE;
583}
584
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300585/*[clinic input]
586_io.IncrementalNewlineDecoder.reset
587[clinic start generated code]*/
588
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300590_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
591/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592{
593 self->seennl = 0;
594 self->pendingcr = 0;
595 if (self->decoder != Py_None)
596 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
597 else
598 Py_RETURN_NONE;
599}
600
601static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603{
604 switch (self->seennl) {
605 case SEEN_CR:
606 return PyUnicode_FromString("\r");
607 case SEEN_LF:
608 return PyUnicode_FromString("\n");
609 case SEEN_CRLF:
610 return PyUnicode_FromString("\r\n");
611 case SEEN_CR | SEEN_LF:
612 return Py_BuildValue("ss", "\r", "\n");
613 case SEEN_CR | SEEN_CRLF:
614 return Py_BuildValue("ss", "\r", "\r\n");
615 case SEEN_LF | SEEN_CRLF:
616 return Py_BuildValue("ss", "\n", "\r\n");
617 case SEEN_CR | SEEN_LF | SEEN_CRLF:
618 return Py_BuildValue("sss", "\r", "\n", "\r\n");
619 default:
620 Py_RETURN_NONE;
621 }
622
623}
624
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000625/* TextIOWrapper */
626
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000627typedef PyObject *
628 (*encodefunc_t)(PyObject *, PyObject *);
629
630typedef struct
631{
632 PyObject_HEAD
633 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000634 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000635 Py_ssize_t chunk_size;
636 PyObject *buffer;
637 PyObject *encoding;
638 PyObject *encoder;
639 PyObject *decoder;
640 PyObject *readnl;
641 PyObject *errors;
642 const char *writenl; /* utf-8 encoded, NULL stands for \n */
643 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200644 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 char readuniversal;
646 char readtranslate;
647 char writetranslate;
648 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200649 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000650 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200651 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 /* Specialized encoding func (see below) */
653 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000654 /* Whether or not it's the start of the stream */
655 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000656
657 /* Reads and writes are internally buffered in order to speed things up.
658 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000659
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000660 Please also note that text to be written is first encoded before being
661 buffered. This is necessary so that encoding errors are immediately
662 reported to the caller, but it unfortunately means that the
663 IncrementalEncoder (whose encode() method is always written in Python)
664 becomes a bottleneck for small writes.
665 */
666 PyObject *decoded_chars; /* buffer for text returned from decoder */
667 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
668 PyObject *pending_bytes; /* list of bytes objects waiting to be
669 written, or NULL */
670 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000671
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000672 /* snapshot is either None, or a tuple (dec_flags, next_input) where
673 * dec_flags is the second (integer) item of the decoder state and
674 * next_input is the chunk of input bytes that comes next after the
675 * snapshot point. We use this to reconstruct decoder states in tell().
676 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000677 PyObject *snapshot;
678 /* Bytes-to-characters ratio for the current chunk. Serves as input for
679 the heuristic in tell(). */
680 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681
682 /* Cache raw object if it's a FileIO object */
683 PyObject *raw;
684
685 PyObject *weakreflist;
686 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000687} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000689/* A couple of specialized cases in order to bypass the slow incremental
690 encoding methods for the most popular encodings. */
691
692static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000693ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200695 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696}
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100701 return _PyUnicode_EncodeUTF16(text,
702 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703}
704
705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000706utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100708 return _PyUnicode_EncodeUTF16(text,
709 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710}
711
712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714{
Antoine Pitroue4501852009-05-14 18:55:55 +0000715 if (!self->encoding_start_of_stream) {
716 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200717#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000718 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000720 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000722 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100723 return _PyUnicode_EncodeUTF16(text,
724 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725}
726
Antoine Pitroue4501852009-05-14 18:55:55 +0000727static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000728utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000729{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100730 return _PyUnicode_EncodeUTF32(text,
731 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000732}
733
734static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000735utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000736{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100737 return _PyUnicode_EncodeUTF32(text,
738 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000739}
740
741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743{
744 if (!self->encoding_start_of_stream) {
745 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200746#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 return utf32be_encode(self, text);
748#else
749 return utf32le_encode(self, text);
750#endif
751 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100752 return _PyUnicode_EncodeUTF32(text,
753 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000754}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000755
756static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000757utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766}
767
768/* Map normalized encoding names onto the specialized encoding funcs */
769
770typedef struct {
771 const char *name;
772 encodefunc_t encodefunc;
773} encodefuncentry;
774
Antoine Pitrou24f36292009-03-28 22:16:42 +0000775static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776 {"ascii", (encodefunc_t) ascii_encode},
777 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000778 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000779 {"utf-16-be", (encodefunc_t) utf16be_encode},
780 {"utf-16-le", (encodefunc_t) utf16le_encode},
781 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000782 {"utf-32-be", (encodefunc_t) utf32be_encode},
783 {"utf-32-le", (encodefunc_t) utf32le_encode},
784 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000785 {NULL, NULL}
786};
787
788
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300789/*[clinic input]
790_io.TextIOWrapper.__init__
791 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700792 encoding: str(accept={str, NoneType}) = NULL
793 errors: str(accept={str, NoneType}) = NULL
794 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300795 line_buffering: int(c_default="0") = False
796 write_through: int(c_default="0") = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300798Character and line based layer over a BufferedIOBase object, buffer.
799
800encoding gives the name of the encoding that the stream will be
801decoded or encoded with. It defaults to locale.getpreferredencoding(False).
802
803errors determines the strictness of encoding and decoding (see
804help(codecs.Codec) or the documentation for codecs.register) and
805defaults to "strict".
806
807newline controls how line endings are handled. It can be None, '',
808'\n', '\r', and '\r\n'. It works as follows:
809
810* On input, if newline is None, universal newlines mode is
811 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
812 these are translated into '\n' before being returned to the
813 caller. If it is '', universal newline mode is enabled, but line
814 endings are returned to the caller untranslated. If it has any of
815 the other legal values, input lines are only terminated by the given
816 string, and the line ending is returned to the caller untranslated.
817
818* On output, if newline is None, any '\n' characters written are
819 translated to the system default line separator, os.linesep. If
820 newline is '' or '\n', no translation takes place. If newline is any
821 of the other legal values, any '\n' characters written are translated
822 to the given string.
823
824If line_buffering is True, a call to flush is implied when a call to
825write contains a newline character.
826[clinic start generated code]*/
827
828static int
829_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
830 const char *encoding, const char *errors,
831 const char *newline, int line_buffering,
832 int write_through)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700833/*[clinic end generated code: output=56a83402ce2a8381 input=3126cb3101a2c99b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300834{
835 PyObject *raw, *codec_info = NULL;
836 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 PyObject *res;
838 int r;
839
840 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000841 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842
843 if (newline && newline[0] != '\0'
844 && !(newline[0] == '\n' && newline[1] == '\0')
845 && !(newline[0] == '\r' && newline[1] == '\0')
846 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
847 PyErr_Format(PyExc_ValueError,
848 "illegal newline value: %s", newline);
849 return -1;
850 }
851
852 Py_CLEAR(self->buffer);
853 Py_CLEAR(self->encoding);
854 Py_CLEAR(self->encoder);
855 Py_CLEAR(self->decoder);
856 Py_CLEAR(self->readnl);
857 Py_CLEAR(self->decoded_chars);
858 Py_CLEAR(self->pending_bytes);
859 Py_CLEAR(self->snapshot);
860 Py_CLEAR(self->errors);
861 Py_CLEAR(self->raw);
862 self->decoded_chars_used = 0;
863 self->pending_bytes_count = 0;
864 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000865 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000866
867 if (encoding == NULL) {
868 /* Try os.device_encoding(fileno) */
869 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100870 state = IO_STATE();
871 if (state == NULL)
872 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200873 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000874 /* Ignore only AttributeError and UnsupportedOperation */
875 if (fileno == NULL) {
876 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
877 PyErr_ExceptionMatches(state->unsupported_operation)) {
878 PyErr_Clear();
879 }
880 else {
881 goto error;
882 }
883 }
884 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200885 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500886 Py_DECREF(fileno);
887 if (fd == -1 && PyErr_Occurred()) {
888 goto error;
889 }
890
891 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892 if (self->encoding == NULL)
893 goto error;
894 else if (!PyUnicode_Check(self->encoding))
895 Py_CLEAR(self->encoding);
896 }
897 }
898 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200899 PyObject *locale_module = _PyIO_get_locale_module(state);
900 if (locale_module == NULL)
901 goto catch_ImportError;
902 self->encoding = _PyObject_CallMethodId(
903 locale_module, &PyId_getpreferredencoding, "O", Py_False);
904 Py_DECREF(locale_module);
905 if (self->encoding == NULL) {
906 catch_ImportError:
907 /*
Martin Panter7462b6492015-11-02 03:37:02 +0000908 Importing locale can raise an ImportError because of
909 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +0200910 ImportError if _locale is not available. These will happen
911 during module building.
912 */
913 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
914 PyErr_Clear();
915 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000916 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200917 else
918 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000922 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000923 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000924 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000925 if (encoding == NULL)
926 goto error;
927 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000928 else if (encoding != NULL) {
929 self->encoding = PyUnicode_FromString(encoding);
930 if (self->encoding == NULL)
931 goto error;
932 }
933 else {
934 PyErr_SetString(PyExc_IOError,
935 "could not determine default encoding");
936 }
937
Nick Coghlana9b15242014-02-04 22:11:18 +1000938 /* Check we have been asked for a real text encoding */
939 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
940 if (codec_info == NULL) {
941 Py_CLEAR(self->encoding);
942 goto error;
943 }
944
945 /* XXX: Failures beyond this point have the potential to leak elements
946 * of the partially constructed object (like self->encoding)
947 */
948
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000949 if (errors == NULL)
950 errors = "strict";
951 self->errors = PyBytes_FromString(errors);
952 if (self->errors == NULL)
953 goto error;
954
955 self->chunk_size = 8192;
956 self->readuniversal = (newline == NULL || newline[0] == '\0');
957 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200958 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000959 self->readtranslate = (newline == NULL);
960 if (newline) {
961 self->readnl = PyUnicode_FromString(newline);
962 if (self->readnl == NULL)
Nick Coghlana9b15242014-02-04 22:11:18 +1000963 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 }
965 self->writetranslate = (newline == NULL || newline[0] != '\0');
966 if (!self->readuniversal && self->readnl) {
967 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000968 if (self->writenl == NULL)
969 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000970 if (!strcmp(self->writenl, "\n"))
971 self->writenl = NULL;
972 }
973#ifdef MS_WINDOWS
974 else
975 self->writenl = "\r\n";
976#endif
977
978 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200979 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000980 if (res == NULL)
981 goto error;
982 r = PyObject_IsTrue(res);
983 Py_DECREF(res);
984 if (r == -1)
985 goto error;
986 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +1000987 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
988 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000989 if (self->decoder == NULL)
990 goto error;
991
992 if (self->readuniversal) {
993 PyObject *incrementalDecoder = PyObject_CallFunction(
994 (PyObject *)&PyIncrementalNewlineDecoder_Type,
995 "Oi", self->decoder, (int)self->readtranslate);
996 if (incrementalDecoder == NULL)
997 goto error;
998 Py_CLEAR(self->decoder);
999 self->decoder = incrementalDecoder;
1000 }
1001 }
1002
1003 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001004 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001005 if (res == NULL)
1006 goto error;
1007 r = PyObject_IsTrue(res);
1008 Py_DECREF(res);
1009 if (r == -1)
1010 goto error;
1011 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001012 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1013 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001014 if (self->encoder == NULL)
1015 goto error;
1016 /* Get the normalized named of the codec */
Nick Coghlana9b15242014-02-04 22:11:18 +10001017 res = _PyObject_GetAttrId(codec_info, &PyId_name);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001018 if (res == NULL) {
1019 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1020 PyErr_Clear();
1021 else
1022 goto error;
1023 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001024 else if (PyUnicode_Check(res)) {
1025 encodefuncentry *e = encodefuncs;
1026 while (e->name != NULL) {
1027 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1028 self->encodefunc = e->encodefunc;
1029 break;
1030 }
1031 e++;
1032 }
1033 }
1034 Py_XDECREF(res);
1035 }
1036
Nick Coghlana9b15242014-02-04 22:11:18 +10001037 /* Finished sorting out the codec details */
Benjamin Peterson6c14f232014-11-12 10:19:46 -05001038 Py_CLEAR(codec_info);
Nick Coghlana9b15242014-02-04 22:11:18 +10001039
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 self->buffer = buffer;
1041 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001043 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1044 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1045 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001046 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001047 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001048 if (raw == NULL) {
1049 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1050 PyErr_Clear();
1051 else
1052 goto error;
1053 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001054 else if (Py_TYPE(raw) == &PyFileIO_Type)
1055 self->raw = raw;
1056 else
1057 Py_DECREF(raw);
1058 }
1059
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001060 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001061 if (res == NULL)
1062 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001063 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001064 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001065 if (r < 0)
1066 goto error;
1067 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001068
Martin v. Löwis767046a2011-10-14 15:35:36 +02001069 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001070
Antoine Pitroue4501852009-05-14 18:55:55 +00001071 self->encoding_start_of_stream = 0;
1072 if (self->seekable && self->encoder) {
1073 PyObject *cookieObj;
1074 int cmp;
1075
1076 self->encoding_start_of_stream = 1;
1077
1078 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1079 if (cookieObj == NULL)
1080 goto error;
1081
1082 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1083 Py_DECREF(cookieObj);
1084 if (cmp < 0) {
1085 goto error;
1086 }
1087
1088 if (cmp == 0) {
1089 self->encoding_start_of_stream = 0;
1090 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1091 _PyIO_zero, NULL);
1092 if (res == NULL)
1093 goto error;
1094 Py_DECREF(res);
1095 }
1096 }
1097
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001098 self->ok = 1;
1099 return 0;
1100
1101 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001102 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001103 return -1;
1104}
1105
1106static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001107_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001108{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001109 self->ok = 0;
1110 Py_CLEAR(self->buffer);
1111 Py_CLEAR(self->encoding);
1112 Py_CLEAR(self->encoder);
1113 Py_CLEAR(self->decoder);
1114 Py_CLEAR(self->readnl);
1115 Py_CLEAR(self->decoded_chars);
1116 Py_CLEAR(self->pending_bytes);
1117 Py_CLEAR(self->snapshot);
1118 Py_CLEAR(self->errors);
1119 Py_CLEAR(self->raw);
1120 return 0;
1121}
1122
1123static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001124textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001126 self->finalizing = 1;
1127 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001128 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001129 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130 _PyObject_GC_UNTRACK(self);
1131 if (self->weakreflist != NULL)
1132 PyObject_ClearWeakRefs((PyObject *)self);
1133 Py_CLEAR(self->dict);
1134 Py_TYPE(self)->tp_free((PyObject *)self);
1135}
1136
1137static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001138textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001139{
1140 Py_VISIT(self->buffer);
1141 Py_VISIT(self->encoding);
1142 Py_VISIT(self->encoder);
1143 Py_VISIT(self->decoder);
1144 Py_VISIT(self->readnl);
1145 Py_VISIT(self->decoded_chars);
1146 Py_VISIT(self->pending_bytes);
1147 Py_VISIT(self->snapshot);
1148 Py_VISIT(self->errors);
1149 Py_VISIT(self->raw);
1150
1151 Py_VISIT(self->dict);
1152 return 0;
1153}
1154
1155static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001156textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001157{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001158 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001159 return -1;
1160 Py_CLEAR(self->dict);
1161 return 0;
1162}
1163
1164static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001165textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001166
1167/* This macro takes some shortcuts to make the common case faster. */
1168#define CHECK_CLOSED(self) \
1169 do { \
1170 int r; \
1171 PyObject *_res; \
1172 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1173 if (self->raw != NULL) \
1174 r = _PyFileIO_closed(self->raw); \
1175 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001176 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001177 if (_res == NULL) \
1178 return NULL; \
1179 r = PyObject_IsTrue(_res); \
1180 Py_DECREF(_res); \
1181 if (r < 0) \
1182 return NULL; \
1183 } \
1184 if (r > 0) { \
1185 PyErr_SetString(PyExc_ValueError, \
1186 "I/O operation on closed file."); \
1187 return NULL; \
1188 } \
1189 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001190 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001191 return NULL; \
1192 } while (0)
1193
1194#define CHECK_INITIALIZED(self) \
1195 if (self->ok <= 0) { \
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001196 PyErr_SetString(PyExc_ValueError, \
1197 "I/O operation on uninitialized object"); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001198 return NULL; \
1199 }
1200
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001201#define CHECK_ATTACHED(self) \
1202 CHECK_INITIALIZED(self); \
1203 if (self->detached) { \
1204 PyErr_SetString(PyExc_ValueError, \
1205 "underlying buffer has been detached"); \
1206 return NULL; \
1207 }
1208
1209#define CHECK_ATTACHED_INT(self) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001210 if (self->ok <= 0) { \
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001211 PyErr_SetString(PyExc_ValueError, \
1212 "I/O operation on uninitialized object"); \
1213 return -1; \
1214 } else if (self->detached) { \
1215 PyErr_SetString(PyExc_ValueError, \
1216 "underlying buffer has been detached"); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001217 return -1; \
1218 }
1219
1220
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001221/*[clinic input]
1222_io.TextIOWrapper.detach
1223[clinic start generated code]*/
1224
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001225static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001226_io_TextIOWrapper_detach_impl(textio *self)
1227/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001228{
1229 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001230 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001231 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1232 if (res == NULL)
1233 return NULL;
1234 Py_DECREF(res);
1235 buffer = self->buffer;
1236 self->buffer = NULL;
1237 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001238 return buffer;
1239}
1240
Antoine Pitrou24f36292009-03-28 22:16:42 +00001241/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001242 underlying buffered object, though. */
1243static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001244_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001246 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001247
1248 if (self->pending_bytes == NULL)
1249 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001250
1251 pending = self->pending_bytes;
1252 Py_INCREF(pending);
1253 self->pending_bytes_count = 0;
1254 Py_CLEAR(self->pending_bytes);
1255
1256 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1257 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001258 if (b == NULL)
1259 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001260 ret = NULL;
1261 do {
1262 ret = PyObject_CallMethodObjArgs(self->buffer,
1263 _PyIO_str_write, b, NULL);
1264 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 Py_DECREF(b);
1266 if (ret == NULL)
1267 return -1;
1268 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001269 return 0;
1270}
1271
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001272/*[clinic input]
1273_io.TextIOWrapper.write
1274 text: unicode
1275 /
1276[clinic start generated code]*/
1277
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001278static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001279_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1280/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001281{
1282 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001283 PyObject *b;
1284 Py_ssize_t textlen;
1285 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001286 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001287
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 if (PyUnicode_READY(text) == -1)
1289 return NULL;
1290
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001291 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292 CHECK_CLOSED(self);
1293
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001294 if (self->encoder == NULL)
1295 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001296
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001297 Py_INCREF(text);
1298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001300
1301 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001303 haslf = 1;
1304
1305 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001306 PyObject *newtext = _PyObject_CallMethodId(
1307 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001308 Py_DECREF(text);
1309 if (newtext == NULL)
1310 return NULL;
1311 text = newtext;
1312 }
1313
Antoine Pitroue96ec682011-07-23 21:46:35 +02001314 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001315 text_needflush = 1;
1316 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001319 needflush = 1;
1320
1321 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001322 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001324 self->encoding_start_of_stream = 0;
1325 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001326 else
1327 b = PyObject_CallMethodObjArgs(self->encoder,
1328 _PyIO_str_encode, text, NULL);
1329 Py_DECREF(text);
1330 if (b == NULL)
1331 return NULL;
1332
1333 if (self->pending_bytes == NULL) {
1334 self->pending_bytes = PyList_New(0);
1335 if (self->pending_bytes == NULL) {
1336 Py_DECREF(b);
1337 return NULL;
1338 }
1339 self->pending_bytes_count = 0;
1340 }
1341 if (PyList_Append(self->pending_bytes, b) < 0) {
1342 Py_DECREF(b);
1343 return NULL;
1344 }
1345 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1346 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001347 if (self->pending_bytes_count > self->chunk_size || needflush ||
1348 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001349 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001350 return NULL;
1351 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001352
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001353 if (needflush) {
1354 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1355 if (ret == NULL)
1356 return NULL;
1357 Py_DECREF(ret);
1358 }
1359
1360 Py_CLEAR(self->snapshot);
1361
1362 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001363 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001364 if (ret == NULL)
1365 return NULL;
1366 Py_DECREF(ret);
1367 }
1368
1369 return PyLong_FromSsize_t(textlen);
1370}
1371
1372/* Steal a reference to chars and store it in the decoded_char buffer;
1373 */
1374static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001375textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001376{
1377 Py_CLEAR(self->decoded_chars);
1378 self->decoded_chars = chars;
1379 self->decoded_chars_used = 0;
1380}
1381
1382static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001383textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384{
1385 PyObject *chars;
1386 Py_ssize_t avail;
1387
1388 if (self->decoded_chars == NULL)
1389 return PyUnicode_FromStringAndSize(NULL, 0);
1390
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001391 /* decoded_chars is guaranteed to be "ready". */
1392 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001393 - self->decoded_chars_used);
1394
1395 assert(avail >= 0);
1396
1397 if (n < 0 || n > avail)
1398 n = avail;
1399
1400 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001401 chars = PyUnicode_Substring(self->decoded_chars,
1402 self->decoded_chars_used,
1403 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001404 if (chars == NULL)
1405 return NULL;
1406 }
1407 else {
1408 chars = self->decoded_chars;
1409 Py_INCREF(chars);
1410 }
1411
1412 self->decoded_chars_used += n;
1413 return chars;
1414}
1415
1416/* Read and decode the next chunk of data from the BufferedReader.
1417 */
1418static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001419textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001420{
1421 PyObject *dec_buffer = NULL;
1422 PyObject *dec_flags = NULL;
1423 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001424 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001425 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001426 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001427 int eof;
1428
1429 /* The return value is True unless EOF was reached. The decoded string is
1430 * placed in self._decoded_chars (replacing its previous value). The
1431 * entire input chunk is sent to the decoder, though some of it may remain
1432 * buffered in the decoder, yet to be converted.
1433 */
1434
1435 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001436 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001437 return -1;
1438 }
1439
1440 if (self->telling) {
1441 /* To prepare for tell(), we need to snapshot a point in the file
1442 * where the decoder's input buffer is empty.
1443 */
1444
1445 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1446 _PyIO_str_getstate, NULL);
1447 if (state == NULL)
1448 return -1;
1449 /* Given this, we know there was a valid snapshot point
1450 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1451 */
Serhiy Storchakabb72c472015-04-19 20:38:19 +03001452 if (PyArg_ParseTuple(state, "OO", &dec_buffer, &dec_flags) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001453 Py_DECREF(state);
1454 return -1;
1455 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001456
1457 if (!PyBytes_Check(dec_buffer)) {
1458 PyErr_Format(PyExc_TypeError,
1459 "decoder getstate() should have returned a bytes "
1460 "object, not '%.200s'",
1461 Py_TYPE(dec_buffer)->tp_name);
1462 Py_DECREF(state);
1463 return -1;
1464 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001465 Py_INCREF(dec_buffer);
1466 Py_INCREF(dec_flags);
1467 Py_DECREF(state);
1468 }
1469
1470 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001471 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001472 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001473 }
1474 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001475 if (chunk_size == NULL)
1476 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001477
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001478 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001479 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1480 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001481 Py_DECREF(chunk_size);
1482 if (input_chunk == NULL)
1483 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001484
1485 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001486 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001487 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001488 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1489 Py_TYPE(input_chunk)->tp_name);
1490 goto fail;
1491 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001492
Antoine Pitroub8503892014-04-29 10:14:02 +02001493 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001494 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001495 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1496 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1497 self->decoder, input_chunk, eof);
1498 }
1499 else {
1500 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1501 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1502 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001503 PyBuffer_Release(&input_chunk_buf);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001504
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001505 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001506 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001507 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001508 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001509 if (nchars > 0)
1510 self->b2cratio = (double) nbytes / nchars;
1511 else
1512 self->b2cratio = 0.0;
1513 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001514 eof = 0;
1515
1516 if (self->telling) {
1517 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1518 * next input to be decoded is dec_buffer + input_chunk.
1519 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001520 PyObject *next_input = dec_buffer;
1521 PyBytes_Concat(&next_input, input_chunk);
1522 if (next_input == NULL) {
1523 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001524 goto fail;
1525 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526 Py_CLEAR(self->snapshot);
1527 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1528 }
1529 Py_DECREF(input_chunk);
1530
1531 return (eof == 0);
1532
1533 fail:
1534 Py_XDECREF(dec_buffer);
1535 Py_XDECREF(dec_flags);
1536 Py_XDECREF(input_chunk);
1537 return -1;
1538}
1539
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001540/*[clinic input]
1541_io.TextIOWrapper.read
1542 size as n: io_ssize_t = -1
1543 /
1544[clinic start generated code]*/
1545
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001547_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1548/*[clinic end generated code: output=7e651ce6cc6a25a6 input=8c09398424085cca]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 PyObject *result = NULL, *chunks = NULL;
1551
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001552 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001553 CHECK_CLOSED(self);
1554
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001555 if (self->decoder == NULL)
1556 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001557
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001558 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559 return NULL;
1560
1561 if (n < 0) {
1562 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001563 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 PyObject *decoded;
1565 if (bytes == NULL)
1566 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001567
1568 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1569 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1570 bytes, 1);
1571 else
1572 decoded = PyObject_CallMethodObjArgs(
1573 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001575 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576 goto fail;
1577
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001578 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001579
1580 if (result == NULL) {
1581 Py_DECREF(decoded);
1582 return NULL;
1583 }
1584
1585 PyUnicode_AppendAndDel(&result, decoded);
1586 if (result == NULL)
1587 goto fail;
1588
1589 Py_CLEAR(self->snapshot);
1590 return result;
1591 }
1592 else {
1593 int res = 1;
1594 Py_ssize_t remaining = n;
1595
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001596 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597 if (result == NULL)
1598 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001599 if (PyUnicode_READY(result) == -1)
1600 goto fail;
1601 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602
1603 /* Keep reading chunks until we have n characters to return */
1604 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001605 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001606 if (res < 0) {
1607 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1608 when EINTR occurs so we needn't do it ourselves. */
1609 if (_PyIO_trap_eintr()) {
1610 continue;
1611 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001612 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001613 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 if (res == 0) /* EOF */
1615 break;
1616 if (chunks == NULL) {
1617 chunks = PyList_New(0);
1618 if (chunks == NULL)
1619 goto fail;
1620 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001621 if (PyUnicode_GET_LENGTH(result) > 0 &&
1622 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 goto fail;
1624 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001625 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626 if (result == NULL)
1627 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001628 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 }
1630 if (chunks != NULL) {
1631 if (result != NULL && PyList_Append(chunks, result) < 0)
1632 goto fail;
1633 Py_CLEAR(result);
1634 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1635 if (result == NULL)
1636 goto fail;
1637 Py_CLEAR(chunks);
1638 }
1639 return result;
1640 }
1641 fail:
1642 Py_XDECREF(result);
1643 Py_XDECREF(chunks);
1644 return NULL;
1645}
1646
1647
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001648/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 that is to the NUL character. Otherwise the function will produce
1650 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001651static char *
1652find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001654 if (kind == PyUnicode_1BYTE_KIND) {
1655 assert(ch < 256);
1656 return (char *) memchr((void *) s, (char) ch, end - s);
1657 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001659 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001660 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001661 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001662 return s;
1663 if (s == end)
1664 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001665 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001666 }
1667}
1668
1669Py_ssize_t
1670_PyIO_find_line_ending(
1671 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001672 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001674 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675
1676 if (translated) {
1677 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001678 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001679 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001680 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 else {
1682 *consumed = len;
1683 return -1;
1684 }
1685 }
1686 else if (universal) {
1687 /* Universal newline search. Find any of \r, \r\n, \n
1688 * The decoder ensures that \r\n are not split in two pieces
1689 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001692 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001694 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001695 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001696 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 if (s >= end) {
1698 *consumed = len;
1699 return -1;
1700 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001701 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001702 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001704 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001706 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001707 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001709 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 }
1711 }
1712 }
1713 else {
1714 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001715 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001716 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001717 /* Assume that readnl is an ASCII character. */
1718 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001720 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001722 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 *consumed = len;
1724 return -1;
1725 }
1726 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001727 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001728 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001729 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 if (e < s)
1731 e = s;
1732 while (s < e) {
1733 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001734 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 if (pos == NULL || pos >= e)
1736 break;
1737 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001738 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 break;
1740 }
1741 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001742 return (pos - start)/kind + readnl_len;
1743 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001744 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001745 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746 if (pos == NULL)
1747 *consumed = len;
1748 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001749 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001750 return -1;
1751 }
1752 }
1753}
1754
1755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001756_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001757{
1758 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1759 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1760 int res;
1761
1762 CHECK_CLOSED(self);
1763
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001764 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001765 return NULL;
1766
1767 chunked = 0;
1768
1769 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001770 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001771 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001772 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001773 Py_ssize_t consumed = 0;
1774
1775 /* First, get some data if necessary */
1776 res = 1;
1777 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001778 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001779 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001780 if (res < 0) {
1781 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1782 when EINTR occurs so we needn't do it ourselves. */
1783 if (_PyIO_trap_eintr()) {
1784 continue;
1785 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001786 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001787 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001788 if (res == 0)
1789 break;
1790 }
1791 if (res == 0) {
1792 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001793 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 Py_CLEAR(self->snapshot);
1795 start = endpos = offset_to_buffer = 0;
1796 break;
1797 }
1798
1799 if (remaining == NULL) {
1800 line = self->decoded_chars;
1801 start = self->decoded_chars_used;
1802 offset_to_buffer = 0;
1803 Py_INCREF(line);
1804 }
1805 else {
1806 assert(self->decoded_chars_used == 0);
1807 line = PyUnicode_Concat(remaining, self->decoded_chars);
1808 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001809 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001810 Py_CLEAR(remaining);
1811 if (line == NULL)
1812 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001813 if (PyUnicode_READY(line) == -1)
1814 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815 }
1816
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 ptr = PyUnicode_DATA(line);
1818 line_len = PyUnicode_GET_LENGTH(line);
1819 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001820
1821 endpos = _PyIO_find_line_ending(
1822 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001823 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001824 ptr + kind * start,
1825 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001826 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827 if (endpos >= 0) {
1828 endpos += start;
1829 if (limit >= 0 && (endpos - start) + chunked >= limit)
1830 endpos = start + limit - chunked;
1831 break;
1832 }
1833
1834 /* We can put aside up to `endpos` */
1835 endpos = consumed + start;
1836 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1837 /* Didn't find line ending, but reached length limit */
1838 endpos = start + limit - chunked;
1839 break;
1840 }
1841
1842 if (endpos > start) {
1843 /* No line ending seen yet - put aside current data */
1844 PyObject *s;
1845 if (chunks == NULL) {
1846 chunks = PyList_New(0);
1847 if (chunks == NULL)
1848 goto error;
1849 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001850 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001851 if (s == NULL)
1852 goto error;
1853 if (PyList_Append(chunks, s) < 0) {
1854 Py_DECREF(s);
1855 goto error;
1856 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001857 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001858 Py_DECREF(s);
1859 }
1860 /* There may be some remaining bytes we'll have to prepend to the
1861 next chunk of data */
1862 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001863 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001864 if (remaining == NULL)
1865 goto error;
1866 }
1867 Py_CLEAR(line);
1868 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001869 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870 }
1871
1872 if (line != NULL) {
1873 /* Our line ends in the current buffer */
1874 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001875 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1876 PyObject *s = PyUnicode_Substring(line, start, endpos);
1877 Py_CLEAR(line);
1878 if (s == NULL)
1879 goto error;
1880 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001881 }
1882 }
1883 if (remaining != NULL) {
1884 if (chunks == NULL) {
1885 chunks = PyList_New(0);
1886 if (chunks == NULL)
1887 goto error;
1888 }
1889 if (PyList_Append(chunks, remaining) < 0)
1890 goto error;
1891 Py_CLEAR(remaining);
1892 }
1893 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001894 if (line != NULL) {
1895 if (PyList_Append(chunks, line) < 0)
1896 goto error;
1897 Py_DECREF(line);
1898 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1900 if (line == NULL)
1901 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001902 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001903 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001904 if (line == NULL) {
1905 Py_INCREF(_PyIO_empty_str);
1906 line = _PyIO_empty_str;
1907 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001908
1909 return line;
1910
1911 error:
1912 Py_XDECREF(chunks);
1913 Py_XDECREF(remaining);
1914 Py_XDECREF(line);
1915 return NULL;
1916}
1917
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001918/*[clinic input]
1919_io.TextIOWrapper.readline
1920 size: Py_ssize_t = -1
1921 /
1922[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001923
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001924static PyObject *
1925_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
1926/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
1927{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001928 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001929 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930}
1931
1932/* Seek and Tell */
1933
1934typedef struct {
1935 Py_off_t start_pos;
1936 int dec_flags;
1937 int bytes_to_feed;
1938 int chars_to_skip;
1939 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001940} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001941
/*
   To speed up cookie packing/unpacking, we store the cookie_type fields in
   a temporary byte string and call _PyLong_FromByteArray() or
   _PyLong_AsByteArray() (resp.).
   The OFF_* macros below define at which offsets in the intermediary byte
   string the various fields are stored; each offset is expressed relative
   to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1973
1974static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001975textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001976{
1977 unsigned char buffer[COOKIE_BUF_LEN];
1978 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1979 if (cookieLong == NULL)
1980 return -1;
1981
1982 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001983 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001984 Py_DECREF(cookieLong);
1985 return -1;
1986 }
1987 Py_DECREF(cookieLong);
1988
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001989 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1990 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1991 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1992 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1993 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994
1995 return 0;
1996}
1997
1998static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001999textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000{
2001 unsigned char buffer[COOKIE_BUF_LEN];
2002
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002003 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2004 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2005 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2006 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2007 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008
Christian Heimes743e0cd2012-10-17 23:52:17 +02002009 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2010 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002011}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002012
2013static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002014_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015{
2016 PyObject *res;
2017 /* When seeking to the start of the stream, we call decoder.reset()
2018 rather than decoder.getstate().
2019 This is for a few decoders such as utf-16 for which the state value
2020 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2021 utf-16, that we are expecting a BOM).
2022 */
2023 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2024 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2025 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002026 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2027 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028 if (res == NULL)
2029 return -1;
2030 Py_DECREF(res);
2031 return 0;
2032}
2033
Antoine Pitroue4501852009-05-14 18:55:55 +00002034static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002035_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002036{
2037 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002038 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002039 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2040 self->encoding_start_of_stream = 1;
2041 }
2042 else {
2043 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2044 _PyIO_zero, NULL);
2045 self->encoding_start_of_stream = 0;
2046 }
2047 if (res == NULL)
2048 return -1;
2049 Py_DECREF(res);
2050 return 0;
2051}
2052
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002053static int
2054_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2055{
2056 /* Same as _textiowrapper_decoder_setstate() above. */
2057 return _textiowrapper_encoder_reset(
2058 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2059}
2060
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002061/*[clinic input]
2062_io.TextIOWrapper.seek
2063 cookie as cookieObj: object
2064 whence: int = 0
2065 /
2066[clinic start generated code]*/
2067
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002069_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2070/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002072 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002073 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 PyObject *res;
2075 int cmp;
2076
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002077 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002078 CHECK_CLOSED(self);
2079
2080 Py_INCREF(cookieObj);
2081
2082 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002083 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002084 goto fail;
2085 }
2086
2087 if (whence == 1) {
2088 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002089 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002090 if (cmp < 0)
2091 goto fail;
2092
2093 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002094 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002095 goto fail;
2096 }
2097
2098 /* Seeking to the current position should attempt to
2099 * sync the underlying buffer with the current position.
2100 */
2101 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002102 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 if (cookieObj == NULL)
2104 goto fail;
2105 }
2106 else if (whence == 2) {
2107 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002108 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002109 if (cmp < 0)
2110 goto fail;
2111
2112 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002113 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 goto fail;
2115 }
2116
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002117 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002118 if (res == NULL)
2119 goto fail;
2120 Py_DECREF(res);
2121
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002122 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 Py_CLEAR(self->snapshot);
2124 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002125 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002126 if (res == NULL)
2127 goto fail;
2128 Py_DECREF(res);
2129 }
2130
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002131 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002132 Py_CLEAR(cookieObj);
2133 if (res == NULL)
2134 goto fail;
2135 if (self->encoder) {
2136 /* If seek() == 0, we are at the start of stream, otherwise not */
2137 cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2138 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2139 Py_DECREF(res);
2140 goto fail;
2141 }
2142 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002143 return res;
2144 }
2145 else if (whence != 0) {
2146 PyErr_Format(PyExc_ValueError,
2147 "invalid whence (%d, should be 0, 1 or 2)", whence);
2148 goto fail;
2149 }
2150
Antoine Pitroue4501852009-05-14 18:55:55 +00002151 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002152 if (cmp < 0)
2153 goto fail;
2154
2155 if (cmp == 1) {
2156 PyErr_Format(PyExc_ValueError,
2157 "negative seek position %R", cookieObj);
2158 goto fail;
2159 }
2160
2161 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2162 if (res == NULL)
2163 goto fail;
2164 Py_DECREF(res);
2165
2166 /* The strategy of seek() is to go back to the safe start point
2167 * and replay the effect of read(chars_to_skip) from there.
2168 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002169 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002170 goto fail;
2171
2172 /* Seek back to the safe start point. */
2173 posobj = PyLong_FromOff_t(cookie.start_pos);
2174 if (posobj == NULL)
2175 goto fail;
2176 res = PyObject_CallMethodObjArgs(self->buffer,
2177 _PyIO_str_seek, posobj, NULL);
2178 Py_DECREF(posobj);
2179 if (res == NULL)
2180 goto fail;
2181 Py_DECREF(res);
2182
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002183 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002184 Py_CLEAR(self->snapshot);
2185
2186 /* Restore the decoder to its state from the safe start point. */
2187 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002188 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002189 goto fail;
2190 }
2191
2192 if (cookie.chars_to_skip) {
2193 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002194 PyObject *input_chunk = _PyObject_CallMethodId(
2195 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 PyObject *decoded;
2197
2198 if (input_chunk == NULL)
2199 goto fail;
2200
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002201 if (!PyBytes_Check(input_chunk)) {
2202 PyErr_Format(PyExc_TypeError,
2203 "underlying read() should have returned a bytes "
2204 "object, not '%.200s'",
2205 Py_TYPE(input_chunk)->tp_name);
2206 Py_DECREF(input_chunk);
2207 goto fail;
2208 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002209
2210 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2211 if (self->snapshot == NULL) {
2212 Py_DECREF(input_chunk);
2213 goto fail;
2214 }
2215
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002216 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2217 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002218
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002219 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002220 goto fail;
2221
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002222 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002223
2224 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002225 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002226 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2227 goto fail;
2228 }
2229 self->decoded_chars_used = cookie.chars_to_skip;
2230 }
2231 else {
2232 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2233 if (self->snapshot == NULL)
2234 goto fail;
2235 }
2236
Antoine Pitroue4501852009-05-14 18:55:55 +00002237 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2238 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002239 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002240 goto fail;
2241 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002242 return cookieObj;
2243 fail:
2244 Py_XDECREF(cookieObj);
2245 return NULL;
2246
2247}
2248
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002249/*[clinic input]
2250_io.TextIOWrapper.tell
2251[clinic start generated code]*/
2252
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002254_io_TextIOWrapper_tell_impl(textio *self)
2255/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256{
2257 PyObject *res;
2258 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002259 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002260 PyObject *next_input;
2261 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002262 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263 PyObject *saved_state = NULL;
2264 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002265 Py_ssize_t dec_buffer_len;
2266 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002267
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002268 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002269 CHECK_CLOSED(self);
2270
2271 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002272 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002273 goto fail;
2274 }
2275 if (!self->telling) {
2276 PyErr_SetString(PyExc_IOError,
2277 "telling position disabled by next() call");
2278 goto fail;
2279 }
2280
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002281 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002282 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002283 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002284 if (res == NULL)
2285 goto fail;
2286 Py_DECREF(res);
2287
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002288 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002289 if (posobj == NULL)
2290 goto fail;
2291
2292 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002293 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002294 return posobj;
2295 }
2296
2297#if defined(HAVE_LARGEFILE_SUPPORT)
2298 cookie.start_pos = PyLong_AsLongLong(posobj);
2299#else
2300 cookie.start_pos = PyLong_AsLong(posobj);
2301#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002302 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002303 if (PyErr_Occurred())
2304 goto fail;
2305
2306 /* Skip backward to the snapshot point (see _read_chunk). */
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002307 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002308 goto fail;
2309
2310 assert (PyBytes_Check(next_input));
2311
2312 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2313
2314 /* How many decoded characters have been used up since the snapshot? */
2315 if (self->decoded_chars_used == 0) {
2316 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002317 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002318 }
2319
2320 chars_to_skip = self->decoded_chars_used;
2321
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002322 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002323 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2324 _PyIO_str_getstate, NULL);
2325 if (saved_state == NULL)
2326 goto fail;
2327
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002328#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002329 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002330 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2331 _PyIO_str_getstate, NULL); \
2332 if (_state == NULL) \
2333 goto fail; \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002334 if (!PyArg_ParseTuple(_state, "Oi", &dec_buffer, &dec_flags)) { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002335 Py_DECREF(_state); \
2336 goto fail; \
2337 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002338 if (!PyBytes_Check(dec_buffer)) { \
2339 PyErr_Format(PyExc_TypeError, \
2340 "decoder getstate() should have returned a bytes " \
2341 "object, not '%.200s'", \
2342 Py_TYPE(dec_buffer)->tp_name); \
2343 Py_DECREF(_state); \
2344 goto fail; \
2345 } \
2346 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002347 Py_DECREF(_state); \
2348 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002349
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002350#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002351 PyObject *_decoded = _PyObject_CallMethodId( \
2352 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002353 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002354 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002355 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002356 Py_DECREF(_decoded); \
2357 } while (0)
2358
2359 /* Fast search for an acceptable start point, close to our
2360 current pos */
2361 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2362 skip_back = 1;
2363 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2364 input = PyBytes_AS_STRING(next_input);
2365 while (skip_bytes > 0) {
2366 /* Decode up to temptative start point */
2367 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2368 goto fail;
2369 DECODER_DECODE(input, skip_bytes, chars_decoded);
2370 if (chars_decoded <= chars_to_skip) {
2371 DECODER_GETSTATE();
2372 if (dec_buffer_len == 0) {
2373 /* Before pos and no bytes buffered in decoder => OK */
2374 cookie.dec_flags = dec_flags;
2375 chars_to_skip -= chars_decoded;
2376 break;
2377 }
2378 /* Skip back by buffered amount and reset heuristic */
2379 skip_bytes -= dec_buffer_len;
2380 skip_back = 1;
2381 }
2382 else {
2383 /* We're too far ahead, skip back a bit */
2384 skip_bytes -= skip_back;
2385 skip_back *= 2;
2386 }
2387 }
2388 if (skip_bytes <= 0) {
2389 skip_bytes = 0;
2390 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2391 goto fail;
2392 }
2393
2394 /* Note our initial start point. */
2395 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002396 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002397 if (chars_to_skip == 0)
2398 goto finally;
2399
2400 /* We should be close to the desired position. Now feed the decoder one
2401 * byte at a time until we reach the `chars_to_skip` target.
2402 * As we go, note the nearest "safe start point" before the current
2403 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002404 * can safely start from there and advance to this location).
2405 */
2406 chars_decoded = 0;
2407 input = PyBytes_AS_STRING(next_input);
2408 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002409 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002411 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002412
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002413 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002414 /* We got n chars for 1 byte */
2415 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002416 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002417 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418
2419 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2420 /* Decoder buffer is empty, so this is a safe start point. */
2421 cookie.start_pos += cookie.bytes_to_feed;
2422 chars_to_skip -= chars_decoded;
2423 cookie.dec_flags = dec_flags;
2424 cookie.bytes_to_feed = 0;
2425 chars_decoded = 0;
2426 }
2427 if (chars_decoded >= chars_to_skip)
2428 break;
2429 input++;
2430 }
2431 if (input == input_end) {
2432 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002433 PyObject *decoded = _PyObject_CallMethodId(
2434 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002435 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002436 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002437 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002438 Py_DECREF(decoded);
2439 cookie.need_eof = 1;
2440
2441 if (chars_decoded < chars_to_skip) {
2442 PyErr_SetString(PyExc_IOError,
2443 "can't reconstruct logical file position");
2444 goto fail;
2445 }
2446 }
2447
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002448finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002449 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450 Py_DECREF(saved_state);
2451 if (res == NULL)
2452 return NULL;
2453 Py_DECREF(res);
2454
2455 /* The returned cookie corresponds to the last safe start point. */
2456 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002457 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002459fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002460 if (saved_state) {
2461 PyObject *type, *value, *traceback;
2462 PyErr_Fetch(&type, &value, &traceback);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002463 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002464 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002465 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002466 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002467 }
2468 return NULL;
2469}
2470
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002471/*[clinic input]
2472_io.TextIOWrapper.truncate
2473 pos: object = None
2474 /
2475[clinic start generated code]*/
2476
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002477static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002478_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2479/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481 PyObject *res;
2482
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002483 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002484
2485 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2486 if (res == NULL)
2487 return NULL;
2488 Py_DECREF(res);
2489
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002490 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491}
2492
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002493static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002494textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002495{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002496 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002497
2498 CHECK_INITIALIZED(self);
2499
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002500 res = PyUnicode_FromString("<_io.TextIOWrapper");
2501 if (res == NULL)
2502 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002503
Martin v. Löwis767046a2011-10-14 15:35:36 +02002504 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002505 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002506 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002507 PyErr_Clear();
2508 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002509 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002510 }
2511 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002512 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002513 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002514 if (s == NULL)
2515 goto error;
2516 PyUnicode_AppendAndDel(&res, s);
2517 if (res == NULL)
2518 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002519 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002520 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002521 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002522 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002523 PyErr_Clear();
2524 else
2525 goto error;
2526 }
2527 else {
2528 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2529 Py_DECREF(modeobj);
2530 if (s == NULL)
2531 goto error;
2532 PyUnicode_AppendAndDel(&res, s);
2533 if (res == NULL)
2534 return NULL;
2535 }
2536 s = PyUnicode_FromFormat("%U encoding=%R>",
2537 res, self->encoding);
2538 Py_DECREF(res);
2539 return s;
2540error:
2541 Py_XDECREF(res);
2542 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002543}
2544
2545
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002546/* Inquiries */
2547
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002548/*[clinic input]
2549_io.TextIOWrapper.fileno
2550[clinic start generated code]*/
2551
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002552static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002553_io_TextIOWrapper_fileno_impl(textio *self)
2554/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002556 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002557 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558}
2559
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002560/*[clinic input]
2561_io.TextIOWrapper.seekable
2562[clinic start generated code]*/
2563
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002565_io_TextIOWrapper_seekable_impl(textio *self)
2566/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002568 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002569 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570}
2571
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002572/*[clinic input]
2573_io.TextIOWrapper.readable
2574[clinic start generated code]*/
2575
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002577_io_TextIOWrapper_readable_impl(textio *self)
2578/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002579{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002580 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002581 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002582}
2583
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002584/*[clinic input]
2585_io.TextIOWrapper.writable
2586[clinic start generated code]*/
2587
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002588static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002589_io_TextIOWrapper_writable_impl(textio *self)
2590/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002591{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002592 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002593 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002594}
2595
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002596/*[clinic input]
2597_io.TextIOWrapper.isatty
2598[clinic start generated code]*/
2599
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002600static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002601_io_TextIOWrapper_isatty_impl(textio *self)
2602/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002603{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002604 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002605 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606}
2607
2608static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002609textiowrapper_getstate(textio *self, PyObject *args)
2610{
2611 PyErr_Format(PyExc_TypeError,
2612 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2613 return NULL;
2614}
2615
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002616/*[clinic input]
2617_io.TextIOWrapper.flush
2618[clinic start generated code]*/
2619
Antoine Pitrou243757e2010-11-05 21:15:39 +00002620static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002621_io_TextIOWrapper_flush_impl(textio *self)
2622/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002623{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002624 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002625 CHECK_CLOSED(self);
2626 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002627 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002628 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002629 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002630}
2631
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002632/*[clinic input]
2633_io.TextIOWrapper.close
2634[clinic start generated code]*/
2635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002636static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002637_io_TextIOWrapper_close_impl(textio *self)
2638/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002639{
2640 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002641 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002642 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643
Antoine Pitrou6be88762010-05-03 16:48:20 +00002644 res = textiowrapper_closed_get(self, NULL);
2645 if (res == NULL)
2646 return NULL;
2647 r = PyObject_IsTrue(res);
2648 Py_DECREF(res);
2649 if (r < 0)
2650 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002651
Antoine Pitrou6be88762010-05-03 16:48:20 +00002652 if (r > 0) {
2653 Py_RETURN_NONE; /* stream already closed */
2654 }
2655 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002656 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002657 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002658 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002659 if (res)
2660 Py_DECREF(res);
2661 else
2662 PyErr_Clear();
2663 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002664 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002665 if (res == NULL)
2666 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002667 else
2668 Py_DECREF(res);
2669
Benjamin Peterson68623612012-12-20 11:53:11 -06002670 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2671 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002672 _PyErr_ChainExceptions(exc, val, tb);
2673 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002674 }
2675 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002676 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002677}
2678
2679static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002680textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002681{
2682 PyObject *line;
2683
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002684 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002685
2686 self->telling = 0;
2687 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2688 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002689 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690 }
2691 else {
2692 line = PyObject_CallMethodObjArgs((PyObject *)self,
2693 _PyIO_str_readline, NULL);
2694 if (line && !PyUnicode_Check(line)) {
2695 PyErr_Format(PyExc_IOError,
2696 "readline() should have returned an str object, "
2697 "not '%.200s'", Py_TYPE(line)->tp_name);
2698 Py_DECREF(line);
2699 return NULL;
2700 }
2701 }
2702
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002703 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002704 return NULL;
2705
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002706 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 /* Reached EOF or would have blocked */
2708 Py_DECREF(line);
2709 Py_CLEAR(self->snapshot);
2710 self->telling = self->seekable;
2711 return NULL;
2712 }
2713
2714 return line;
2715}
2716
2717static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002718textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002720 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002721 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722}
2723
2724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002725textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002726{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002727 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002728 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2729}
2730
2731static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002732textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002733{
2734 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002735 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002736 if (self->decoder == NULL)
2737 Py_RETURN_NONE;
2738 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2739 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002740 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2741 PyErr_Clear();
2742 Py_RETURN_NONE;
2743 }
2744 else {
2745 return NULL;
2746 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002747 }
2748 return res;
2749}
2750
2751static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002752textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002753{
2754 CHECK_INITIALIZED(self);
2755 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2756}
2757
2758static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002759textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002760{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002761 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002762 return PyLong_FromSsize_t(self->chunk_size);
2763}
2764
2765static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002766textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002767{
2768 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002769 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002770 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002771 if (n == -1 && PyErr_Occurred())
2772 return -1;
2773 if (n <= 0) {
2774 PyErr_SetString(PyExc_ValueError,
2775 "a strictly positive integer is required");
2776 return -1;
2777 }
2778 self->chunk_size = n;
2779 return 0;
2780}
2781
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002782#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002783
/* Method table for _io.IncrementalNewlineDecoder (decode, getstate,
 * setstate, reset).  Each *_METHODDEF macro is expected to expand to one
 * table entry followed by a comma -- presumably supplied by
 * "clinic/textio.c.h"; confirm there.  The {NULL} entry terminates the
 * table.
 */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
2791
/* Computed attributes of _io.IncrementalNewlineDecoder: a read-only
 * `newlines` (no setter is registered).  The {NULL} entry terminates the
 * table.
 */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
2796
/* Type object for _io.IncrementalNewlineDecoder.
 *
 * The initializer is positional: each value fills the PyTypeObject slot
 * named in the trailing comment, so entries must not be reordered.  The
 * type is subclassable (Py_TPFLAGS_BASETYPE) and sets no traverse/clear
 * slots.
 */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
2837
2838
/* Method table for _io.TextIOWrapper.  Each *_METHODDEF macro is expected
 * to expand to one table entry followed by a comma -- presumably supplied
 * by "clinic/textio.c.h"; confirm there.  __getstate__ is registered by
 * hand and always raises (see textiowrapper_getstate).  The {NULL, NULL}
 * entry terminates the table.
 */
static PyMethodDef textiowrapper_methods[] = {
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
2859
/* Struct fields of `textio` exposed directly as attributes.  encoding,
 * buffer and line_buffering are read-only; _finalizing is registered with
 * flags 0, i.e. without READONLY.  The {NULL} entry terminates the table.
 */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}
};
2867
/* Computed attributes of _io.TextIOWrapper.  Only _CHUNK_SIZE registers a
 * setter; name, closed, newlines and errors are read-only.  The `mode`
 * entry is commented out, so no `mode` getter is registered here.  The
 * {NULL} entry terminates the table.
 */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
2879
/* Type object for _io.TextIOWrapper.
 *
 * The initializer is positional: each value fills the PyTypeObject slot
 * named in the trailing comment, so entries must not be reordered.  The
 * type is subclassable, takes part in cyclic GC (traverse/clear slots are
 * set together with Py_TPFLAGS_HAVE_GC), supports weak references and an
 * instance dict through the offsets into `textio`, and iterates through
 * textiowrapper_iternext.  Py_TPFLAGS_HAVE_FINALIZE is set while the
 * tp_finalize slot itself is left 0 here.
 */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
    _io_TextIOWrapper___init____doc__, /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    _io_TextIOWrapper___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};