blob: 063caa6067f6751c5a286223c5b3cdfeaa9bbdcd [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030014/*[clinic input]
15module _io
16class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18[clinic start generated code]*/
19/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20
21/*[python input]
22class io_ssize_t_converter(CConverter):
23 type = 'Py_ssize_t'
24 converter = '_PyIO_ConvertSsize_t'
25[python start generated code]*/
26/*[python end generated code: output=da39a3ee5e6b4b0d input=d0a811d3cbfd1b33]*/
27
/* Static identifiers for the attribute and method names this file looks up
   by name.  Each _Py_IDENTIFIER(x) provides PyId_x; for example PyId_setstate
   is passed to _PyObject_CallMethodId() by the newline decoder's setstate. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020048
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000049/* TextIOBase */
50
/* Class docstring installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
58
59static PyObject *
60_unsupported(const char *message)
61{
Antoine Pitrou712cb732013-12-21 15:51:54 +010062 _PyIO_State *state = IO_STATE();
63 if (state != NULL)
64 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065 return NULL;
66}
67
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000068PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000069 "Separate the underlying buffer from the TextIOBase and return it.\n"
70 "\n"
71 "After the underlying buffer has been detached, the TextIO is in an\n"
72 "unusable state.\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000077{
78 return _unsupported("detach");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Read at most n characters from stream.\n"
83 "\n"
84 "Read from underlying buffer until we have n characters or we hit EOF.\n"
85 "If n is negative or omitted, read until EOF.\n"
86 );
87
88static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000089textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000090{
91 return _unsupported("read");
92}
93
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000094PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000095 "Read until newline or EOF.\n"
96 "\n"
97 "Returns an empty string if EOF is hit immediately.\n"
98 );
99
100static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000101textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000102{
103 return _unsupported("readline");
104}
105
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000106PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000107 "Write string to stream.\n"
108 "Returns the number of characters written (which is always equal to\n"
109 "the length of the string).\n"
110 );
111
112static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000113textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000114{
115 return _unsupported("write");
116}
117
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000118PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119 "Encoding of the text stream.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000131 "Line endings translated so far.\n"
132 "\n"
133 "Only line endings translated during reading are considered.\n"
134 "\n"
135 "Subclasses should override.\n"
136 );
137
138static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000139textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140{
141 Py_RETURN_NONE;
142}
143
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000144PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000145 "The error setting of the decoder or encoder.\n"
146 "\n"
147 "Subclasses should override.\n"
148 );
149
150static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000152{
153 Py_RETURN_NONE;
154}
155
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156
/* Method table of _TextIOBase.  Every entry points at a stub that fails
   through _unsupported(); the table ends with a NULL sentinel entry. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};
164
/* Attribute descriptors of _TextIOBase.  No setter is supplied for any of
   them (the setter slot is NULL); each getter returns None. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};
171
/* Type object for _io._TextIOBase.

   It derives from PyIOBase_Type, may itself be subclassed
   (Py_TPFLAGS_BASETYPE) and only contributes the stub methods and getters
   defined above; every other slot is left at 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_FINALIZE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
223
224
225/* IncrementalNewlineDecoder */
226
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or None to operate on str
                                   input directly; NULL until __init__ runs */
    PyObject *errors;           /* error handler name given to __init__ */
    unsigned int pendingcr: 1;  /* a trailing \r was withheld from the last
                                   non-final decode() result */
    unsigned int translate: 1;  /* rewrite \r\n and \r to \n while decoding */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000235
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300236/*[clinic input]
237_io.IncrementalNewlineDecoder.__init__
238 decoder: object
239 translate: int
240 errors: object(c_default="NULL") = "strict"
241
242Codec used when reading a file in universal newlines mode.
243
244It wraps another incremental decoder, translating \r\n and \r into \n.
245It also records the types of newlines encountered. When used with
246translate=False, it ensures that the newline sequence is returned in
247one piece. When used with decoder=None, it expects unicode strings as
248decode input and translates newlines without first invoking an external
249decoder.
250[clinic start generated code]*/
251
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300253_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
254 PyObject *decoder, int translate,
255 PyObject *errors)
256/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000258 self->decoder = decoder;
259 Py_INCREF(decoder);
260
261 if (errors == NULL) {
262 self->errors = PyUnicode_FromString("strict");
263 if (self->errors == NULL)
264 return -1;
265 }
266 else {
267 Py_INCREF(errors);
268 self->errors = errors;
269 }
270
271 self->translate = translate;
272 self->seennl = 0;
273 self->pendingcr = 0;
274
275 return 0;
276}
277
278static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000279incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000280{
281 Py_CLEAR(self->decoder);
282 Py_CLEAR(self->errors);
283 Py_TYPE(self)->tp_free((PyObject *)self);
284}
285
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200286static int
287check_decoded(PyObject *decoded)
288{
289 if (decoded == NULL)
290 return -1;
291 if (!PyUnicode_Check(decoded)) {
292 PyErr_Format(PyExc_TypeError,
293 "decoder should return a string result, not '%.200s'",
294 Py_TYPE(decoded)->tp_name);
295 Py_DECREF(decoded);
296 return -1;
297 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200298 if (PyUnicode_READY(decoded) < 0) {
299 Py_DECREF(decoded);
300 return -1;
301 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200302 return 0;
303}
304
/* Bit flags stored in nldecoder_object.seennl, one per newline style that
   has been encountered so far. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
/* All three newline styles have been seen. */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
309
/* Decode `input` through the wrapped decoder, record which newline styles
   occur and, when translation is enabled, rewrite \r\n and \r to \n.

   myself - the IncrementalNewlineDecoder instance.
   input  - object handed to the wrapped decoder's decode(); when the
            wrapped decoder is None it is used as the decoded text itself.
   final  - non-zero when no more input will follow.

   Returns a new reference to the resulting str, or NULL with an exception
   set.  A trailing \r of a non-final result is withheld (pendingcr) and put
   back in front of the next result. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (a \r withheld by a previous pass is re-added below) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* check_decoded() releases `output` itself when it rejects it */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is the per-character width in bytes, so `out + kind` is
           the slot right after the \r just written */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* nothing to scan (seennl is left untouched) */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* for 1-byte data a matching byte is a matching character;
                   wider data is re-checked character by character */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): no explicit bound here; presumably
                           relies on the buffer ending with a NUL character
                           -- confirm */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* \r\n counts as one newline: consume the \n as well */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* translation never lengthens the text, so `len` characters
               of scratch space are enough */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    /* both \r and \r\n are emitted as a single \n */
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past `c`, so in > len means
                   `c` was read from index len, i.e. past the last
                   character: stop without copying it */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* the old output is already released, so bypass `error:` */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511/*[clinic input]
512_io.IncrementalNewlineDecoder.decode
513 input: object
514 final: int(c_default="0") = False
515[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300517static PyObject *
518_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
519 PyObject *input, int final)
520/*[clinic end generated code: output=0d486755bb37a66e input=d65677385bfd6827]*/
521{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000522 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
523}
524
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300525/*[clinic input]
526_io.IncrementalNewlineDecoder.getstate
527[clinic start generated code]*/
528
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300530_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
531/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532{
533 PyObject *buffer;
534 unsigned PY_LONG_LONG flag;
535
536 if (self->decoder != Py_None) {
537 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
538 _PyIO_str_getstate, NULL);
539 if (state == NULL)
540 return NULL;
Serhiy Storchakabb72c472015-04-19 20:38:19 +0300541 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542 Py_DECREF(state);
543 return NULL;
544 }
545 Py_INCREF(buffer);
546 Py_DECREF(state);
547 }
548 else {
549 buffer = PyBytes_FromString("");
550 flag = 0;
551 }
552 flag <<= 1;
553 if (self->pendingcr)
554 flag |= 1;
555 return Py_BuildValue("NK", buffer, flag);
556}
557
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300558/*[clinic input]
559_io.IncrementalNewlineDecoder.setstate
560 state: object
561 /
562[clinic start generated code]*/
563
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000564static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300565_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
566 PyObject *state)
567/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000568{
569 PyObject *buffer;
570 unsigned PY_LONG_LONG flag;
571
Serhiy Storchakabb72c472015-04-19 20:38:19 +0300572 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573 return NULL;
574
Victor Stinner7d7e7752014-06-17 23:31:25 +0200575 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 flag >>= 1;
577
578 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200579 return _PyObject_CallMethodId(self->decoder,
580 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000581 else
582 Py_RETURN_NONE;
583}
584
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300585/*[clinic input]
586_io.IncrementalNewlineDecoder.reset
587[clinic start generated code]*/
588
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300590_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
591/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592{
593 self->seennl = 0;
594 self->pendingcr = 0;
595 if (self->decoder != Py_None)
596 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
597 else
598 Py_RETURN_NONE;
599}
600
601static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603{
604 switch (self->seennl) {
605 case SEEN_CR:
606 return PyUnicode_FromString("\r");
607 case SEEN_LF:
608 return PyUnicode_FromString("\n");
609 case SEEN_CRLF:
610 return PyUnicode_FromString("\r\n");
611 case SEEN_CR | SEEN_LF:
612 return Py_BuildValue("ss", "\r", "\n");
613 case SEEN_CR | SEEN_CRLF:
614 return Py_BuildValue("ss", "\r", "\r\n");
615 case SEEN_LF | SEEN_CRLF:
616 return Py_BuildValue("ss", "\n", "\r\n");
617 case SEEN_CR | SEEN_LF | SEEN_CRLF:
618 return Py_BuildValue("sss", "\r", "\n", "\r\n");
619 default:
620 Py_RETURN_NONE;
621 }
622
623}
624
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000625/* TextIOWrapper */
626
/* Signature shared by the specialized encoding helpers: takes the wrapper
   and the text to encode, returns the encoded result. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;       /* read through PyBytes_AS_STRING() by the
                               specialized encoders */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000689/* A couple of specialized cases in order to bypass the slow incremental
690 encoding methods for the most popular encodings. */
691
692static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000693ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200695 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696}
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100701 return _PyUnicode_EncodeUTF16(text,
702 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703}
704
705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000706utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100708 return _PyUnicode_EncodeUTF16(text,
709 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710}
711
712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714{
Antoine Pitroue4501852009-05-14 18:55:55 +0000715 if (!self->encoding_start_of_stream) {
716 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200717#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000718 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000720 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000722 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100723 return _PyUnicode_EncodeUTF16(text,
724 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725}
726
Antoine Pitroue4501852009-05-14 18:55:55 +0000727static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000728utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000729{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100730 return _PyUnicode_EncodeUTF32(text,
731 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000732}
733
734static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000735utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000736{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100737 return _PyUnicode_EncodeUTF32(text,
738 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000739}
740
741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743{
744 if (!self->encoding_start_of_stream) {
745 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200746#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 return utf32be_encode(self, text);
748#else
749 return utf32le_encode(self, text);
750#endif
751 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100752 return _PyUnicode_EncodeUTF32(text,
753 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000754}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000755
756static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000757utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766}
767
768/* Map normalized encoding names onto the specialized encoding funcs */
769
770typedef struct {
771 const char *name;
772 encodefunc_t encodefunc;
773} encodefuncentry;
774
Antoine Pitrou24f36292009-03-28 22:16:42 +0000775static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776 {"ascii", (encodefunc_t) ascii_encode},
777 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000778 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000779 {"utf-16-be", (encodefunc_t) utf16be_encode},
780 {"utf-16-le", (encodefunc_t) utf16le_encode},
781 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000782 {"utf-32-be", (encodefunc_t) utf32be_encode},
783 {"utf-32-le", (encodefunc_t) utf32le_encode},
784 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000785 {NULL, NULL}
786};
787
788
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300789/*[clinic input]
790_io.TextIOWrapper.__init__
791 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700792 encoding: str(accept={str, NoneType}) = NULL
793 errors: str(accept={str, NoneType}) = NULL
794 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300795 line_buffering: int(c_default="0") = False
796 write_through: int(c_default="0") = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300798Character and line based layer over a BufferedIOBase object, buffer.
799
800encoding gives the name of the encoding that the stream will be
801decoded or encoded with. It defaults to locale.getpreferredencoding(False).
802
803errors determines the strictness of encoding and decoding (see
804help(codecs.Codec) or the documentation for codecs.register) and
805defaults to "strict".
806
807newline controls how line endings are handled. It can be None, '',
808'\n', '\r', and '\r\n'. It works as follows:
809
810* On input, if newline is None, universal newlines mode is
811 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
812 these are translated into '\n' before being returned to the
813 caller. If it is '', universal newline mode is enabled, but line
814 endings are returned to the caller untranslated. If it has any of
815 the other legal values, input lines are only terminated by the given
816 string, and the line ending is returned to the caller untranslated.
817
818* On output, if newline is None, any '\n' characters written are
819 translated to the system default line separator, os.linesep. If
820 newline is '' or '\n', no translation takes place. If newline is any
821 of the other legal values, any '\n' characters written are translated
822 to the given string.
823
824If line_buffering is True, a call to flush is implied when a call to
825write contains a newline character.
826[clinic start generated code]*/
827
828static int
829_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
830 const char *encoding, const char *errors,
831 const char *newline, int line_buffering,
832 int write_through)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700833/*[clinic end generated code: output=56a83402ce2a8381 input=3126cb3101a2c99b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300834{
835 PyObject *raw, *codec_info = NULL;
836 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 PyObject *res;
838 int r;
839
840 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000841 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842
843 if (newline && newline[0] != '\0'
844 && !(newline[0] == '\n' && newline[1] == '\0')
845 && !(newline[0] == '\r' && newline[1] == '\0')
846 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
847 PyErr_Format(PyExc_ValueError,
848 "illegal newline value: %s", newline);
849 return -1;
850 }
851
852 Py_CLEAR(self->buffer);
853 Py_CLEAR(self->encoding);
854 Py_CLEAR(self->encoder);
855 Py_CLEAR(self->decoder);
856 Py_CLEAR(self->readnl);
857 Py_CLEAR(self->decoded_chars);
858 Py_CLEAR(self->pending_bytes);
859 Py_CLEAR(self->snapshot);
860 Py_CLEAR(self->errors);
861 Py_CLEAR(self->raw);
862 self->decoded_chars_used = 0;
863 self->pending_bytes_count = 0;
864 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000865 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000866
867 if (encoding == NULL) {
868 /* Try os.device_encoding(fileno) */
869 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100870 state = IO_STATE();
871 if (state == NULL)
872 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200873 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000874 /* Ignore only AttributeError and UnsupportedOperation */
875 if (fileno == NULL) {
876 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
877 PyErr_ExceptionMatches(state->unsupported_operation)) {
878 PyErr_Clear();
879 }
880 else {
881 goto error;
882 }
883 }
884 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200885 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500886 Py_DECREF(fileno);
887 if (fd == -1 && PyErr_Occurred()) {
888 goto error;
889 }
890
891 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892 if (self->encoding == NULL)
893 goto error;
894 else if (!PyUnicode_Check(self->encoding))
895 Py_CLEAR(self->encoding);
896 }
897 }
898 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200899 PyObject *locale_module = _PyIO_get_locale_module(state);
900 if (locale_module == NULL)
901 goto catch_ImportError;
902 self->encoding = _PyObject_CallMethodId(
903 locale_module, &PyId_getpreferredencoding, "O", Py_False);
904 Py_DECREF(locale_module);
905 if (self->encoding == NULL) {
906 catch_ImportError:
907 /*
Martin Panter7462b6492015-11-02 03:37:02 +0000908 Importing locale can raise an ImportError because of
909 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +0200910 ImportError if _locale is not available. These will happen
911 during module building.
912 */
913 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
914 PyErr_Clear();
915 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000916 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200917 else
918 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000922 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000923 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000924 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000925 if (encoding == NULL)
926 goto error;
927 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000928 else if (encoding != NULL) {
929 self->encoding = PyUnicode_FromString(encoding);
930 if (self->encoding == NULL)
931 goto error;
932 }
933 else {
934 PyErr_SetString(PyExc_IOError,
935 "could not determine default encoding");
936 }
937
Nick Coghlana9b15242014-02-04 22:11:18 +1000938 /* Check we have been asked for a real text encoding */
939 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
940 if (codec_info == NULL) {
941 Py_CLEAR(self->encoding);
942 goto error;
943 }
944
945 /* XXX: Failures beyond this point have the potential to leak elements
946 * of the partially constructed object (like self->encoding)
947 */
948
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000949 if (errors == NULL)
950 errors = "strict";
951 self->errors = PyBytes_FromString(errors);
952 if (self->errors == NULL)
953 goto error;
954
955 self->chunk_size = 8192;
956 self->readuniversal = (newline == NULL || newline[0] == '\0');
957 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200958 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000959 self->readtranslate = (newline == NULL);
960 if (newline) {
961 self->readnl = PyUnicode_FromString(newline);
962 if (self->readnl == NULL)
Nick Coghlana9b15242014-02-04 22:11:18 +1000963 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 }
965 self->writetranslate = (newline == NULL || newline[0] != '\0');
966 if (!self->readuniversal && self->readnl) {
967 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000968 if (self->writenl == NULL)
969 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000970 if (!strcmp(self->writenl, "\n"))
971 self->writenl = NULL;
972 }
973#ifdef MS_WINDOWS
974 else
975 self->writenl = "\r\n";
976#endif
977
978 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200979 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000980 if (res == NULL)
981 goto error;
982 r = PyObject_IsTrue(res);
983 Py_DECREF(res);
984 if (r == -1)
985 goto error;
986 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +1000987 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
988 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000989 if (self->decoder == NULL)
990 goto error;
991
992 if (self->readuniversal) {
993 PyObject *incrementalDecoder = PyObject_CallFunction(
994 (PyObject *)&PyIncrementalNewlineDecoder_Type,
995 "Oi", self->decoder, (int)self->readtranslate);
996 if (incrementalDecoder == NULL)
997 goto error;
Serhiy Storchaka48842712016-04-06 09:45:48 +0300998 Py_XSETREF(self->decoder, incrementalDecoder);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000999 }
1000 }
1001
1002 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001003 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001004 if (res == NULL)
1005 goto error;
1006 r = PyObject_IsTrue(res);
1007 Py_DECREF(res);
1008 if (r == -1)
1009 goto error;
1010 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001011 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1012 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001013 if (self->encoder == NULL)
1014 goto error;
1015 /* Get the normalized named of the codec */
Nick Coghlana9b15242014-02-04 22:11:18 +10001016 res = _PyObject_GetAttrId(codec_info, &PyId_name);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001017 if (res == NULL) {
1018 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019 PyErr_Clear();
1020 else
1021 goto error;
1022 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 else if (PyUnicode_Check(res)) {
1024 encodefuncentry *e = encodefuncs;
1025 while (e->name != NULL) {
1026 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1027 self->encodefunc = e->encodefunc;
1028 break;
1029 }
1030 e++;
1031 }
1032 }
1033 Py_XDECREF(res);
1034 }
1035
Nick Coghlana9b15242014-02-04 22:11:18 +10001036 /* Finished sorting out the codec details */
Benjamin Peterson6c14f232014-11-12 10:19:46 -05001037 Py_CLEAR(codec_info);
Nick Coghlana9b15242014-02-04 22:11:18 +10001038
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 self->buffer = buffer;
1040 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1043 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1044 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001045 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001046 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001047 if (raw == NULL) {
1048 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1049 PyErr_Clear();
1050 else
1051 goto error;
1052 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001053 else if (Py_TYPE(raw) == &PyFileIO_Type)
1054 self->raw = raw;
1055 else
1056 Py_DECREF(raw);
1057 }
1058
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001059 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 if (res == NULL)
1061 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001062 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001063 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001064 if (r < 0)
1065 goto error;
1066 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001067
Martin v. Löwis767046a2011-10-14 15:35:36 +02001068 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001069
Antoine Pitroue4501852009-05-14 18:55:55 +00001070 self->encoding_start_of_stream = 0;
1071 if (self->seekable && self->encoder) {
1072 PyObject *cookieObj;
1073 int cmp;
1074
1075 self->encoding_start_of_stream = 1;
1076
1077 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1078 if (cookieObj == NULL)
1079 goto error;
1080
1081 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1082 Py_DECREF(cookieObj);
1083 if (cmp < 0) {
1084 goto error;
1085 }
1086
1087 if (cmp == 0) {
1088 self->encoding_start_of_stream = 0;
1089 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1090 _PyIO_zero, NULL);
1091 if (res == NULL)
1092 goto error;
1093 Py_DECREF(res);
1094 }
1095 }
1096
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001097 self->ok = 1;
1098 return 0;
1099
1100 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001101 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001102 return -1;
1103}
1104
1105static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001106_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001107{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001108 self->ok = 0;
1109 Py_CLEAR(self->buffer);
1110 Py_CLEAR(self->encoding);
1111 Py_CLEAR(self->encoder);
1112 Py_CLEAR(self->decoder);
1113 Py_CLEAR(self->readnl);
1114 Py_CLEAR(self->decoded_chars);
1115 Py_CLEAR(self->pending_bytes);
1116 Py_CLEAR(self->snapshot);
1117 Py_CLEAR(self->errors);
1118 Py_CLEAR(self->raw);
1119 return 0;
1120}
1121
1122static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001123textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001125 self->finalizing = 1;
1126 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001127 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001128 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 _PyObject_GC_UNTRACK(self);
1130 if (self->weakreflist != NULL)
1131 PyObject_ClearWeakRefs((PyObject *)self);
1132 Py_CLEAR(self->dict);
1133 Py_TYPE(self)->tp_free((PyObject *)self);
1134}
1135
1136static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001137textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001138{
1139 Py_VISIT(self->buffer);
1140 Py_VISIT(self->encoding);
1141 Py_VISIT(self->encoder);
1142 Py_VISIT(self->decoder);
1143 Py_VISIT(self->readnl);
1144 Py_VISIT(self->decoded_chars);
1145 Py_VISIT(self->pending_bytes);
1146 Py_VISIT(self->snapshot);
1147 Py_VISIT(self->errors);
1148 Py_VISIT(self->raw);
1149
1150 Py_VISIT(self->dict);
1151 return 0;
1152}
1153
1154static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001155textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001157 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001158 return -1;
1159 Py_CLEAR(self->dict);
1160 return 0;
1161}
1162
1163static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001164textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165
1166/* This macro takes some shortcuts to make the common case faster. */
1167#define CHECK_CLOSED(self) \
1168 do { \
1169 int r; \
1170 PyObject *_res; \
1171 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1172 if (self->raw != NULL) \
1173 r = _PyFileIO_closed(self->raw); \
1174 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001175 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001176 if (_res == NULL) \
1177 return NULL; \
1178 r = PyObject_IsTrue(_res); \
1179 Py_DECREF(_res); \
1180 if (r < 0) \
1181 return NULL; \
1182 } \
1183 if (r > 0) { \
1184 PyErr_SetString(PyExc_ValueError, \
1185 "I/O operation on closed file."); \
1186 return NULL; \
1187 } \
1188 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001189 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001190 return NULL; \
1191 } while (0)
1192
/* Return NULL from the enclosing function, with ValueError set, unless the
   object has been successfully initialized (self->ok > 0).
   Wrapped in do/while(0) so the expansion is a single statement and cannot
   capture a following `else`.  Must be used with a trailing semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1199
/* Return NULL from the enclosing function, with ValueError set, if the
   object is uninitialized or its underlying buffer has been detached.
   Wrapped in do/while(0) so both checks form one statement; previously the
   detached check was a separate statement that escaped an unbraced `if`.
   Must be used with a trailing semicolon. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
1207
/* Variant of the attached check for functions returning int: returns -1
   from the enclosing function, with ValueError set, if the object is
   uninitialized or its underlying buffer has been detached.
   Wrapped in do/while(0) so the expansion is a single statement and cannot
   capture a following `else`.  Must be used with a trailing semicolon. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1218
1219
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001220/*[clinic input]
1221_io.TextIOWrapper.detach
1222[clinic start generated code]*/
1223
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001224static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001225_io_TextIOWrapper_detach_impl(textio *self)
1226/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001227{
1228 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001229 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001230 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1231 if (res == NULL)
1232 return NULL;
1233 Py_DECREF(res);
1234 buffer = self->buffer;
1235 self->buffer = NULL;
1236 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001237 return buffer;
1238}
1239
Antoine Pitrou24f36292009-03-28 22:16:42 +00001240/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001241 underlying buffered object, though. */
1242static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001243_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001244{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001245 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246
1247 if (self->pending_bytes == NULL)
1248 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001249
1250 pending = self->pending_bytes;
1251 Py_INCREF(pending);
1252 self->pending_bytes_count = 0;
1253 Py_CLEAR(self->pending_bytes);
1254
1255 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1256 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001257 if (b == NULL)
1258 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001259 ret = NULL;
1260 do {
1261 ret = PyObject_CallMethodObjArgs(self->buffer,
1262 _PyIO_str_write, b, NULL);
1263 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001264 Py_DECREF(b);
1265 if (ret == NULL)
1266 return -1;
1267 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268 return 0;
1269}
1270
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001271/*[clinic input]
1272_io.TextIOWrapper.write
1273 text: unicode
1274 /
1275[clinic start generated code]*/
1276
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001277static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001278_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1279/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001280{
1281 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001282 PyObject *b;
1283 Py_ssize_t textlen;
1284 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001285 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001287 if (PyUnicode_READY(text) == -1)
1288 return NULL;
1289
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001290 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 CHECK_CLOSED(self);
1292
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001293 if (self->encoder == NULL)
1294 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001295
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 Py_INCREF(text);
1297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299
1300 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302 haslf = 1;
1303
1304 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001305 PyObject *newtext = _PyObject_CallMethodId(
1306 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001307 Py_DECREF(text);
1308 if (newtext == NULL)
1309 return NULL;
1310 text = newtext;
1311 }
1312
Antoine Pitroue96ec682011-07-23 21:46:35 +02001313 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001314 text_needflush = 1;
1315 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001316 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001318 needflush = 1;
1319
1320 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001321 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001323 self->encoding_start_of_stream = 0;
1324 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 else
1326 b = PyObject_CallMethodObjArgs(self->encoder,
1327 _PyIO_str_encode, text, NULL);
1328 Py_DECREF(text);
1329 if (b == NULL)
1330 return NULL;
1331
1332 if (self->pending_bytes == NULL) {
1333 self->pending_bytes = PyList_New(0);
1334 if (self->pending_bytes == NULL) {
1335 Py_DECREF(b);
1336 return NULL;
1337 }
1338 self->pending_bytes_count = 0;
1339 }
1340 if (PyList_Append(self->pending_bytes, b) < 0) {
1341 Py_DECREF(b);
1342 return NULL;
1343 }
1344 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1345 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001346 if (self->pending_bytes_count > self->chunk_size || needflush ||
1347 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001348 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001349 return NULL;
1350 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001351
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001352 if (needflush) {
1353 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1354 if (ret == NULL)
1355 return NULL;
1356 Py_DECREF(ret);
1357 }
1358
1359 Py_CLEAR(self->snapshot);
1360
1361 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001362 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001363 if (ret == NULL)
1364 return NULL;
1365 Py_DECREF(ret);
1366 }
1367
1368 return PyLong_FromSsize_t(textlen);
1369}
1370
1371/* Steal a reference to chars and store it in the decoded_char buffer;
1372 */
1373static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001374textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001376 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001377 self->decoded_chars_used = 0;
1378}
1379
1380static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001381textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001382{
1383 PyObject *chars;
1384 Py_ssize_t avail;
1385
1386 if (self->decoded_chars == NULL)
1387 return PyUnicode_FromStringAndSize(NULL, 0);
1388
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001389 /* decoded_chars is guaranteed to be "ready". */
1390 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001391 - self->decoded_chars_used);
1392
1393 assert(avail >= 0);
1394
1395 if (n < 0 || n > avail)
1396 n = avail;
1397
1398 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399 chars = PyUnicode_Substring(self->decoded_chars,
1400 self->decoded_chars_used,
1401 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001402 if (chars == NULL)
1403 return NULL;
1404 }
1405 else {
1406 chars = self->decoded_chars;
1407 Py_INCREF(chars);
1408 }
1409
1410 self->decoded_chars_used += n;
1411 return chars;
1412}
1413
1414/* Read and decode the next chunk of data from the BufferedReader.
1415 */
1416static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001417textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001418{
1419 PyObject *dec_buffer = NULL;
1420 PyObject *dec_flags = NULL;
1421 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001422 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001423 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001424 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001425 int eof;
1426
1427 /* The return value is True unless EOF was reached. The decoded string is
1428 * placed in self._decoded_chars (replacing its previous value). The
1429 * entire input chunk is sent to the decoder, though some of it may remain
1430 * buffered in the decoder, yet to be converted.
1431 */
1432
1433 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001434 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001435 return -1;
1436 }
1437
1438 if (self->telling) {
1439 /* To prepare for tell(), we need to snapshot a point in the file
1440 * where the decoder's input buffer is empty.
1441 */
1442
1443 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1444 _PyIO_str_getstate, NULL);
1445 if (state == NULL)
1446 return -1;
1447 /* Given this, we know there was a valid snapshot point
1448 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1449 */
Serhiy Storchakabb72c472015-04-19 20:38:19 +03001450 if (PyArg_ParseTuple(state, "OO", &dec_buffer, &dec_flags) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001451 Py_DECREF(state);
1452 return -1;
1453 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001454
1455 if (!PyBytes_Check(dec_buffer)) {
1456 PyErr_Format(PyExc_TypeError,
1457 "decoder getstate() should have returned a bytes "
1458 "object, not '%.200s'",
1459 Py_TYPE(dec_buffer)->tp_name);
1460 Py_DECREF(state);
1461 return -1;
1462 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001463 Py_INCREF(dec_buffer);
1464 Py_INCREF(dec_flags);
1465 Py_DECREF(state);
1466 }
1467
1468 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001469 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001470 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001471 }
1472 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001473 if (chunk_size == NULL)
1474 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001475
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001476 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001477 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1478 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001479 Py_DECREF(chunk_size);
1480 if (input_chunk == NULL)
1481 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001482
1483 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001484 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001485 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001486 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1487 Py_TYPE(input_chunk)->tp_name);
1488 goto fail;
1489 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001490
Antoine Pitroub8503892014-04-29 10:14:02 +02001491 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001492 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001493 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1494 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1495 self->decoder, input_chunk, eof);
1496 }
1497 else {
1498 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1499 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1500 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001501 PyBuffer_Release(&input_chunk_buf);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001502
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001503 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001504 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001505 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001506 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001507 if (nchars > 0)
1508 self->b2cratio = (double) nbytes / nchars;
1509 else
1510 self->b2cratio = 0.0;
1511 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001512 eof = 0;
1513
1514 if (self->telling) {
1515 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1516 * next input to be decoded is dec_buffer + input_chunk.
1517 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001518 PyObject *next_input = dec_buffer;
1519 PyBytes_Concat(&next_input, input_chunk);
1520 if (next_input == NULL) {
1521 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001522 goto fail;
1523 }
Serhiy Storchaka48842712016-04-06 09:45:48 +03001524 Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001525 }
1526 Py_DECREF(input_chunk);
1527
1528 return (eof == 0);
1529
1530 fail:
1531 Py_XDECREF(dec_buffer);
1532 Py_XDECREF(dec_flags);
1533 Py_XDECREF(input_chunk);
1534 return -1;
1535}
1536
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001537/*[clinic input]
1538_io.TextIOWrapper.read
1539 size as n: io_ssize_t = -1
1540 /
1541[clinic start generated code]*/
1542
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001543static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001544_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1545/*[clinic end generated code: output=7e651ce6cc6a25a6 input=8c09398424085cca]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001547 PyObject *result = NULL, *chunks = NULL;
1548
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001549 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 CHECK_CLOSED(self);
1551
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001552 if (self->decoder == NULL)
1553 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001554
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001555 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001556 return NULL;
1557
1558 if (n < 0) {
1559 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001560 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 PyObject *decoded;
1562 if (bytes == NULL)
1563 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001564
1565 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1566 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1567 bytes, 1);
1568 else
1569 decoded = PyObject_CallMethodObjArgs(
1570 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001572 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001573 goto fail;
1574
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001575 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576
1577 if (result == NULL) {
1578 Py_DECREF(decoded);
1579 return NULL;
1580 }
1581
1582 PyUnicode_AppendAndDel(&result, decoded);
1583 if (result == NULL)
1584 goto fail;
1585
1586 Py_CLEAR(self->snapshot);
1587 return result;
1588 }
1589 else {
1590 int res = 1;
1591 Py_ssize_t remaining = n;
1592
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001593 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001594 if (result == NULL)
1595 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001596 if (PyUnicode_READY(result) == -1)
1597 goto fail;
1598 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599
1600 /* Keep reading chunks until we have n characters to return */
1601 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001602 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001603 if (res < 0) {
1604 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1605 when EINTR occurs so we needn't do it ourselves. */
1606 if (_PyIO_trap_eintr()) {
1607 continue;
1608 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001609 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001610 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001611 if (res == 0) /* EOF */
1612 break;
1613 if (chunks == NULL) {
1614 chunks = PyList_New(0);
1615 if (chunks == NULL)
1616 goto fail;
1617 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001618 if (PyUnicode_GET_LENGTH(result) > 0 &&
1619 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620 goto fail;
1621 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001622 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 if (result == NULL)
1624 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001625 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626 }
1627 if (chunks != NULL) {
1628 if (result != NULL && PyList_Append(chunks, result) < 0)
1629 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001630 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001631 if (result == NULL)
1632 goto fail;
1633 Py_CLEAR(chunks);
1634 }
1635 return result;
1636 }
1637 fail:
1638 Py_XDECREF(result);
1639 Py_XDECREF(chunks);
1640 return NULL;
1641}
1642
1643
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001644/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 that is to the NUL character. Otherwise the function will produce
1646 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001647static char *
1648find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001650 if (kind == PyUnicode_1BYTE_KIND) {
1651 assert(ch < 256);
1652 return (char *) memchr((void *) s, (char) ch, end - s);
1653 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001655 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001656 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 return s;
1659 if (s == end)
1660 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001661 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001662 }
1663}
1664
1665Py_ssize_t
1666_PyIO_find_line_ending(
1667 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001670 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671
1672 if (translated) {
1673 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001674 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001676 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001677 else {
1678 *consumed = len;
1679 return -1;
1680 }
1681 }
1682 else if (universal) {
1683 /* Universal newline search. Find any of \r, \r\n, \n
1684 * The decoder ensures that \r\n are not split in two pieces
1685 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001686 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001688 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001689 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001690 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001691 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 if (s >= end) {
1694 *consumed = len;
1695 return -1;
1696 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001697 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001698 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001700 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001702 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001703 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001704 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001705 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001706 }
1707 }
1708 }
1709 else {
1710 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001711 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001712 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001713 /* Assume that readnl is an ASCII character. */
1714 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001715 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001716 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001718 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 *consumed = len;
1720 return -1;
1721 }
1722 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001723 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001724 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001725 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726 if (e < s)
1727 e = s;
1728 while (s < e) {
1729 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001730 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 if (pos == NULL || pos >= e)
1732 break;
1733 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001734 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 break;
1736 }
1737 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001738 return (pos - start)/kind + readnl_len;
1739 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001741 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001742 if (pos == NULL)
1743 *consumed = len;
1744 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001745 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746 return -1;
1747 }
1748 }
1749}
1750
1751static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001752_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753{
1754 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1755 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1756 int res;
1757
1758 CHECK_CLOSED(self);
1759
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001760 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 return NULL;
1762
1763 chunked = 0;
1764
1765 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001766 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001767 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001768 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001769 Py_ssize_t consumed = 0;
1770
1771 /* First, get some data if necessary */
1772 res = 1;
1773 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001774 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001775 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001776 if (res < 0) {
1777 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1778 when EINTR occurs so we needn't do it ourselves. */
1779 if (_PyIO_trap_eintr()) {
1780 continue;
1781 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001782 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001783 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001784 if (res == 0)
1785 break;
1786 }
1787 if (res == 0) {
1788 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001789 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001790 Py_CLEAR(self->snapshot);
1791 start = endpos = offset_to_buffer = 0;
1792 break;
1793 }
1794
1795 if (remaining == NULL) {
1796 line = self->decoded_chars;
1797 start = self->decoded_chars_used;
1798 offset_to_buffer = 0;
1799 Py_INCREF(line);
1800 }
1801 else {
1802 assert(self->decoded_chars_used == 0);
1803 line = PyUnicode_Concat(remaining, self->decoded_chars);
1804 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001805 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 Py_CLEAR(remaining);
1807 if (line == NULL)
1808 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001809 if (PyUnicode_READY(line) == -1)
1810 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001811 }
1812
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001813 ptr = PyUnicode_DATA(line);
1814 line_len = PyUnicode_GET_LENGTH(line);
1815 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816
1817 endpos = _PyIO_find_line_ending(
1818 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001819 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001820 ptr + kind * start,
1821 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001822 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001823 if (endpos >= 0) {
1824 endpos += start;
1825 if (limit >= 0 && (endpos - start) + chunked >= limit)
1826 endpos = start + limit - chunked;
1827 break;
1828 }
1829
1830 /* We can put aside up to `endpos` */
1831 endpos = consumed + start;
1832 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1833 /* Didn't find line ending, but reached length limit */
1834 endpos = start + limit - chunked;
1835 break;
1836 }
1837
1838 if (endpos > start) {
1839 /* No line ending seen yet - put aside current data */
1840 PyObject *s;
1841 if (chunks == NULL) {
1842 chunks = PyList_New(0);
1843 if (chunks == NULL)
1844 goto error;
1845 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001846 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001847 if (s == NULL)
1848 goto error;
1849 if (PyList_Append(chunks, s) < 0) {
1850 Py_DECREF(s);
1851 goto error;
1852 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001853 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001854 Py_DECREF(s);
1855 }
1856 /* There may be some remaining bytes we'll have to prepend to the
1857 next chunk of data */
1858 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001859 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001860 if (remaining == NULL)
1861 goto error;
1862 }
1863 Py_CLEAR(line);
1864 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001865 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001866 }
1867
1868 if (line != NULL) {
1869 /* Our line ends in the current buffer */
1870 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001871 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1872 PyObject *s = PyUnicode_Substring(line, start, endpos);
1873 Py_CLEAR(line);
1874 if (s == NULL)
1875 goto error;
1876 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001877 }
1878 }
1879 if (remaining != NULL) {
1880 if (chunks == NULL) {
1881 chunks = PyList_New(0);
1882 if (chunks == NULL)
1883 goto error;
1884 }
1885 if (PyList_Append(chunks, remaining) < 0)
1886 goto error;
1887 Py_CLEAR(remaining);
1888 }
1889 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001890 if (line != NULL) {
1891 if (PyList_Append(chunks, line) < 0)
1892 goto error;
1893 Py_DECREF(line);
1894 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001895 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1896 if (line == NULL)
1897 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001898 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001900 if (line == NULL) {
1901 Py_INCREF(_PyIO_empty_str);
1902 line = _PyIO_empty_str;
1903 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001904
1905 return line;
1906
1907 error:
1908 Py_XDECREF(chunks);
1909 Py_XDECREF(remaining);
1910 Py_XDECREF(line);
1911 return NULL;
1912}
1913
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001914/*[clinic input]
1915_io.TextIOWrapper.readline
1916 size: Py_ssize_t = -1
1917 /
1918[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001920static PyObject *
1921_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
1922/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
1923{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001924 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001925 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001926}
1927
1928/* Seek and Tell */
1929
1930typedef struct {
1931 Py_off_t start_pos;
1932 int dec_flags;
1933 int bytes_to_feed;
1934 int chars_to_skip;
1935 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001936} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields will be stored.
   Each offset is expressed relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1969
1970static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001971textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001972{
1973 unsigned char buffer[COOKIE_BUF_LEN];
1974 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1975 if (cookieLong == NULL)
1976 return -1;
1977
1978 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001979 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001980 Py_DECREF(cookieLong);
1981 return -1;
1982 }
1983 Py_DECREF(cookieLong);
1984
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001985 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1986 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1987 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1988 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1989 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001990
1991 return 0;
1992}
1993
1994static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001995textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996{
1997 unsigned char buffer[COOKIE_BUF_LEN];
1998
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001999 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2000 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2001 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2002 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2003 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004
Christian Heimes743e0cd2012-10-17 23:52:17 +02002005 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2006 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002007}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008
2009static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002010_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002011{
2012 PyObject *res;
2013 /* When seeking to the start of the stream, we call decoder.reset()
2014 rather than decoder.getstate().
2015 This is for a few decoders such as utf-16 for which the state value
2016 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2017 utf-16, that we are expecting a BOM).
2018 */
2019 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2020 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2021 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002022 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2023 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024 if (res == NULL)
2025 return -1;
2026 Py_DECREF(res);
2027 return 0;
2028}
2029
Antoine Pitroue4501852009-05-14 18:55:55 +00002030static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002031_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002032{
2033 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002034 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002035 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2036 self->encoding_start_of_stream = 1;
2037 }
2038 else {
2039 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2040 _PyIO_zero, NULL);
2041 self->encoding_start_of_stream = 0;
2042 }
2043 if (res == NULL)
2044 return -1;
2045 Py_DECREF(res);
2046 return 0;
2047}
2048
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002049static int
2050_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2051{
2052 /* Same as _textiowrapper_decoder_setstate() above. */
2053 return _textiowrapper_encoder_reset(
2054 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2055}
2056
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002057/*[clinic input]
2058_io.TextIOWrapper.seek
2059 cookie as cookieObj: object
2060 whence: int = 0
2061 /
2062[clinic start generated code]*/
2063
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002064static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002065_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2066/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002068 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002069 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002070 PyObject *res;
2071 int cmp;
2072
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002073 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 CHECK_CLOSED(self);
2075
2076 Py_INCREF(cookieObj);
2077
2078 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002079 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 goto fail;
2081 }
2082
2083 if (whence == 1) {
2084 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002085 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 if (cmp < 0)
2087 goto fail;
2088
2089 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002090 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002091 goto fail;
2092 }
2093
2094 /* Seeking to the current position should attempt to
2095 * sync the underlying buffer with the current position.
2096 */
2097 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002098 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002099 if (cookieObj == NULL)
2100 goto fail;
2101 }
2102 else if (whence == 2) {
2103 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002104 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 if (cmp < 0)
2106 goto fail;
2107
2108 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002109 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 goto fail;
2111 }
2112
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002113 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 if (res == NULL)
2115 goto fail;
2116 Py_DECREF(res);
2117
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002118 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 Py_CLEAR(self->snapshot);
2120 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002121 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002122 if (res == NULL)
2123 goto fail;
2124 Py_DECREF(res);
2125 }
2126
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002127 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002128 Py_CLEAR(cookieObj);
2129 if (res == NULL)
2130 goto fail;
2131 if (self->encoder) {
2132 /* If seek() == 0, we are at the start of stream, otherwise not */
2133 cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2134 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2135 Py_DECREF(res);
2136 goto fail;
2137 }
2138 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139 return res;
2140 }
2141 else if (whence != 0) {
2142 PyErr_Format(PyExc_ValueError,
2143 "invalid whence (%d, should be 0, 1 or 2)", whence);
2144 goto fail;
2145 }
2146
Antoine Pitroue4501852009-05-14 18:55:55 +00002147 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002148 if (cmp < 0)
2149 goto fail;
2150
2151 if (cmp == 1) {
2152 PyErr_Format(PyExc_ValueError,
2153 "negative seek position %R", cookieObj);
2154 goto fail;
2155 }
2156
2157 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2158 if (res == NULL)
2159 goto fail;
2160 Py_DECREF(res);
2161
2162 /* The strategy of seek() is to go back to the safe start point
2163 * and replay the effect of read(chars_to_skip) from there.
2164 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002165 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166 goto fail;
2167
2168 /* Seek back to the safe start point. */
2169 posobj = PyLong_FromOff_t(cookie.start_pos);
2170 if (posobj == NULL)
2171 goto fail;
2172 res = PyObject_CallMethodObjArgs(self->buffer,
2173 _PyIO_str_seek, posobj, NULL);
2174 Py_DECREF(posobj);
2175 if (res == NULL)
2176 goto fail;
2177 Py_DECREF(res);
2178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002179 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002180 Py_CLEAR(self->snapshot);
2181
2182 /* Restore the decoder to its state from the safe start point. */
2183 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002184 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002185 goto fail;
2186 }
2187
2188 if (cookie.chars_to_skip) {
2189 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002190 PyObject *input_chunk = _PyObject_CallMethodId(
2191 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192 PyObject *decoded;
2193
2194 if (input_chunk == NULL)
2195 goto fail;
2196
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002197 if (!PyBytes_Check(input_chunk)) {
2198 PyErr_Format(PyExc_TypeError,
2199 "underlying read() should have returned a bytes "
2200 "object, not '%.200s'",
2201 Py_TYPE(input_chunk)->tp_name);
2202 Py_DECREF(input_chunk);
2203 goto fail;
2204 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002205
2206 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2207 if (self->snapshot == NULL) {
2208 Py_DECREF(input_chunk);
2209 goto fail;
2210 }
2211
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002212 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2213 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002214
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002215 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002216 goto fail;
2217
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002218 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002219
2220 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002221 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002222 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2223 goto fail;
2224 }
2225 self->decoded_chars_used = cookie.chars_to_skip;
2226 }
2227 else {
2228 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2229 if (self->snapshot == NULL)
2230 goto fail;
2231 }
2232
Antoine Pitroue4501852009-05-14 18:55:55 +00002233 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2234 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002235 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002236 goto fail;
2237 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002238 return cookieObj;
2239 fail:
2240 Py_XDECREF(cookieObj);
2241 return NULL;
2242
2243}
2244
/* TextIOWrapper.tell() implementation.
 *
 * After flushing pending writes, asks the underlying buffer for its byte
 * position.  If there is no decoder or no snapshot, that position object is
 * returned unchanged.  Otherwise a cookie is assembled (start position in the
 * buffer, decoder flags, number of bytes to feed, number of characters to
 * skip, and whether EOF must be signalled) and the value produced by
 * textiowrapper_build_cookie() is returned.
 *
 * The search for a start point drives self->decoder directly, so the
 * decoder's state is captured in `saved_state` beforehand and put back with
 * setstate() on both the success path (`finally`) and the error path
 * (`fail`).
 *
 * Returns NULL with an exception set on failure.
 */
/*[clinic input]
_io.TextIOWrapper.tell
[clinic start generated code]*/

static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    /* Non-NULL once the decoder state has been captured; the `fail` label
       uses this to decide whether the decoder must be restored. */
    PyObject *saved_state = NULL;
    char *input, *input_end;
    /* Outputs of the DECODER_GETSTATE() macro defined below. */
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_IOError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* Push out pending text first, then call flush() on the wrapper. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
    if (posobj == NULL)
        goto fail;

    if (self->decoder == NULL || self->snapshot == NULL) {
        /* Nothing to reconstruct: the buffer position is the answer. */
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    /* posobj is released before the error check, so `fail` never has to
       drop it. */
    Py_DECREF(posobj);
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0) {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodObjArgs(self->decoder,
                                             _PyIO_str_getstate, NULL);
    if (saved_state == NULL)
        goto fail;

/* Call self->decoder.getstate(), require a (bytes, int) pair, and store the
   length of the bytes part in dec_buffer_len and the int in dec_flags.
   Jumps to `fail` on any error. */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
            _PyIO_str_getstate, NULL); \
        if (_state == NULL) \
            goto fail; \
        if (!PyArg_ParseTuple(_state, "Oi", &dec_buffer, &dec_flags)) { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "decoder getstate() should have returned a bytes " \
                         "object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* Feed `len` bytes starting at `start` to self->decoder.decode() and store
   the number of characters produced in `res`.  Jumps to `fail` when
   check_decoded() rejects the result. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethodId( \
            self->decoder, &PyId_decode, "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    /* Initial guess: scale the character count by self->b2cratio
       (presumably a bytes-per-character estimate -- confirm where it is
       assigned). */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            /* The step doubles on each consecutive overshoot. */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    if (skip_bytes <= 0) {
        /* The fast search found nothing usable: start again from the
           snapshot point with the decoder reset to the cookie's state. */
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethodId(
            self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_IOError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Success path: put the decoder back into the state captured above. */
    res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    if (saved_state) {
        /* Restore the decoder while holding on to the pending exception;
           the fetched exception is then handed to _PyErr_ChainExceptions()
           together with whatever setstate() may have raised. */
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2466
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002467/*[clinic input]
2468_io.TextIOWrapper.truncate
2469 pos: object = None
2470 /
2471[clinic start generated code]*/
2472
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002473static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002474_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2475/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002477 PyObject *res;
2478
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002479 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480
2481 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2482 if (res == NULL)
2483 return NULL;
2484 Py_DECREF(res);
2485
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002486 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487}
2488
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002490textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002491{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002492 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002493
2494 CHECK_INITIALIZED(self);
2495
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002496 res = PyUnicode_FromString("<_io.TextIOWrapper");
2497 if (res == NULL)
2498 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002499
Martin v. Löwis767046a2011-10-14 15:35:36 +02002500 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002501 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002502 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002503 PyErr_Clear();
2504 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002505 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002506 }
2507 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002508 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002509 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002510 if (s == NULL)
2511 goto error;
2512 PyUnicode_AppendAndDel(&res, s);
2513 if (res == NULL)
2514 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002515 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002516 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002517 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002518 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002519 PyErr_Clear();
2520 else
2521 goto error;
2522 }
2523 else {
2524 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2525 Py_DECREF(modeobj);
2526 if (s == NULL)
2527 goto error;
2528 PyUnicode_AppendAndDel(&res, s);
2529 if (res == NULL)
2530 return NULL;
2531 }
2532 s = PyUnicode_FromFormat("%U encoding=%R>",
2533 res, self->encoding);
2534 Py_DECREF(res);
2535 return s;
2536error:
2537 Py_XDECREF(res);
2538 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002539}
2540
2541
/* Inquiries */
2543
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002544/*[clinic input]
2545_io.TextIOWrapper.fileno
2546[clinic start generated code]*/
2547
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002548static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002549_io_TextIOWrapper_fileno_impl(textio *self)
2550/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002552 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002553 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554}
2555
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002556/*[clinic input]
2557_io.TextIOWrapper.seekable
2558[clinic start generated code]*/
2559
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002561_io_TextIOWrapper_seekable_impl(textio *self)
2562/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002564 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002565 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002566}
2567
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002568/*[clinic input]
2569_io.TextIOWrapper.readable
2570[clinic start generated code]*/
2571
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002572static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002573_io_TextIOWrapper_readable_impl(textio *self)
2574/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002575{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002576 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002577 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002578}
2579
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002580/*[clinic input]
2581_io.TextIOWrapper.writable
2582[clinic start generated code]*/
2583
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002584static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002585_io_TextIOWrapper_writable_impl(textio *self)
2586/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002587{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002588 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002589 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590}
2591
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002592/*[clinic input]
2593_io.TextIOWrapper.isatty
2594[clinic start generated code]*/
2595
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002596static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002597_io_TextIOWrapper_isatty_impl(textio *self)
2598/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002599{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002600 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002601 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002602}
2603
2604static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002605textiowrapper_getstate(textio *self, PyObject *args)
2606{
2607 PyErr_Format(PyExc_TypeError,
2608 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2609 return NULL;
2610}
2611
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002612/*[clinic input]
2613_io.TextIOWrapper.flush
2614[clinic start generated code]*/
2615
Antoine Pitrou243757e2010-11-05 21:15:39 +00002616static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002617_io_TextIOWrapper_flush_impl(textio *self)
2618/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002620 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002621 CHECK_CLOSED(self);
2622 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002623 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002624 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002625 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002626}
2627
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002628/*[clinic input]
2629_io.TextIOWrapper.close
2630[clinic start generated code]*/
2631
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002632static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002633_io_TextIOWrapper_close_impl(textio *self)
2634/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002635{
2636 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002637 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002638 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002639
Antoine Pitrou6be88762010-05-03 16:48:20 +00002640 res = textiowrapper_closed_get(self, NULL);
2641 if (res == NULL)
2642 return NULL;
2643 r = PyObject_IsTrue(res);
2644 Py_DECREF(res);
2645 if (r < 0)
2646 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002647
Antoine Pitrou6be88762010-05-03 16:48:20 +00002648 if (r > 0) {
2649 Py_RETURN_NONE; /* stream already closed */
2650 }
2651 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002652 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002653 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002654 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002655 if (res)
2656 Py_DECREF(res);
2657 else
2658 PyErr_Clear();
2659 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002660 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002661 if (res == NULL)
2662 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002663 else
2664 Py_DECREF(res);
2665
Benjamin Peterson68623612012-12-20 11:53:11 -06002666 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2667 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002668 _PyErr_ChainExceptions(exc, val, tb);
2669 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002670 }
2671 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002672 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673}
2674
2675static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002676textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002677{
2678 PyObject *line;
2679
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002680 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002681
2682 self->telling = 0;
2683 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2684 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002685 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 }
2687 else {
2688 line = PyObject_CallMethodObjArgs((PyObject *)self,
2689 _PyIO_str_readline, NULL);
2690 if (line && !PyUnicode_Check(line)) {
2691 PyErr_Format(PyExc_IOError,
2692 "readline() should have returned an str object, "
2693 "not '%.200s'", Py_TYPE(line)->tp_name);
2694 Py_DECREF(line);
2695 return NULL;
2696 }
2697 }
2698
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002699 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 return NULL;
2701
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002702 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002703 /* Reached EOF or would have blocked */
2704 Py_DECREF(line);
2705 Py_CLEAR(self->snapshot);
2706 self->telling = self->seekable;
2707 return NULL;
2708 }
2709
2710 return line;
2711}
2712
2713static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002714textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002715{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002716 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002717 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002718}
2719
2720static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002721textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002723 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002724 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2725}
2726
2727static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002728textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002729{
2730 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002731 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002732 if (self->decoder == NULL)
2733 Py_RETURN_NONE;
2734 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2735 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002736 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2737 PyErr_Clear();
2738 Py_RETURN_NONE;
2739 }
2740 else {
2741 return NULL;
2742 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743 }
2744 return res;
2745}
2746
2747static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002748textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002749{
2750 CHECK_INITIALIZED(self);
2751 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2752}
2753
2754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002755textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002756{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002757 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002758 return PyLong_FromSsize_t(self->chunk_size);
2759}
2760
2761static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002762textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002763{
2764 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002765 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002766 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002767 if (n == -1 && PyErr_Occurred())
2768 return -1;
2769 if (n <= 0) {
2770 PyErr_SetString(PyExc_ValueError,
2771 "a strictly positive integer is required");
2772 return -1;
2773 }
2774 self->chunk_size = n;
2775 return 0;
2776}
2777
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002778#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002779
/* Method table for IncrementalNewlineDecoder.  Each *_METHODDEF name is a
   macro (presumably supplied by the clinic header included just above)
   that expands to a complete table entry, which is why the entries carry
   no separating commas here.  The {NULL} entry terminates the table. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
2787
/* Computed attributes of IncrementalNewlineDecoder: "newlines" has a getter
   only (no setter), so it is read-only from Python. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
2792
/* Type object for _io.IncrementalNewlineDecoder.  Slots are initialized
   positionally, so the order of the entries below must not change.  The
   type is subclassable (Py_TPFLAGS_BASETYPE), uses the methods and getset
   tables defined above, and is created through PyType_GenericNew plus the
   clinic-generated __init__. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
2833
2834
/* Method table for TextIOWrapper.  The *_METHODDEF names are macros
   (presumably supplied by the clinic header included above) that each
   expand to a complete table entry; __getstate__ is the one hand-written
   entry and takes no arguments.  {NULL, NULL} terminates the table. */
static PyMethodDef textiowrapper_methods[] = {
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries and pickling support */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
2855
/* Struct fields of `textio` exposed directly as Python attributes.
   encoding, buffer and line_buffering are read-only; _finalizing has no
   READONLY flag and can therefore be assigned from Python. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}
};
2863
/* Computed attributes of TextIOWrapper.  Only _CHUNK_SIZE has a setter;
   the others are getter-only.  The "mode" entry is commented out, so no
   "mode" descriptor is registered by this table. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
2875
/* Type object for _io.TextIOWrapper.  Slots are initialized positionally,
   so the order of the entries below must not change.  The type is
   subclassable, participates in cyclic GC (traverse/clear supplied),
   supports weak references and an instance __dict__ via the offsets given,
   and acts as its own iterator-next provider through
   textiowrapper_iternext. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
    _io_TextIOWrapper___init____doc__, /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    _io_TextIOWrapper___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};