blob: 83a2465d4d26b832d6d1883084a802cd14781871 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001#define PY_SSIZE_T_CLEAN
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00002#include "Python.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003#include "structmember.h"
4#include "_iomodule.h"
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00005
/* Implementation note: the buffer is always at least one character longer
   than the enclosed string, for proper functioning of _PyIO_find_line_ending.
*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00009
10typedef struct {
11 PyObject_HEAD
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020012 Py_UCS4 *buf;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000013 Py_ssize_t pos;
14 Py_ssize_t string_size;
15 size_t buf_size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000016
17 char ok; /* initialized? */
18 char closed;
19 char readuniversal;
20 char readtranslate;
21 PyObject *decoder;
22 PyObject *readnl;
23 PyObject *writenl;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020024
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000025 PyObject *dict;
26 PyObject *weakreflist;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000027} stringio;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000028
/* Make the enclosing function return NULL with a ValueError set when the
   object has not been (successfully) initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* Make the enclosing function return NULL with a ValueError set when the
   object has been closed. */
#define CHECK_CLOSED(self) \
    if ((self)->closed) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on closed file"); \
        return NULL; \
    }
42
43PyDoc_STRVAR(stringio_doc,
44 "Text I/O implementation using an in-memory buffer.\n"
45 "\n"
46 "The initial_value argument sets the value of object. The newline\n"
47 "argument is like the one of TextIOWrapper's constructor.");
48
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000049
50/* Internal routine for changing the size, in terms of characters, of the
51 buffer of StringIO objects. The caller should ensure that the 'size'
52 argument is non-negative. Returns 0 on success, -1 otherwise. */
53static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054resize_buffer(stringio *self, size_t size)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000055{
56 /* Here, unsigned types are used to avoid dealing with signed integer
57 overflow, which is undefined in C. */
58 size_t alloc = self->buf_size;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020059 Py_UCS4 *new_buf = NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000060
61 assert(self->buf != NULL);
62
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000063 /* Reserve one more char for line ending detection. */
64 size = size + 1;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000065 /* For simplicity, stay in the range of the signed type. Anyway, Python
66 doesn't allow strings to be longer than this. */
67 if (size > PY_SSIZE_T_MAX)
68 goto overflow;
69
70 if (size < alloc / 2) {
71 /* Major downsize; resize down to exact size. */
72 alloc = size + 1;
73 }
74 else if (size < alloc) {
75 /* Within allocated size; quick exit */
76 return 0;
77 }
78 else if (size <= alloc * 1.125) {
79 /* Moderate upsize; overallocate similar to list_resize() */
80 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
81 }
82 else {
83 /* Major upsize; resize up to exact size */
84 alloc = size + 1;
85 }
86
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020087 if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000088 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020089 new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000090 if (new_buf == NULL) {
91 PyErr_NoMemory();
92 return -1;
93 }
94 self->buf_size = alloc;
95 self->buf = new_buf;
96
97 return 0;
98
99 overflow:
100 PyErr_SetString(PyExc_OverflowError,
101 "new buffer size too large");
102 return -1;
103}
104
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105/* Internal routine for writing a whole PyUnicode object to the buffer of a
106 StringIO object. Returns 0 on success, or -1 on error. */
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000107static Py_ssize_t
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000108write_str(stringio *self, PyObject *obj)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000109{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110 Py_ssize_t len;
111 PyObject *decoded = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200112
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000113 assert(self->buf != NULL);
114 assert(self->pos >= 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115
116 if (self->decoder != NULL) {
117 decoded = _PyIncrementalNewlineDecoder_decode(
118 self->decoder, obj, 1 /* always final */);
119 }
120 else {
121 decoded = obj;
122 Py_INCREF(decoded);
123 }
124 if (self->writenl) {
125 PyObject *translated = PyUnicode_Replace(
126 decoded, _PyIO_str_nl, self->writenl, -1);
127 Py_DECREF(decoded);
128 decoded = translated;
129 }
130 if (decoded == NULL)
131 return -1;
132
133 assert(PyUnicode_Check(decoded));
Victor Stinnere1335c72011-10-04 20:53:03 +0200134 if (PyUnicode_READY(decoded)) {
135 Py_DECREF(decoded);
136 return -1;
137 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200138 len = PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000139
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000140 assert(len >= 0);
141
142 /* This overflow check is not strictly necessary. However, it avoids us to
143 deal with funky things like comparing an unsigned and a signed
144 integer. */
145 if (self->pos > PY_SSIZE_T_MAX - len) {
146 PyErr_SetString(PyExc_OverflowError,
147 "new position too large");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000148 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000149 }
150 if (self->pos + len > self->string_size) {
151 if (resize_buffer(self, self->pos + len) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000152 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000153 }
154
155 if (self->pos > self->string_size) {
156 /* In case of overseek, pad with null bytes the buffer region between
157 the end of stream and the current position.
158
159 0 lo string_size hi
160 | |<---used--->|<----------available----------->|
161 | | <--to pad-->|<---to write---> |
Ezio Melotti13925002011-03-16 11:05:33 +0200162 0 buf position
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000163
164 */
165 memset(self->buf + self->string_size, '\0',
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200166 (self->pos - self->string_size) * sizeof(Py_UCS4));
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000167 }
168
169 /* Copy the data to the internal buffer, overwriting some of the
170 existing data if self->pos < self->string_size. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200171 if (!PyUnicode_AsUCS4(decoded,
172 self->buf + self->pos,
173 self->buf_size - self->pos,
174 0))
175 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000176
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000177 /* Set the new length of the internal string if it has changed. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200178 self->pos += len;
179 if (self->string_size < self->pos)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000180 self->string_size = self->pos;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000181
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000182 Py_DECREF(decoded);
183 return 0;
184
185fail:
186 Py_XDECREF(decoded);
187 return -1;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000188}
189
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000190PyDoc_STRVAR(stringio_getvalue_doc,
191 "Retrieve the entire contents of the object.");
192
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000193static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000194stringio_getvalue(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000195{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000196 CHECK_INITIALIZED(self);
197 CHECK_CLOSED(self);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200198 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
199 self->string_size);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000200}
201
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202PyDoc_STRVAR(stringio_tell_doc,
203 "Tell the current file position.");
204
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000205static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000206stringio_tell(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000207{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000208 CHECK_INITIALIZED(self);
209 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000210 return PyLong_FromSsize_t(self->pos);
211}
212
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000213PyDoc_STRVAR(stringio_read_doc,
214 "Read at most n characters, returned as a string.\n"
215 "\n"
216 "If the argument is negative or omitted, read until EOF\n"
217 "is reached. Return an empty string at EOF.\n");
218
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000219static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220stringio_read(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000221{
222 Py_ssize_t size, n;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200223 Py_UCS4 *output;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000224 PyObject *arg = Py_None;
225
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000226 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000227 if (!PyArg_ParseTuple(args, "|O:read", &arg))
228 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000230
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231 if (PyNumber_Check(arg)) {
232 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
Amaury Forgeot d'Arc58fb9052008-09-30 20:22:44 +0000233 if (size == -1 && PyErr_Occurred())
234 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000235 }
236 else if (arg == Py_None) {
237 /* Read until EOF is reached, by default. */
238 size = -1;
239 }
240 else {
241 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
242 Py_TYPE(arg)->tp_name);
243 return NULL;
244 }
245
246 /* adjust invalid sizes */
247 n = self->string_size - self->pos;
248 if (size < 0 || size > n) {
249 size = n;
250 if (size < 0)
251 size = 0;
252 }
253
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000254 output = self->buf + self->pos;
255 self->pos += size;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200256 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000257}
258
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259/* Internal helper, used by stringio_readline and stringio_iternext */
260static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000261_stringio_readline(stringio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000262{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200263 Py_UCS4 *start, *end, old_char;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000264 Py_ssize_t len, consumed;
265
266 /* In case of overseek, return the empty string */
267 if (self->pos >= self->string_size)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200268 return PyUnicode_New(0, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000269
270 start = self->buf + self->pos;
271 if (limit < 0 || limit > self->string_size - self->pos)
272 limit = self->string_size - self->pos;
273
274 end = start + limit;
275 old_char = *end;
276 *end = '\0';
277 len = _PyIO_find_line_ending(
278 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200279 PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000280 *end = old_char;
281 /* If we haven't found any line ending, we just return everything
282 (`consumed` is ignored). */
283 if (len < 0)
284 len = limit;
285 self->pos += len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200286 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000287}
288
289PyDoc_STRVAR(stringio_readline_doc,
290 "Read until newline or EOF.\n"
291 "\n"
292 "Returns an empty string if EOF is hit immediately.\n");
293
294static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000295stringio_readline(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000296{
297 PyObject *arg = Py_None;
298 Py_ssize_t limit = -1;
299
300 CHECK_INITIALIZED(self);
301 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
302 return NULL;
303 CHECK_CLOSED(self);
304
305 if (PyNumber_Check(arg)) {
306 limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
307 if (limit == -1 && PyErr_Occurred())
308 return NULL;
309 }
310 else if (arg != Py_None) {
311 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
312 Py_TYPE(arg)->tp_name);
313 return NULL;
314 }
315 return _stringio_readline(self, limit);
316}
317
318static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000319stringio_iternext(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000320{
321 PyObject *line;
322
323 CHECK_INITIALIZED(self);
324 CHECK_CLOSED(self);
325
326 if (Py_TYPE(self) == &PyStringIO_Type) {
327 /* Skip method call overhead for speed */
328 line = _stringio_readline(self, -1);
329 }
330 else {
331 /* XXX is subclassing StringIO really supported? */
332 line = PyObject_CallMethodObjArgs((PyObject *)self,
333 _PyIO_str_readline, NULL);
334 if (line && !PyUnicode_Check(line)) {
335 PyErr_Format(PyExc_IOError,
336 "readline() should have returned an str object, "
337 "not '%.200s'", Py_TYPE(line)->tp_name);
338 Py_DECREF(line);
339 return NULL;
340 }
341 }
342
343 if (line == NULL)
344 return NULL;
345
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200346 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347 /* Reached EOF */
348 Py_DECREF(line);
349 return NULL;
350 }
351
352 return line;
353}
354
355PyDoc_STRVAR(stringio_truncate_doc,
356 "Truncate size to pos.\n"
357 "\n"
358 "The pos argument defaults to the current file position, as\n"
Antoine Pitrou905a2ff2010-01-31 22:47:27 +0000359 "returned by tell(). The current file position is unchanged.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360 "Returns the new absolute position.\n");
361
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000362static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000363stringio_truncate(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000364{
365 Py_ssize_t size;
366 PyObject *arg = Py_None;
367
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000368 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000369 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
370 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000371 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000372
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373 if (PyNumber_Check(arg)) {
374 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
Benjamin Petersonc9e435e2008-09-30 02:22:04 +0000375 if (size == -1 && PyErr_Occurred())
376 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000377 }
378 else if (arg == Py_None) {
379 /* Truncate to current position if no argument is passed. */
380 size = self->pos;
381 }
382 else {
383 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
384 Py_TYPE(arg)->tp_name);
385 return NULL;
386 }
387
388 if (size < 0) {
389 PyErr_Format(PyExc_ValueError,
390 "Negative size value %zd", size);
391 return NULL;
392 }
393
394 if (size < self->string_size) {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000395 if (resize_buffer(self, size) < 0)
396 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 self->string_size = size;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000398 }
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000399
400 return PyLong_FromSsize_t(size);
401}
402
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000403PyDoc_STRVAR(stringio_seek_doc,
404 "Change stream position.\n"
405 "\n"
406 "Seek to character offset pos relative to position indicated by whence:\n"
407 " 0 Start of stream (the default). pos should be >= 0;\n"
408 " 1 Current position - pos must be 0;\n"
409 " 2 End of stream - pos must be 0.\n"
410 "Returns the new absolute position.\n");
411
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000412static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000413stringio_seek(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000414{
415 Py_ssize_t pos;
416 int mode = 0;
417
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000418 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000419 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
420 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000421 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000422
423 if (mode != 0 && mode != 1 && mode != 2) {
424 PyErr_Format(PyExc_ValueError,
425 "Invalid whence (%i, should be 0, 1 or 2)", mode);
426 return NULL;
427 }
428 else if (pos < 0 && mode == 0) {
429 PyErr_Format(PyExc_ValueError,
430 "Negative seek position %zd", pos);
431 return NULL;
432 }
433 else if (mode != 0 && pos != 0) {
434 PyErr_SetString(PyExc_IOError,
435 "Can't do nonzero cur-relative seeks");
436 return NULL;
437 }
438
439 /* mode 0: offset relative to beginning of the string.
440 mode 1: no change to current position.
441 mode 2: change position to end of file. */
442 if (mode == 1) {
443 pos = self->pos;
444 }
445 else if (mode == 2) {
446 pos = self->string_size;
447 }
448
449 self->pos = pos;
450
451 return PyLong_FromSsize_t(self->pos);
452}
453
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000454PyDoc_STRVAR(stringio_write_doc,
455 "Write string to file.\n"
456 "\n"
457 "Returns the number of characters written, which is always equal to\n"
458 "the length of the string.\n");
459
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000460static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000461stringio_write(stringio *self, PyObject *obj)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000462{
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000463 Py_ssize_t size;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000464
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 CHECK_INITIALIZED(self);
466 if (!PyUnicode_Check(obj)) {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000467 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
468 Py_TYPE(obj)->tp_name);
469 return NULL;
470 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471 if (PyUnicode_READY(obj))
472 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000473 CHECK_CLOSED(self);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200474 size = PyUnicode_GET_LENGTH(obj);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000475
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 if (size > 0 && write_str(self, obj) < 0)
477 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000478
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 return PyLong_FromSsize_t(size);
480}
481
482PyDoc_STRVAR(stringio_close_doc,
483 "Close the IO object. Attempting any further operation after the\n"
484 "object is closed will raise a ValueError.\n"
485 "\n"
486 "This method has no effect if the file is already closed.\n");
487
488static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000489stringio_close(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000490{
491 self->closed = 1;
492 /* Free up some memory */
493 if (resize_buffer(self, 0) < 0)
494 return NULL;
495 Py_CLEAR(self->readnl);
496 Py_CLEAR(self->writenl);
497 Py_CLEAR(self->decoder);
498 Py_RETURN_NONE;
499}
500
501static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000502stringio_traverse(stringio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000503{
504 Py_VISIT(self->dict);
505 return 0;
506}
507
508static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000509stringio_clear(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000510{
511 Py_CLEAR(self->dict);
512 return 0;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000513}
514
515static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000516stringio_dealloc(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000517{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000518 _PyObject_GC_UNTRACK(self);
Alexandre Vassalottifc477042009-07-22 02:24:49 +0000519 self->ok = 0;
520 if (self->buf) {
521 PyMem_Free(self->buf);
522 self->buf = NULL;
523 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000524 Py_CLEAR(self->readnl);
525 Py_CLEAR(self->writenl);
526 Py_CLEAR(self->decoder);
Alexandre Vassalottifc477042009-07-22 02:24:49 +0000527 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528 if (self->weakreflist != NULL)
529 PyObject_ClearWeakRefs((PyObject *) self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000530 Py_TYPE(self)->tp_free(self);
531}
532
533static PyObject *
534stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
535{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000536 stringio *self;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000537
538 assert(type != NULL && type->tp_alloc != NULL);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000539 self = (stringio *)type->tp_alloc(type, 0);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000540 if (self == NULL)
541 return NULL;
542
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000543 /* tp_alloc initializes all the fields to zero. So we don't have to
544 initialize them here. */
545
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200546 self->buf = (Py_UCS4 *)PyMem_Malloc(0);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000547 if (self->buf == NULL) {
548 Py_DECREF(self);
549 return PyErr_NoMemory();
550 }
551
552 return (PyObject *)self;
553}
554
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000555static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000556stringio_init(stringio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000557{
558 char *kwlist[] = {"initial_value", "newline", NULL};
559 PyObject *value = NULL;
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000560 PyObject *newline_obj = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000561 char *newline = "\n";
562
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000563 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
564 &value, &newline_obj))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565 return -1;
566
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000567 /* Parse the newline argument. This used to be done with the 'z'
568 specifier, however this allowed any object with the buffer interface to
569 be converted. Thus we have to parse it manually since we only want to
570 allow unicode objects or None. */
571 if (newline_obj == Py_None) {
572 newline = NULL;
573 }
574 else if (newline_obj) {
575 if (!PyUnicode_Check(newline_obj)) {
576 PyErr_Format(PyExc_TypeError,
577 "newline must be str or None, not %.200s",
578 Py_TYPE(newline_obj)->tp_name);
579 return -1;
580 }
581 newline = _PyUnicode_AsString(newline_obj);
582 if (newline == NULL)
583 return -1;
584 }
585
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000586 if (newline && newline[0] != '\0'
587 && !(newline[0] == '\n' && newline[1] == '\0')
588 && !(newline[0] == '\r' && newline[1] == '\0')
589 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
590 PyErr_Format(PyExc_ValueError,
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000591 "illegal newline value: %R", newline_obj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 return -1;
593 }
594 if (value && value != Py_None && !PyUnicode_Check(value)) {
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000595 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000596 "initial_value must be str or None, not %.200s",
597 Py_TYPE(value)->tp_name);
598 return -1;
599 }
600
601 self->ok = 0;
602
603 Py_CLEAR(self->readnl);
604 Py_CLEAR(self->writenl);
605 Py_CLEAR(self->decoder);
606
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000607 assert((newline != NULL && newline_obj != Py_None) ||
608 (newline == NULL && newline_obj == Py_None));
609
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000610 if (newline) {
611 self->readnl = PyUnicode_FromString(newline);
612 if (self->readnl == NULL)
613 return -1;
614 }
615 self->readuniversal = (newline == NULL || newline[0] == '\0');
616 self->readtranslate = (newline == NULL);
617 /* If newline == "", we don't translate anything.
618 If newline == "\n" or newline == None, we translate to "\n", which is
619 a no-op.
620 (for newline == None, TextIOWrapper translates to os.sepline, but it
621 is pointless for StringIO)
622 */
623 if (newline != NULL && newline[0] == '\r') {
624 self->writenl = self->readnl;
625 Py_INCREF(self->writenl);
626 }
627
628 if (self->readuniversal) {
629 self->decoder = PyObject_CallFunction(
630 (PyObject *)&PyIncrementalNewlineDecoder_Type,
631 "Oi", Py_None, (int) self->readtranslate);
632 if (self->decoder == NULL)
633 return -1;
634 }
635
636 /* Now everything is set up, resize buffer to size of initial value,
637 and copy it */
638 self->string_size = 0;
639 if (value && value != Py_None) {
640 Py_ssize_t len = PyUnicode_GetSize(value);
641 /* This is a heuristic, for newline translation might change
642 the string length. */
643 if (resize_buffer(self, len) < 0)
644 return -1;
645 self->pos = 0;
646 if (write_str(self, value) < 0)
647 return -1;
648 }
649 else {
650 if (resize_buffer(self, 0) < 0)
651 return -1;
652 }
653 self->pos = 0;
654
655 self->closed = 0;
656 self->ok = 1;
657 return 0;
658}
659
660/* Properties and pseudo-properties */
661static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000662stringio_seekable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000663{
664 CHECK_INITIALIZED(self);
665 Py_RETURN_TRUE;
666}
667
668static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000669stringio_readable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000670{
671 CHECK_INITIALIZED(self);
672 Py_RETURN_TRUE;
673}
674
675static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000676stringio_writable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677{
678 CHECK_INITIALIZED(self);
679 Py_RETURN_TRUE;
680}
681
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter. For __setstate__ and unlike
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
   to avoid decoding the complex newline state while keeping the object
   representation compact.

   See comment in bytesio.c regarding why only pickle protocol 2 and onward
   are supported.
*/
693
694static PyObject *
695stringio_getstate(stringio *self)
696{
697 PyObject *initvalue = stringio_getvalue(self);
698 PyObject *dict;
699 PyObject *state;
700
701 if (initvalue == NULL)
702 return NULL;
703 if (self->dict == NULL) {
704 Py_INCREF(Py_None);
705 dict = Py_None;
706 }
707 else {
708 dict = PyDict_Copy(self->dict);
709 if (dict == NULL)
710 return NULL;
711 }
712
713 state = Py_BuildValue("(OOnN)", initvalue,
714 self->readnl ? self->readnl : Py_None,
715 self->pos, dict);
716 Py_DECREF(initvalue);
717 return state;
718}
719
720static PyObject *
721stringio_setstate(stringio *self, PyObject *state)
722{
723 PyObject *initarg;
724 PyObject *position_obj;
725 PyObject *dict;
726 Py_ssize_t pos;
727
728 assert(state != NULL);
729 CHECK_CLOSED(self);
730
731 /* We allow the state tuple to be longer than 4, because we may need
732 someday to extend the object's state without breaking
733 backward-compatibility. */
734 if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
735 PyErr_Format(PyExc_TypeError,
736 "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
737 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
738 return NULL;
739 }
740
741 /* Initialize the object's state. */
742 initarg = PyTuple_GetSlice(state, 0, 2);
743 if (initarg == NULL)
744 return NULL;
745 if (stringio_init(self, initarg, NULL) < 0) {
746 Py_DECREF(initarg);
747 return NULL;
748 }
749 Py_DECREF(initarg);
750
751 /* Restore the buffer state. Even if __init__ did initialize the buffer,
752 we have to initialize it again since __init__ may translates the
753 newlines in the inital_value string. We clearly do not want that
754 because the string value in the state tuple has already been translated
755 once by __init__. So we do not take any chance and replace object's
756 buffer completely. */
757 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200758 PyObject *item;
759 Py_UCS4 *buf;
760 Py_ssize_t bufsize;
761
762 item = PyTuple_GET_ITEM(state, 0);
763 buf = PyUnicode_AsUCS4Copy(item);
764 if (buf == NULL)
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000765 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200766 bufsize = PyUnicode_GET_LENGTH(item);
767
768 if (resize_buffer(self, bufsize) < 0) {
769 PyMem_Free(buf);
770 return NULL;
771 }
772 memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
773 PyMem_Free(buf);
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000774 self->string_size = bufsize;
775 }
776
777 /* Set carefully the position value. Alternatively, we could use the seek
778 method instead of modifying self->pos directly to better protect the
779 object internal state against errneous (or malicious) inputs. */
780 position_obj = PyTuple_GET_ITEM(state, 2);
781 if (!PyLong_Check(position_obj)) {
782 PyErr_Format(PyExc_TypeError,
783 "third item of state must be an integer, got %.200s",
784 Py_TYPE(position_obj)->tp_name);
785 return NULL;
786 }
787 pos = PyLong_AsSsize_t(position_obj);
788 if (pos == -1 && PyErr_Occurred())
789 return NULL;
790 if (pos < 0) {
791 PyErr_SetString(PyExc_ValueError,
792 "position value cannot be negative");
793 return NULL;
794 }
795 self->pos = pos;
796
797 /* Set the dictionary of the instance variables. */
798 dict = PyTuple_GET_ITEM(state, 3);
799 if (dict != Py_None) {
800 if (!PyDict_Check(dict)) {
801 PyErr_Format(PyExc_TypeError,
802 "fourth item of state should be a dict, got a %.200s",
803 Py_TYPE(dict)->tp_name);
804 return NULL;
805 }
806 if (self->dict) {
807 /* Alternatively, we could replace the internal dictionary
808 completely. However, it seems more practical to just update it. */
809 if (PyDict_Update(self->dict, dict) < 0)
810 return NULL;
811 }
812 else {
813 Py_INCREF(dict);
814 self->dict = dict;
815 }
816 }
817
818 Py_RETURN_NONE;
819}
820
821
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000822static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000823stringio_closed(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000824{
825 CHECK_INITIALIZED(self);
826 return PyBool_FromLong(self->closed);
827}
828
829static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000830stringio_line_buffering(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000831{
832 CHECK_INITIALIZED(self);
833 CHECK_CLOSED(self);
834 Py_RETURN_FALSE;
835}
836
837static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000838stringio_newlines(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000839{
840 CHECK_INITIALIZED(self);
841 CHECK_CLOSED(self);
842 if (self->decoder == NULL)
843 Py_RETURN_NONE;
844 return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
845}
846
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000847static struct PyMethodDef stringio_methods[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000848 {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
Antoine Pitroud5c3f6c2010-09-02 19:48:07 +0000849 {"getvalue", (PyCFunction)stringio_getvalue, METH_NOARGS, stringio_getvalue_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000850 {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
851 {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
852 {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
853 {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
854 {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
855 {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000856
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000857 {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
858 {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
859 {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000860
861 {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
862 {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000863 {NULL, NULL} /* sentinel */
864};
865
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000866static PyGetSetDef stringio_getset[] = {
867 {"closed", (getter)stringio_closed, NULL, NULL},
868 {"newlines", (getter)stringio_newlines, NULL, NULL},
869 /* (following comments straight off of the original Python wrapper:)
870 XXX Cruft to support the TextIOWrapper API. This would only
871 be meaningful if StringIO supported the buffer attribute.
872 Hopefully, a better solution, than adding these pseudo-attributes,
873 will be found.
874 */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000875 {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000876 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000877};
878
879PyTypeObject PyStringIO_Type = {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000880 PyVarObject_HEAD_INIT(NULL, 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000881 "_io.StringIO", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000882 sizeof(stringio), /*tp_basicsize*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000883 0, /*tp_itemsize*/
884 (destructor)stringio_dealloc, /*tp_dealloc*/
885 0, /*tp_print*/
886 0, /*tp_getattr*/
887 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +0000888 0, /*tp_reserved*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000889 0, /*tp_repr*/
890 0, /*tp_as_number*/
891 0, /*tp_as_sequence*/
892 0, /*tp_as_mapping*/
893 0, /*tp_hash*/
894 0, /*tp_call*/
895 0, /*tp_str*/
896 0, /*tp_getattro*/
897 0, /*tp_setattro*/
898 0, /*tp_as_buffer*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000899 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
900 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
901 stringio_doc, /*tp_doc*/
902 (traverseproc)stringio_traverse, /*tp_traverse*/
903 (inquiry)stringio_clear, /*tp_clear*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000904 0, /*tp_richcompare*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000905 offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000906 0, /*tp_iter*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000907 (iternextfunc)stringio_iternext, /*tp_iternext*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000908 stringio_methods, /*tp_methods*/
909 0, /*tp_members*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000910 stringio_getset, /*tp_getset*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000911 0, /*tp_base*/
912 0, /*tp_dict*/
913 0, /*tp_descr_get*/
914 0, /*tp_descr_set*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000915 offsetof(stringio, dict), /*tp_dictoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000916 (initproc)stringio_init, /*tp_init*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000917 0, /*tp_alloc*/
918 stringio_new, /*tp_new*/
919};