blob: c40163f4def4c5a7fa6b95d772b068f8a0f8331a [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001#define PY_SSIZE_T_CLEAN
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00002#include "Python.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003#include "structmember.h"
4#include "_iomodule.h"
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006/* Implementation note: the buffer is always at least one character longer
7 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
8*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00009
10typedef struct {
11 PyObject_HEAD
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020012 Py_UCS4 *buf;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000013 Py_ssize_t pos;
14 Py_ssize_t string_size;
15 size_t buf_size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000016
17 char ok; /* initialized? */
18 char closed;
19 char readuniversal;
20 char readtranslate;
21 PyObject *decoder;
22 PyObject *readnl;
23 PyObject *writenl;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020024
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000025 PyObject *dict;
26 PyObject *weakreflist;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000027} stringio;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000028
/* Make the calling function return NULL with a ValueError set when `self`
   has not been successfully initialized (self->ok <= 0).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`; the argument is
   parenthesized so any pointer expression may be passed.  Use it as a
   statement: CHECK_INITIALIZED(self); */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
35
/* Make the calling function return NULL with a ValueError set when the
   stream has been closed (self->closed is non-zero).
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and cannot capture a following `else`; the argument is
   parenthesized so any pointer expression may be passed.  Use it as a
   statement: CHECK_CLOSED(self); */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
42
43PyDoc_STRVAR(stringio_doc,
44 "Text I/O implementation using an in-memory buffer.\n"
45 "\n"
46 "The initial_value argument sets the value of object. The newline\n"
47 "argument is like the one of TextIOWrapper's constructor.");
48
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000049
50/* Internal routine for changing the size, in terms of characters, of the
51 buffer of StringIO objects. The caller should ensure that the 'size'
52 argument is non-negative. Returns 0 on success, -1 otherwise. */
53static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054resize_buffer(stringio *self, size_t size)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000055{
56 /* Here, unsigned types are used to avoid dealing with signed integer
57 overflow, which is undefined in C. */
58 size_t alloc = self->buf_size;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020059 Py_UCS4 *new_buf = NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000060
61 assert(self->buf != NULL);
62
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000063 /* Reserve one more char for line ending detection. */
64 size = size + 1;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000065 /* For simplicity, stay in the range of the signed type. Anyway, Python
66 doesn't allow strings to be longer than this. */
67 if (size > PY_SSIZE_T_MAX)
68 goto overflow;
69
70 if (size < alloc / 2) {
71 /* Major downsize; resize down to exact size. */
72 alloc = size + 1;
73 }
74 else if (size < alloc) {
75 /* Within allocated size; quick exit */
76 return 0;
77 }
78 else if (size <= alloc * 1.125) {
79 /* Moderate upsize; overallocate similar to list_resize() */
80 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
81 }
82 else {
83 /* Major upsize; resize up to exact size */
84 alloc = size + 1;
85 }
86
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020087 if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000088 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020089 new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000090 if (new_buf == NULL) {
91 PyErr_NoMemory();
92 return -1;
93 }
94 self->buf_size = alloc;
95 self->buf = new_buf;
96
97 return 0;
98
99 overflow:
100 PyErr_SetString(PyExc_OverflowError,
101 "new buffer size too large");
102 return -1;
103}
104
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105/* Internal routine for writing a whole PyUnicode object to the buffer of a
106 StringIO object. Returns 0 on success, or -1 on error. */
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000107static Py_ssize_t
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000108write_str(stringio *self, PyObject *obj)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000109{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110 Py_ssize_t len;
111 PyObject *decoded = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200112
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000113 assert(self->buf != NULL);
114 assert(self->pos >= 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115
116 if (self->decoder != NULL) {
117 decoded = _PyIncrementalNewlineDecoder_decode(
118 self->decoder, obj, 1 /* always final */);
119 }
120 else {
121 decoded = obj;
122 Py_INCREF(decoded);
123 }
124 if (self->writenl) {
125 PyObject *translated = PyUnicode_Replace(
126 decoded, _PyIO_str_nl, self->writenl, -1);
127 Py_DECREF(decoded);
128 decoded = translated;
129 }
130 if (decoded == NULL)
131 return -1;
132
133 assert(PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200134 len = PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000135
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000136 assert(len >= 0);
137
138 /* This overflow check is not strictly necessary. However, it avoids us to
139 deal with funky things like comparing an unsigned and a signed
140 integer. */
141 if (self->pos > PY_SSIZE_T_MAX - len) {
142 PyErr_SetString(PyExc_OverflowError,
143 "new position too large");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000144 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000145 }
146 if (self->pos + len > self->string_size) {
147 if (resize_buffer(self, self->pos + len) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000148 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000149 }
150
151 if (self->pos > self->string_size) {
152 /* In case of overseek, pad with null bytes the buffer region between
153 the end of stream and the current position.
154
155 0 lo string_size hi
156 | |<---used--->|<----------available----------->|
157 | | <--to pad-->|<---to write---> |
Ezio Melotti13925002011-03-16 11:05:33 +0200158 0 buf position
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000159
160 */
161 memset(self->buf + self->string_size, '\0',
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 (self->pos - self->string_size) * sizeof(Py_UCS4));
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000163 }
164
165 /* Copy the data to the internal buffer, overwriting some of the
166 existing data if self->pos < self->string_size. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200167 if (!PyUnicode_AsUCS4(decoded,
168 self->buf + self->pos,
169 self->buf_size - self->pos,
170 0))
171 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000172
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000173 /* Set the new length of the internal string if it has changed. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200174 self->pos += len;
175 if (self->string_size < self->pos)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000176 self->string_size = self->pos;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000177
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 Py_DECREF(decoded);
179 return 0;
180
181fail:
182 Py_XDECREF(decoded);
183 return -1;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000184}
185
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000186PyDoc_STRVAR(stringio_getvalue_doc,
187 "Retrieve the entire contents of the object.");
188
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000189static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000190stringio_getvalue(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000191{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000192 CHECK_INITIALIZED(self);
193 CHECK_CLOSED(self);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
195 self->string_size);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000196}
197
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198PyDoc_STRVAR(stringio_tell_doc,
199 "Tell the current file position.");
200
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000201static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000202stringio_tell(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000203{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000204 CHECK_INITIALIZED(self);
205 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000206 return PyLong_FromSsize_t(self->pos);
207}
208
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000209PyDoc_STRVAR(stringio_read_doc,
210 "Read at most n characters, returned as a string.\n"
211 "\n"
212 "If the argument is negative or omitted, read until EOF\n"
213 "is reached. Return an empty string at EOF.\n");
214
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000215static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000216stringio_read(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000217{
218 Py_ssize_t size, n;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200219 Py_UCS4 *output;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000220 PyObject *arg = Py_None;
221
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000223 if (!PyArg_ParseTuple(args, "|O:read", &arg))
224 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000225 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000226
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000227 if (PyNumber_Check(arg)) {
228 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
Amaury Forgeot d'Arc58fb9052008-09-30 20:22:44 +0000229 if (size == -1 && PyErr_Occurred())
230 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000231 }
232 else if (arg == Py_None) {
233 /* Read until EOF is reached, by default. */
234 size = -1;
235 }
236 else {
237 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
238 Py_TYPE(arg)->tp_name);
239 return NULL;
240 }
241
242 /* adjust invalid sizes */
243 n = self->string_size - self->pos;
244 if (size < 0 || size > n) {
245 size = n;
246 if (size < 0)
247 size = 0;
248 }
249
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000250 output = self->buf + self->pos;
251 self->pos += size;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200252 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000253}
254
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000255/* Internal helper, used by stringio_readline and stringio_iternext */
256static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000257_stringio_readline(stringio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000258{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200259 Py_UCS4 *start, *end, old_char;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000260 Py_ssize_t len, consumed;
261
262 /* In case of overseek, return the empty string */
263 if (self->pos >= self->string_size)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200264 return PyUnicode_New(0, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000265
266 start = self->buf + self->pos;
267 if (limit < 0 || limit > self->string_size - self->pos)
268 limit = self->string_size - self->pos;
269
270 end = start + limit;
271 old_char = *end;
272 *end = '\0';
273 len = _PyIO_find_line_ending(
274 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200275 PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000276 *end = old_char;
277 /* If we haven't found any line ending, we just return everything
278 (`consumed` is ignored). */
279 if (len < 0)
280 len = limit;
281 self->pos += len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200282 return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000283}
284
285PyDoc_STRVAR(stringio_readline_doc,
286 "Read until newline or EOF.\n"
287 "\n"
288 "Returns an empty string if EOF is hit immediately.\n");
289
290static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000291stringio_readline(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000292{
293 PyObject *arg = Py_None;
294 Py_ssize_t limit = -1;
295
296 CHECK_INITIALIZED(self);
297 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
298 return NULL;
299 CHECK_CLOSED(self);
300
301 if (PyNumber_Check(arg)) {
302 limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
303 if (limit == -1 && PyErr_Occurred())
304 return NULL;
305 }
306 else if (arg != Py_None) {
307 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
308 Py_TYPE(arg)->tp_name);
309 return NULL;
310 }
311 return _stringio_readline(self, limit);
312}
313
314static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000315stringio_iternext(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000316{
317 PyObject *line;
318
319 CHECK_INITIALIZED(self);
320 CHECK_CLOSED(self);
321
322 if (Py_TYPE(self) == &PyStringIO_Type) {
323 /* Skip method call overhead for speed */
324 line = _stringio_readline(self, -1);
325 }
326 else {
327 /* XXX is subclassing StringIO really supported? */
328 line = PyObject_CallMethodObjArgs((PyObject *)self,
329 _PyIO_str_readline, NULL);
330 if (line && !PyUnicode_Check(line)) {
331 PyErr_Format(PyExc_IOError,
332 "readline() should have returned an str object, "
333 "not '%.200s'", Py_TYPE(line)->tp_name);
334 Py_DECREF(line);
335 return NULL;
336 }
337 }
338
339 if (line == NULL)
340 return NULL;
341
342 if (PyUnicode_GET_SIZE(line) == 0) {
343 /* Reached EOF */
344 Py_DECREF(line);
345 return NULL;
346 }
347
348 return line;
349}
350
351PyDoc_STRVAR(stringio_truncate_doc,
352 "Truncate size to pos.\n"
353 "\n"
354 "The pos argument defaults to the current file position, as\n"
Antoine Pitrou905a2ff2010-01-31 22:47:27 +0000355 "returned by tell(). The current file position is unchanged.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000356 "Returns the new absolute position.\n");
357
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000358static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000359stringio_truncate(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000360{
361 Py_ssize_t size;
362 PyObject *arg = Py_None;
363
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000364 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000365 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
366 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000367 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000368
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 if (PyNumber_Check(arg)) {
370 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
Benjamin Petersonc9e435e2008-09-30 02:22:04 +0000371 if (size == -1 && PyErr_Occurred())
372 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000373 }
374 else if (arg == Py_None) {
375 /* Truncate to current position if no argument is passed. */
376 size = self->pos;
377 }
378 else {
379 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
380 Py_TYPE(arg)->tp_name);
381 return NULL;
382 }
383
384 if (size < 0) {
385 PyErr_Format(PyExc_ValueError,
386 "Negative size value %zd", size);
387 return NULL;
388 }
389
390 if (size < self->string_size) {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000391 if (resize_buffer(self, size) < 0)
392 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000393 self->string_size = size;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000394 }
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000395
396 return PyLong_FromSsize_t(size);
397}
398
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000399PyDoc_STRVAR(stringio_seek_doc,
400 "Change stream position.\n"
401 "\n"
402 "Seek to character offset pos relative to position indicated by whence:\n"
403 " 0 Start of stream (the default). pos should be >= 0;\n"
404 " 1 Current position - pos must be 0;\n"
405 " 2 End of stream - pos must be 0.\n"
406 "Returns the new absolute position.\n");
407
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000408static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000409stringio_seek(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000410{
411 Py_ssize_t pos;
412 int mode = 0;
413
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000414 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000415 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
416 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000418
419 if (mode != 0 && mode != 1 && mode != 2) {
420 PyErr_Format(PyExc_ValueError,
421 "Invalid whence (%i, should be 0, 1 or 2)", mode);
422 return NULL;
423 }
424 else if (pos < 0 && mode == 0) {
425 PyErr_Format(PyExc_ValueError,
426 "Negative seek position %zd", pos);
427 return NULL;
428 }
429 else if (mode != 0 && pos != 0) {
430 PyErr_SetString(PyExc_IOError,
431 "Can't do nonzero cur-relative seeks");
432 return NULL;
433 }
434
435 /* mode 0: offset relative to beginning of the string.
436 mode 1: no change to current position.
437 mode 2: change position to end of file. */
438 if (mode == 1) {
439 pos = self->pos;
440 }
441 else if (mode == 2) {
442 pos = self->string_size;
443 }
444
445 self->pos = pos;
446
447 return PyLong_FromSsize_t(self->pos);
448}
449
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000450PyDoc_STRVAR(stringio_write_doc,
451 "Write string to file.\n"
452 "\n"
453 "Returns the number of characters written, which is always equal to\n"
454 "the length of the string.\n");
455
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000456static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000457stringio_write(stringio *self, PyObject *obj)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000458{
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000459 Py_ssize_t size;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000460
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000461 CHECK_INITIALIZED(self);
462 if (!PyUnicode_Check(obj)) {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000463 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
464 Py_TYPE(obj)->tp_name);
465 return NULL;
466 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 if (PyUnicode_READY(obj))
468 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000469 CHECK_CLOSED(self);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200470 size = PyUnicode_GET_LENGTH(obj);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000471
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 if (size > 0 && write_str(self, obj) < 0)
473 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000474
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000475 return PyLong_FromSsize_t(size);
476}
477
478PyDoc_STRVAR(stringio_close_doc,
479 "Close the IO object. Attempting any further operation after the\n"
480 "object is closed will raise a ValueError.\n"
481 "\n"
482 "This method has no effect if the file is already closed.\n");
483
484static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000485stringio_close(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000486{
487 self->closed = 1;
488 /* Free up some memory */
489 if (resize_buffer(self, 0) < 0)
490 return NULL;
491 Py_CLEAR(self->readnl);
492 Py_CLEAR(self->writenl);
493 Py_CLEAR(self->decoder);
494 Py_RETURN_NONE;
495}
496
497static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000498stringio_traverse(stringio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000499{
500 Py_VISIT(self->dict);
501 return 0;
502}
503
504static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505stringio_clear(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 Py_CLEAR(self->dict);
508 return 0;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000509}
510
511static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000512stringio_dealloc(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000513{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000514 _PyObject_GC_UNTRACK(self);
Alexandre Vassalottifc477042009-07-22 02:24:49 +0000515 self->ok = 0;
516 if (self->buf) {
517 PyMem_Free(self->buf);
518 self->buf = NULL;
519 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000520 Py_CLEAR(self->readnl);
521 Py_CLEAR(self->writenl);
522 Py_CLEAR(self->decoder);
Alexandre Vassalottifc477042009-07-22 02:24:49 +0000523 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000524 if (self->weakreflist != NULL)
525 PyObject_ClearWeakRefs((PyObject *) self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000526 Py_TYPE(self)->tp_free(self);
527}
528
529static PyObject *
530stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
531{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000532 stringio *self;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000533
534 assert(type != NULL && type->tp_alloc != NULL);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535 self = (stringio *)type->tp_alloc(type, 0);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000536 if (self == NULL)
537 return NULL;
538
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000539 /* tp_alloc initializes all the fields to zero. So we don't have to
540 initialize them here. */
541
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200542 self->buf = (Py_UCS4 *)PyMem_Malloc(0);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000543 if (self->buf == NULL) {
544 Py_DECREF(self);
545 return PyErr_NoMemory();
546 }
547
548 return (PyObject *)self;
549}
550
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000551static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000552stringio_init(stringio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000553{
554 char *kwlist[] = {"initial_value", "newline", NULL};
555 PyObject *value = NULL;
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000556 PyObject *newline_obj = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000557 char *newline = "\n";
558
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000559 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
560 &value, &newline_obj))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000561 return -1;
562
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000563 /* Parse the newline argument. This used to be done with the 'z'
564 specifier, however this allowed any object with the buffer interface to
565 be converted. Thus we have to parse it manually since we only want to
566 allow unicode objects or None. */
567 if (newline_obj == Py_None) {
568 newline = NULL;
569 }
570 else if (newline_obj) {
571 if (!PyUnicode_Check(newline_obj)) {
572 PyErr_Format(PyExc_TypeError,
573 "newline must be str or None, not %.200s",
574 Py_TYPE(newline_obj)->tp_name);
575 return -1;
576 }
577 newline = _PyUnicode_AsString(newline_obj);
578 if (newline == NULL)
579 return -1;
580 }
581
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000582 if (newline && newline[0] != '\0'
583 && !(newline[0] == '\n' && newline[1] == '\0')
584 && !(newline[0] == '\r' && newline[1] == '\0')
585 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
586 PyErr_Format(PyExc_ValueError,
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000587 "illegal newline value: %R", newline_obj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000588 return -1;
589 }
590 if (value && value != Py_None && !PyUnicode_Check(value)) {
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000591 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 "initial_value must be str or None, not %.200s",
593 Py_TYPE(value)->tp_name);
594 return -1;
595 }
596
597 self->ok = 0;
598
599 Py_CLEAR(self->readnl);
600 Py_CLEAR(self->writenl);
601 Py_CLEAR(self->decoder);
602
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000603 assert((newline != NULL && newline_obj != Py_None) ||
604 (newline == NULL && newline_obj == Py_None));
605
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606 if (newline) {
607 self->readnl = PyUnicode_FromString(newline);
608 if (self->readnl == NULL)
609 return -1;
610 }
611 self->readuniversal = (newline == NULL || newline[0] == '\0');
612 self->readtranslate = (newline == NULL);
613 /* If newline == "", we don't translate anything.
614 If newline == "\n" or newline == None, we translate to "\n", which is
615 a no-op.
616 (for newline == None, TextIOWrapper translates to os.sepline, but it
617 is pointless for StringIO)
618 */
619 if (newline != NULL && newline[0] == '\r') {
620 self->writenl = self->readnl;
621 Py_INCREF(self->writenl);
622 }
623
624 if (self->readuniversal) {
625 self->decoder = PyObject_CallFunction(
626 (PyObject *)&PyIncrementalNewlineDecoder_Type,
627 "Oi", Py_None, (int) self->readtranslate);
628 if (self->decoder == NULL)
629 return -1;
630 }
631
632 /* Now everything is set up, resize buffer to size of initial value,
633 and copy it */
634 self->string_size = 0;
635 if (value && value != Py_None) {
636 Py_ssize_t len = PyUnicode_GetSize(value);
637 /* This is a heuristic, for newline translation might change
638 the string length. */
639 if (resize_buffer(self, len) < 0)
640 return -1;
641 self->pos = 0;
642 if (write_str(self, value) < 0)
643 return -1;
644 }
645 else {
646 if (resize_buffer(self, 0) < 0)
647 return -1;
648 }
649 self->pos = 0;
650
651 self->closed = 0;
652 self->ok = 1;
653 return 0;
654}
655
656/* Properties and pseudo-properties */
657static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000658stringio_seekable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659{
660 CHECK_INITIALIZED(self);
661 Py_RETURN_TRUE;
662}
663
664static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000665stringio_readable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000666{
667 CHECK_INITIALIZED(self);
668 Py_RETURN_TRUE;
669}
670
671static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000672stringio_writable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000673{
674 CHECK_INITIALIZED(self);
675 Py_RETURN_TRUE;
676}
677
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter. For __setstate__, and
   unlike BytesIO, we call __init__ to restore the object's state. Doing so
   allows us to avoid decoding the complex newline state while keeping the
   object representation compact.

   See the comment in bytesio.c regarding why only pickle protocols 2 and
   onward are supported.
*/
689
690static PyObject *
691stringio_getstate(stringio *self)
692{
693 PyObject *initvalue = stringio_getvalue(self);
694 PyObject *dict;
695 PyObject *state;
696
697 if (initvalue == NULL)
698 return NULL;
699 if (self->dict == NULL) {
700 Py_INCREF(Py_None);
701 dict = Py_None;
702 }
703 else {
704 dict = PyDict_Copy(self->dict);
705 if (dict == NULL)
706 return NULL;
707 }
708
709 state = Py_BuildValue("(OOnN)", initvalue,
710 self->readnl ? self->readnl : Py_None,
711 self->pos, dict);
712 Py_DECREF(initvalue);
713 return state;
714}
715
716static PyObject *
717stringio_setstate(stringio *self, PyObject *state)
718{
719 PyObject *initarg;
720 PyObject *position_obj;
721 PyObject *dict;
722 Py_ssize_t pos;
723
724 assert(state != NULL);
725 CHECK_CLOSED(self);
726
727 /* We allow the state tuple to be longer than 4, because we may need
728 someday to extend the object's state without breaking
729 backward-compatibility. */
730 if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
731 PyErr_Format(PyExc_TypeError,
732 "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
733 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
734 return NULL;
735 }
736
737 /* Initialize the object's state. */
738 initarg = PyTuple_GetSlice(state, 0, 2);
739 if (initarg == NULL)
740 return NULL;
741 if (stringio_init(self, initarg, NULL) < 0) {
742 Py_DECREF(initarg);
743 return NULL;
744 }
745 Py_DECREF(initarg);
746
747 /* Restore the buffer state. Even if __init__ did initialize the buffer,
748 we have to initialize it again since __init__ may translates the
749 newlines in the inital_value string. We clearly do not want that
750 because the string value in the state tuple has already been translated
751 once by __init__. So we do not take any chance and replace object's
752 buffer completely. */
753 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754 PyObject *item;
755 Py_UCS4 *buf;
756 Py_ssize_t bufsize;
757
758 item = PyTuple_GET_ITEM(state, 0);
759 buf = PyUnicode_AsUCS4Copy(item);
760 if (buf == NULL)
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000761 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200762 bufsize = PyUnicode_GET_LENGTH(item);
763
764 if (resize_buffer(self, bufsize) < 0) {
765 PyMem_Free(buf);
766 return NULL;
767 }
768 memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
769 PyMem_Free(buf);
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000770 self->string_size = bufsize;
771 }
772
773 /* Set carefully the position value. Alternatively, we could use the seek
774 method instead of modifying self->pos directly to better protect the
775 object internal state against errneous (or malicious) inputs. */
776 position_obj = PyTuple_GET_ITEM(state, 2);
777 if (!PyLong_Check(position_obj)) {
778 PyErr_Format(PyExc_TypeError,
779 "third item of state must be an integer, got %.200s",
780 Py_TYPE(position_obj)->tp_name);
781 return NULL;
782 }
783 pos = PyLong_AsSsize_t(position_obj);
784 if (pos == -1 && PyErr_Occurred())
785 return NULL;
786 if (pos < 0) {
787 PyErr_SetString(PyExc_ValueError,
788 "position value cannot be negative");
789 return NULL;
790 }
791 self->pos = pos;
792
793 /* Set the dictionary of the instance variables. */
794 dict = PyTuple_GET_ITEM(state, 3);
795 if (dict != Py_None) {
796 if (!PyDict_Check(dict)) {
797 PyErr_Format(PyExc_TypeError,
798 "fourth item of state should be a dict, got a %.200s",
799 Py_TYPE(dict)->tp_name);
800 return NULL;
801 }
802 if (self->dict) {
803 /* Alternatively, we could replace the internal dictionary
804 completely. However, it seems more practical to just update it. */
805 if (PyDict_Update(self->dict, dict) < 0)
806 return NULL;
807 }
808 else {
809 Py_INCREF(dict);
810 self->dict = dict;
811 }
812 }
813
814 Py_RETURN_NONE;
815}
816
817
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000819stringio_closed(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000820{
821 CHECK_INITIALIZED(self);
822 return PyBool_FromLong(self->closed);
823}
824
825static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000826stringio_line_buffering(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827{
828 CHECK_INITIALIZED(self);
829 CHECK_CLOSED(self);
830 Py_RETURN_FALSE;
831}
832
833static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000834stringio_newlines(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000835{
836 CHECK_INITIALIZED(self);
837 CHECK_CLOSED(self);
838 if (self->decoder == NULL)
839 Py_RETURN_NONE;
840 return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
841}
842
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000843static struct PyMethodDef stringio_methods[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000844 {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
Antoine Pitroud5c3f6c2010-09-02 19:48:07 +0000845 {"getvalue", (PyCFunction)stringio_getvalue, METH_NOARGS, stringio_getvalue_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000846 {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
847 {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
848 {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
849 {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
850 {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
851 {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000852
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000853 {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
854 {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
855 {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000856
857 {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
858 {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000859 {NULL, NULL} /* sentinel */
860};
861
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000862static PyGetSetDef stringio_getset[] = {
863 {"closed", (getter)stringio_closed, NULL, NULL},
864 {"newlines", (getter)stringio_newlines, NULL, NULL},
865 /* (following comments straight off of the original Python wrapper:)
866 XXX Cruft to support the TextIOWrapper API. This would only
867 be meaningful if StringIO supported the buffer attribute.
868 Hopefully, a better solution, than adding these pseudo-attributes,
869 will be found.
870 */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000871 {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000872 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000873};
874
875PyTypeObject PyStringIO_Type = {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000876 PyVarObject_HEAD_INIT(NULL, 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000877 "_io.StringIO", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000878 sizeof(stringio), /*tp_basicsize*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000879 0, /*tp_itemsize*/
880 (destructor)stringio_dealloc, /*tp_dealloc*/
881 0, /*tp_print*/
882 0, /*tp_getattr*/
883 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +0000884 0, /*tp_reserved*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000885 0, /*tp_repr*/
886 0, /*tp_as_number*/
887 0, /*tp_as_sequence*/
888 0, /*tp_as_mapping*/
889 0, /*tp_hash*/
890 0, /*tp_call*/
891 0, /*tp_str*/
892 0, /*tp_getattro*/
893 0, /*tp_setattro*/
894 0, /*tp_as_buffer*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000895 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
896 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
897 stringio_doc, /*tp_doc*/
898 (traverseproc)stringio_traverse, /*tp_traverse*/
899 (inquiry)stringio_clear, /*tp_clear*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000900 0, /*tp_richcompare*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000901 offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000902 0, /*tp_iter*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000903 (iternextfunc)stringio_iternext, /*tp_iternext*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000904 stringio_methods, /*tp_methods*/
905 0, /*tp_members*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000906 stringio_getset, /*tp_getset*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000907 0, /*tp_base*/
908 0, /*tp_dict*/
909 0, /*tp_descr_get*/
910 0, /*tp_descr_set*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000911 offsetof(stringio, dict), /*tp_dictoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000912 (initproc)stringio_init, /*tp_init*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000913 0, /*tp_alloc*/
914 stringio_new, /*tp_new*/
915};