blob: c9d14b114b181786f582a1840a370ca4b5e62279 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001#define PY_SSIZE_T_CLEAN
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00002#include "Python.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003#include "structmember.h"
4#include "_iomodule.h"
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006/* Implementation note: the buffer is always at least one character longer
7 than the enclosed string, for proper functioning of _PyIO_find_line_ending.
8*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00009
10typedef struct {
11 PyObject_HEAD
12 Py_UNICODE *buf;
13 Py_ssize_t pos;
14 Py_ssize_t string_size;
15 size_t buf_size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000016
17 char ok; /* initialized? */
18 char closed;
19 char readuniversal;
20 char readtranslate;
21 PyObject *decoder;
22 PyObject *readnl;
23 PyObject *writenl;
24
25 PyObject *dict;
26 PyObject *weakreflist;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000027} stringio;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000028
/* Make the calling function fail with ValueError (by returning NULL) when
   the object has not been successfully initialized.  The body is wrapped in
   do { } while (0) so that the macro expands to exactly one statement and
   cannot capture a following `else`; it must be used with a trailing
   semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
35
/* Make the calling function fail with ValueError (by returning NULL) when
   the object has been closed.  The body is wrapped in do { } while (0) so
   that the macro expands to exactly one statement and cannot capture a
   following `else`; it must be used with a trailing semicolon. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
42
43PyDoc_STRVAR(stringio_doc,
44 "Text I/O implementation using an in-memory buffer.\n"
45 "\n"
46 "The initial_value argument sets the value of object. The newline\n"
47 "argument is like the one of TextIOWrapper's constructor.");
48
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000049
50/* Internal routine for changing the size, in terms of characters, of the
51 buffer of StringIO objects. The caller should ensure that the 'size'
52 argument is non-negative. Returns 0 on success, -1 otherwise. */
53static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054resize_buffer(stringio *self, size_t size)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000055{
56 /* Here, unsigned types are used to avoid dealing with signed integer
57 overflow, which is undefined in C. */
58 size_t alloc = self->buf_size;
59 Py_UNICODE *new_buf = NULL;
60
61 assert(self->buf != NULL);
62
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000063 /* Reserve one more char for line ending detection. */
64 size = size + 1;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +000065 /* For simplicity, stay in the range of the signed type. Anyway, Python
66 doesn't allow strings to be longer than this. */
67 if (size > PY_SSIZE_T_MAX)
68 goto overflow;
69
70 if (size < alloc / 2) {
71 /* Major downsize; resize down to exact size. */
72 alloc = size + 1;
73 }
74 else if (size < alloc) {
75 /* Within allocated size; quick exit */
76 return 0;
77 }
78 else if (size <= alloc * 1.125) {
79 /* Moderate upsize; overallocate similar to list_resize() */
80 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
81 }
82 else {
83 /* Major upsize; resize up to exact size */
84 alloc = size + 1;
85 }
86
87 if (alloc > ((size_t)-1) / sizeof(Py_UNICODE))
88 goto overflow;
89 new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf,
90 alloc * sizeof(Py_UNICODE));
91 if (new_buf == NULL) {
92 PyErr_NoMemory();
93 return -1;
94 }
95 self->buf_size = alloc;
96 self->buf = new_buf;
97
98 return 0;
99
100 overflow:
101 PyErr_SetString(PyExc_OverflowError,
102 "new buffer size too large");
103 return -1;
104}
105
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000106/* Internal routine for writing a whole PyUnicode object to the buffer of a
107 StringIO object. Returns 0 on success, or -1 on error. */
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000108static Py_ssize_t
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109write_str(stringio *self, PyObject *obj)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000110{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111 Py_UNICODE *str;
112 Py_ssize_t len;
113 PyObject *decoded = NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000114 assert(self->buf != NULL);
115 assert(self->pos >= 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116
117 if (self->decoder != NULL) {
118 decoded = _PyIncrementalNewlineDecoder_decode(
119 self->decoder, obj, 1 /* always final */);
120 }
121 else {
122 decoded = obj;
123 Py_INCREF(decoded);
124 }
125 if (self->writenl) {
126 PyObject *translated = PyUnicode_Replace(
127 decoded, _PyIO_str_nl, self->writenl, -1);
128 Py_DECREF(decoded);
129 decoded = translated;
130 }
131 if (decoded == NULL)
132 return -1;
133
134 assert(PyUnicode_Check(decoded));
135 str = PyUnicode_AS_UNICODE(decoded);
136 len = PyUnicode_GET_SIZE(decoded);
137
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000138 assert(len >= 0);
139
140 /* This overflow check is not strictly necessary. However, it avoids us to
141 deal with funky things like comparing an unsigned and a signed
142 integer. */
143 if (self->pos > PY_SSIZE_T_MAX - len) {
144 PyErr_SetString(PyExc_OverflowError,
145 "new position too large");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000147 }
148 if (self->pos + len > self->string_size) {
149 if (resize_buffer(self, self->pos + len) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000150 goto fail;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000151 }
152
153 if (self->pos > self->string_size) {
154 /* In case of overseek, pad with null bytes the buffer region between
155 the end of stream and the current position.
156
157 0 lo string_size hi
158 | |<---used--->|<----------available----------->|
159 | | <--to pad-->|<---to write---> |
Ezio Melotti13925002011-03-16 11:05:33 +0200160 0 buf position
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000161
162 */
163 memset(self->buf + self->string_size, '\0',
164 (self->pos - self->string_size) * sizeof(Py_UNICODE));
165 }
166
167 /* Copy the data to the internal buffer, overwriting some of the
168 existing data if self->pos < self->string_size. */
169 memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
170 self->pos += len;
171
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000172 /* Set the new length of the internal string if it has changed. */
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000173 if (self->string_size < self->pos) {
174 self->string_size = self->pos;
175 }
176
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000177 Py_DECREF(decoded);
178 return 0;
179
180fail:
181 Py_XDECREF(decoded);
182 return -1;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000183}
184
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185PyDoc_STRVAR(stringio_getvalue_doc,
186 "Retrieve the entire contents of the object.");
187
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000188static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000189stringio_getvalue(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000190{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000191 CHECK_INITIALIZED(self);
192 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000193 return PyUnicode_FromUnicode(self->buf, self->string_size);
194}
195
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000196PyDoc_STRVAR(stringio_tell_doc,
197 "Tell the current file position.");
198
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000199static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200stringio_tell(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000201{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202 CHECK_INITIALIZED(self);
203 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000204 return PyLong_FromSsize_t(self->pos);
205}
206
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000207PyDoc_STRVAR(stringio_read_doc,
208 "Read at most n characters, returned as a string.\n"
209 "\n"
210 "If the argument is negative or omitted, read until EOF\n"
211 "is reached. Return an empty string at EOF.\n");
212
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000213static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000214stringio_read(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000215{
216 Py_ssize_t size, n;
217 Py_UNICODE *output;
218 PyObject *arg = Py_None;
219
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000220 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000221 if (!PyArg_ParseTuple(args, "|O:read", &arg))
222 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000223 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000224
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000225 if (PyNumber_Check(arg)) {
226 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
Amaury Forgeot d'Arc58fb9052008-09-30 20:22:44 +0000227 if (size == -1 && PyErr_Occurred())
228 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000229 }
230 else if (arg == Py_None) {
231 /* Read until EOF is reached, by default. */
232 size = -1;
233 }
234 else {
235 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
236 Py_TYPE(arg)->tp_name);
237 return NULL;
238 }
239
240 /* adjust invalid sizes */
241 n = self->string_size - self->pos;
242 if (size < 0 || size > n) {
243 size = n;
244 if (size < 0)
245 size = 0;
246 }
247
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000248 output = self->buf + self->pos;
249 self->pos += size;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000250 return PyUnicode_FromUnicode(output, size);
251}
252
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253/* Internal helper, used by stringio_readline and stringio_iternext */
254static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000255_stringio_readline(stringio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000256{
257 Py_UNICODE *start, *end, old_char;
258 Py_ssize_t len, consumed;
259
260 /* In case of overseek, return the empty string */
261 if (self->pos >= self->string_size)
262 return PyUnicode_FromString("");
263
264 start = self->buf + self->pos;
265 if (limit < 0 || limit > self->string_size - self->pos)
266 limit = self->string_size - self->pos;
267
268 end = start + limit;
269 old_char = *end;
270 *end = '\0';
271 len = _PyIO_find_line_ending(
272 self->readtranslate, self->readuniversal, self->readnl,
273 start, end, &consumed);
274 *end = old_char;
275 /* If we haven't found any line ending, we just return everything
276 (`consumed` is ignored). */
277 if (len < 0)
278 len = limit;
279 self->pos += len;
280 return PyUnicode_FromUnicode(start, len);
281}
282
283PyDoc_STRVAR(stringio_readline_doc,
284 "Read until newline or EOF.\n"
285 "\n"
286 "Returns an empty string if EOF is hit immediately.\n");
287
288static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000289stringio_readline(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000290{
291 PyObject *arg = Py_None;
292 Py_ssize_t limit = -1;
293
294 CHECK_INITIALIZED(self);
295 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
296 return NULL;
297 CHECK_CLOSED(self);
298
299 if (PyNumber_Check(arg)) {
300 limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
301 if (limit == -1 && PyErr_Occurred())
302 return NULL;
303 }
304 else if (arg != Py_None) {
305 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
306 Py_TYPE(arg)->tp_name);
307 return NULL;
308 }
309 return _stringio_readline(self, limit);
310}
311
312static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000313stringio_iternext(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000314{
315 PyObject *line;
316
317 CHECK_INITIALIZED(self);
318 CHECK_CLOSED(self);
319
320 if (Py_TYPE(self) == &PyStringIO_Type) {
321 /* Skip method call overhead for speed */
322 line = _stringio_readline(self, -1);
323 }
324 else {
325 /* XXX is subclassing StringIO really supported? */
326 line = PyObject_CallMethodObjArgs((PyObject *)self,
327 _PyIO_str_readline, NULL);
328 if (line && !PyUnicode_Check(line)) {
329 PyErr_Format(PyExc_IOError,
330 "readline() should have returned an str object, "
331 "not '%.200s'", Py_TYPE(line)->tp_name);
332 Py_DECREF(line);
333 return NULL;
334 }
335 }
336
337 if (line == NULL)
338 return NULL;
339
340 if (PyUnicode_GET_SIZE(line) == 0) {
341 /* Reached EOF */
342 Py_DECREF(line);
343 return NULL;
344 }
345
346 return line;
347}
348
349PyDoc_STRVAR(stringio_truncate_doc,
350 "Truncate size to pos.\n"
351 "\n"
352 "The pos argument defaults to the current file position, as\n"
Antoine Pitrou905a2ff2010-01-31 22:47:27 +0000353 "returned by tell(). The current file position is unchanged.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000354 "Returns the new absolute position.\n");
355
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000356static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000357stringio_truncate(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000358{
359 Py_ssize_t size;
360 PyObject *arg = Py_None;
361
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000362 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000363 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
364 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000366
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000367 if (PyNumber_Check(arg)) {
368 size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
Benjamin Petersonc9e435e2008-09-30 02:22:04 +0000369 if (size == -1 && PyErr_Occurred())
370 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000371 }
372 else if (arg == Py_None) {
373 /* Truncate to current position if no argument is passed. */
374 size = self->pos;
375 }
376 else {
377 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
378 Py_TYPE(arg)->tp_name);
379 return NULL;
380 }
381
382 if (size < 0) {
383 PyErr_Format(PyExc_ValueError,
384 "Negative size value %zd", size);
385 return NULL;
386 }
387
388 if (size < self->string_size) {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000389 if (resize_buffer(self, size) < 0)
390 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000391 self->string_size = size;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000392 }
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000393
394 return PyLong_FromSsize_t(size);
395}
396
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397PyDoc_STRVAR(stringio_seek_doc,
398 "Change stream position.\n"
399 "\n"
400 "Seek to character offset pos relative to position indicated by whence:\n"
401 " 0 Start of stream (the default). pos should be >= 0;\n"
402 " 1 Current position - pos must be 0;\n"
403 " 2 End of stream - pos must be 0.\n"
404 "Returns the new absolute position.\n");
405
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000406static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000407stringio_seek(stringio *self, PyObject *args)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000408{
409 Py_ssize_t pos;
410 int mode = 0;
411
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000412 CHECK_INITIALIZED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000413 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
414 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 CHECK_CLOSED(self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000416
417 if (mode != 0 && mode != 1 && mode != 2) {
418 PyErr_Format(PyExc_ValueError,
419 "Invalid whence (%i, should be 0, 1 or 2)", mode);
420 return NULL;
421 }
422 else if (pos < 0 && mode == 0) {
423 PyErr_Format(PyExc_ValueError,
424 "Negative seek position %zd", pos);
425 return NULL;
426 }
427 else if (mode != 0 && pos != 0) {
428 PyErr_SetString(PyExc_IOError,
429 "Can't do nonzero cur-relative seeks");
430 return NULL;
431 }
432
433 /* mode 0: offset relative to beginning of the string.
434 mode 1: no change to current position.
435 mode 2: change position to end of file. */
436 if (mode == 1) {
437 pos = self->pos;
438 }
439 else if (mode == 2) {
440 pos = self->string_size;
441 }
442
443 self->pos = pos;
444
445 return PyLong_FromSsize_t(self->pos);
446}
447
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000448PyDoc_STRVAR(stringio_write_doc,
449 "Write string to file.\n"
450 "\n"
451 "Returns the number of characters written, which is always equal to\n"
452 "the length of the string.\n");
453
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000454static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000455stringio_write(stringio *self, PyObject *obj)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000456{
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000457 Py_ssize_t size;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000458
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000459 CHECK_INITIALIZED(self);
460 if (!PyUnicode_Check(obj)) {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000461 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
462 Py_TYPE(obj)->tp_name);
463 return NULL;
464 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 CHECK_CLOSED(self);
466 size = PyUnicode_GET_SIZE(obj);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000467
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000468 if (size > 0 && write_str(self, obj) < 0)
469 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000470
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000471 return PyLong_FromSsize_t(size);
472}
473
474PyDoc_STRVAR(stringio_close_doc,
475 "Close the IO object. Attempting any further operation after the\n"
476 "object is closed will raise a ValueError.\n"
477 "\n"
478 "This method has no effect if the file is already closed.\n");
479
480static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000481stringio_close(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000482{
483 self->closed = 1;
484 /* Free up some memory */
485 if (resize_buffer(self, 0) < 0)
486 return NULL;
487 Py_CLEAR(self->readnl);
488 Py_CLEAR(self->writenl);
489 Py_CLEAR(self->decoder);
490 Py_RETURN_NONE;
491}
492
493static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000494stringio_traverse(stringio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000495{
496 Py_VISIT(self->dict);
497 return 0;
498}
499
500static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000501stringio_clear(stringio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000502{
503 Py_CLEAR(self->dict);
504 return 0;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000505}
506
507static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000508stringio_dealloc(stringio *self)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000509{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000510 _PyObject_GC_UNTRACK(self);
Alexandre Vassalottifc477042009-07-22 02:24:49 +0000511 self->ok = 0;
512 if (self->buf) {
513 PyMem_Free(self->buf);
514 self->buf = NULL;
515 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516 Py_CLEAR(self->readnl);
517 Py_CLEAR(self->writenl);
518 Py_CLEAR(self->decoder);
Alexandre Vassalottifc477042009-07-22 02:24:49 +0000519 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000520 if (self->weakreflist != NULL)
521 PyObject_ClearWeakRefs((PyObject *) self);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000522 Py_TYPE(self)->tp_free(self);
523}
524
525static PyObject *
526stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
527{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000528 stringio *self;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000529
530 assert(type != NULL && type->tp_alloc != NULL);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000531 self = (stringio *)type->tp_alloc(type, 0);
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000532 if (self == NULL)
533 return NULL;
534
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000535 /* tp_alloc initializes all the fields to zero. So we don't have to
536 initialize them here. */
537
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000538 self->buf = (Py_UNICODE *)PyMem_Malloc(0);
539 if (self->buf == NULL) {
540 Py_DECREF(self);
541 return PyErr_NoMemory();
542 }
543
544 return (PyObject *)self;
545}
546
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000547static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548stringio_init(stringio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549{
550 char *kwlist[] = {"initial_value", "newline", NULL};
551 PyObject *value = NULL;
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000552 PyObject *newline_obj = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000553 char *newline = "\n";
554
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000555 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
556 &value, &newline_obj))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000557 return -1;
558
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000559 /* Parse the newline argument. This used to be done with the 'z'
560 specifier, however this allowed any object with the buffer interface to
561 be converted. Thus we have to parse it manually since we only want to
562 allow unicode objects or None. */
563 if (newline_obj == Py_None) {
564 newline = NULL;
565 }
566 else if (newline_obj) {
567 if (!PyUnicode_Check(newline_obj)) {
568 PyErr_Format(PyExc_TypeError,
569 "newline must be str or None, not %.200s",
570 Py_TYPE(newline_obj)->tp_name);
571 return -1;
572 }
573 newline = _PyUnicode_AsString(newline_obj);
574 if (newline == NULL)
575 return -1;
576 }
577
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 if (newline && newline[0] != '\0'
579 && !(newline[0] == '\n' && newline[1] == '\0')
580 && !(newline[0] == '\r' && newline[1] == '\0')
581 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
582 PyErr_Format(PyExc_ValueError,
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000583 "illegal newline value: %R", newline_obj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584 return -1;
585 }
586 if (value && value != Py_None && !PyUnicode_Check(value)) {
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000587 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000588 "initial_value must be str or None, not %.200s",
589 Py_TYPE(value)->tp_name);
590 return -1;
591 }
592
593 self->ok = 0;
594
595 Py_CLEAR(self->readnl);
596 Py_CLEAR(self->writenl);
597 Py_CLEAR(self->decoder);
598
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +0000599 assert((newline != NULL && newline_obj != Py_None) ||
600 (newline == NULL && newline_obj == Py_None));
601
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000602 if (newline) {
603 self->readnl = PyUnicode_FromString(newline);
604 if (self->readnl == NULL)
605 return -1;
606 }
607 self->readuniversal = (newline == NULL || newline[0] == '\0');
608 self->readtranslate = (newline == NULL);
609 /* If newline == "", we don't translate anything.
610 If newline == "\n" or newline == None, we translate to "\n", which is
611 a no-op.
612 (for newline == None, TextIOWrapper translates to os.sepline, but it
613 is pointless for StringIO)
614 */
615 if (newline != NULL && newline[0] == '\r') {
616 self->writenl = self->readnl;
617 Py_INCREF(self->writenl);
618 }
619
620 if (self->readuniversal) {
621 self->decoder = PyObject_CallFunction(
622 (PyObject *)&PyIncrementalNewlineDecoder_Type,
623 "Oi", Py_None, (int) self->readtranslate);
624 if (self->decoder == NULL)
625 return -1;
626 }
627
628 /* Now everything is set up, resize buffer to size of initial value,
629 and copy it */
630 self->string_size = 0;
631 if (value && value != Py_None) {
632 Py_ssize_t len = PyUnicode_GetSize(value);
633 /* This is a heuristic, for newline translation might change
634 the string length. */
635 if (resize_buffer(self, len) < 0)
636 return -1;
637 self->pos = 0;
638 if (write_str(self, value) < 0)
639 return -1;
640 }
641 else {
642 if (resize_buffer(self, 0) < 0)
643 return -1;
644 }
645 self->pos = 0;
646
647 self->closed = 0;
648 self->ok = 1;
649 return 0;
650}
651
652/* Properties and pseudo-properties */
653static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000654stringio_seekable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000655{
656 CHECK_INITIALIZED(self);
657 Py_RETURN_TRUE;
658}
659
660static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000661stringio_readable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000662{
663 CHECK_INITIALIZED(self);
664 Py_RETURN_TRUE;
665}
666
667static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000668stringio_writable(stringio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669{
670 CHECK_INITIALIZED(self);
671 Py_RETURN_TRUE;
672}
673
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter. For __setstate__, and
   unlike BytesIO, we call __init__ to restore the object's state. Doing so
   allows us to avoid decoding the complex newline state while keeping the
   object representation compact.

   See the comment in bytesio.c regarding which pickle protocols are
   supported.
*/
685
686static PyObject *
687stringio_getstate(stringio *self)
688{
689 PyObject *initvalue = stringio_getvalue(self);
690 PyObject *dict;
691 PyObject *state;
692
693 if (initvalue == NULL)
694 return NULL;
695 if (self->dict == NULL) {
696 Py_INCREF(Py_None);
697 dict = Py_None;
698 }
699 else {
700 dict = PyDict_Copy(self->dict);
701 if (dict == NULL)
702 return NULL;
703 }
704
705 state = Py_BuildValue("(OOnN)", initvalue,
706 self->readnl ? self->readnl : Py_None,
707 self->pos, dict);
708 Py_DECREF(initvalue);
709 return state;
710}
711
712static PyObject *
713stringio_setstate(stringio *self, PyObject *state)
714{
715 PyObject *initarg;
716 PyObject *position_obj;
717 PyObject *dict;
718 Py_ssize_t pos;
719
720 assert(state != NULL);
721 CHECK_CLOSED(self);
722
723 /* We allow the state tuple to be longer than 4, because we may need
724 someday to extend the object's state without breaking
725 backward-compatibility. */
726 if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
727 PyErr_Format(PyExc_TypeError,
728 "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
729 Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
730 return NULL;
731 }
732
733 /* Initialize the object's state. */
734 initarg = PyTuple_GetSlice(state, 0, 2);
735 if (initarg == NULL)
736 return NULL;
737 if (stringio_init(self, initarg, NULL) < 0) {
738 Py_DECREF(initarg);
739 return NULL;
740 }
741 Py_DECREF(initarg);
742
743 /* Restore the buffer state. Even if __init__ did initialize the buffer,
744 we have to initialize it again since __init__ may translates the
745 newlines in the inital_value string. We clearly do not want that
746 because the string value in the state tuple has already been translated
747 once by __init__. So we do not take any chance and replace object's
748 buffer completely. */
749 {
750 Py_UNICODE *buf = PyUnicode_AS_UNICODE(PyTuple_GET_ITEM(state, 0));
751 Py_ssize_t bufsize = PyUnicode_GET_SIZE(PyTuple_GET_ITEM(state, 0));
752 if (resize_buffer(self, bufsize) < 0)
753 return NULL;
754 memcpy(self->buf, buf, bufsize * sizeof(Py_UNICODE));
755 self->string_size = bufsize;
756 }
757
758 /* Set carefully the position value. Alternatively, we could use the seek
759 method instead of modifying self->pos directly to better protect the
760 object internal state against errneous (or malicious) inputs. */
761 position_obj = PyTuple_GET_ITEM(state, 2);
762 if (!PyLong_Check(position_obj)) {
763 PyErr_Format(PyExc_TypeError,
764 "third item of state must be an integer, got %.200s",
765 Py_TYPE(position_obj)->tp_name);
766 return NULL;
767 }
768 pos = PyLong_AsSsize_t(position_obj);
769 if (pos == -1 && PyErr_Occurred())
770 return NULL;
771 if (pos < 0) {
772 PyErr_SetString(PyExc_ValueError,
773 "position value cannot be negative");
774 return NULL;
775 }
776 self->pos = pos;
777
778 /* Set the dictionary of the instance variables. */
779 dict = PyTuple_GET_ITEM(state, 3);
780 if (dict != Py_None) {
781 if (!PyDict_Check(dict)) {
782 PyErr_Format(PyExc_TypeError,
783 "fourth item of state should be a dict, got a %.200s",
784 Py_TYPE(dict)->tp_name);
785 return NULL;
786 }
787 if (self->dict) {
788 /* Alternatively, we could replace the internal dictionary
789 completely. However, it seems more practical to just update it. */
790 if (PyDict_Update(self->dict, dict) < 0)
791 return NULL;
792 }
793 else {
794 Py_INCREF(dict);
795 self->dict = dict;
796 }
797 }
798
799 Py_RETURN_NONE;
800}
801
802
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000804stringio_closed(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000805{
806 CHECK_INITIALIZED(self);
807 return PyBool_FromLong(self->closed);
808}
809
810static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000811stringio_line_buffering(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812{
813 CHECK_INITIALIZED(self);
814 CHECK_CLOSED(self);
815 Py_RETURN_FALSE;
816}
817
818static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000819stringio_newlines(stringio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000820{
821 CHECK_INITIALIZED(self);
822 CHECK_CLOSED(self);
823 if (self->decoder == NULL)
824 Py_RETURN_NONE;
825 return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
826}
827
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000828static struct PyMethodDef stringio_methods[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000829 {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
Antoine Pitroud5c3f6c2010-09-02 19:48:07 +0000830 {"getvalue", (PyCFunction)stringio_getvalue, METH_NOARGS, stringio_getvalue_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000831 {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
832 {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
833 {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
834 {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
835 {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
836 {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000837
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000838 {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
839 {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
840 {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000841
842 {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
843 {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000844 {NULL, NULL} /* sentinel */
845};
846
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000847static PyGetSetDef stringio_getset[] = {
848 {"closed", (getter)stringio_closed, NULL, NULL},
849 {"newlines", (getter)stringio_newlines, NULL, NULL},
850 /* (following comments straight off of the original Python wrapper:)
851 XXX Cruft to support the TextIOWrapper API. This would only
852 be meaningful if StringIO supported the buffer attribute.
853 Hopefully, a better solution, than adding these pseudo-attributes,
854 will be found.
855 */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000856 {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000857 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000858};
859
860PyTypeObject PyStringIO_Type = {
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000861 PyVarObject_HEAD_INIT(NULL, 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000862 "_io.StringIO", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000863 sizeof(stringio), /*tp_basicsize*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000864 0, /*tp_itemsize*/
865 (destructor)stringio_dealloc, /*tp_dealloc*/
866 0, /*tp_print*/
867 0, /*tp_getattr*/
868 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +0000869 0, /*tp_reserved*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000870 0, /*tp_repr*/
871 0, /*tp_as_number*/
872 0, /*tp_as_sequence*/
873 0, /*tp_as_mapping*/
874 0, /*tp_hash*/
875 0, /*tp_call*/
876 0, /*tp_str*/
877 0, /*tp_getattro*/
878 0, /*tp_setattro*/
879 0, /*tp_as_buffer*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000880 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
881 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
882 stringio_doc, /*tp_doc*/
883 (traverseproc)stringio_traverse, /*tp_traverse*/
884 (inquiry)stringio_clear, /*tp_clear*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000885 0, /*tp_richcompare*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000886 offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000887 0, /*tp_iter*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000888 (iternextfunc)stringio_iternext, /*tp_iternext*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000889 stringio_methods, /*tp_methods*/
890 0, /*tp_members*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000891 stringio_getset, /*tp_getset*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000892 0, /*tp_base*/
893 0, /*tp_dict*/
894 0, /*tp_descr_get*/
895 0, /*tp_descr_set*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000896 offsetof(stringio, dict), /*tp_dictoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000897 (initproc)stringio_init, /*tp_init*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000898 0, /*tp_alloc*/
899 stringio_new, /*tp_new*/
900};