blob: 00cb06bf031de07a102465a96b7ec621c01d93e3 [file] [log] [blame]
Alexandre Vassalotti77250f42008-05-06 19:48:38 +00001#include "Python.h"
2
3typedef struct {
4 PyObject_HEAD
5 char *buf;
6 Py_ssize_t pos;
7 Py_ssize_t string_size;
8 size_t buf_size;
9} BytesIOObject;
10
/* Make the calling method fail with ValueError when the object has been
   closed, i.e. when its buffer pointer is NULL.  The body is wrapped in
   do { } while (0) so that the macro expands to exactly one statement and
   can be used safely inside an unbraced if/else. */
#define CHECK_CLOSED(self)                                      \
    do {                                                        \
        if ((self)->buf == NULL) {                              \
            PyErr_SetString(PyExc_ValueError,                   \
                            "I/O operation on closed file.");   \
            return NULL;                                        \
        }                                                       \
    } while (0)
17
18/* Internal routine to get a line from the buffer of a BytesIO
19 object. Returns the length between the current position to the
20 next newline character. */
21static Py_ssize_t
22get_line(BytesIOObject *self, char **output)
23{
24 char *n;
25 const char *str_end;
26 Py_ssize_t len;
27
28 assert(self->buf != NULL);
29
30 /* Move to the end of the line, up to the end of the string, s. */
31 str_end = self->buf + self->string_size;
32 for (n = self->buf + self->pos;
33 n < str_end && *n != '\n';
34 n++);
35
36 /* Skip the newline character */
37 if (n < str_end)
38 n++;
39
40 /* Get the length from the current position to the end of the line. */
41 len = n - (self->buf + self->pos);
42 *output = self->buf + self->pos;
43
44 assert(len >= 0);
45 assert(self->pos < PY_SSIZE_T_MAX - len);
46 self->pos += len;
47
48 return len;
49}
50
51/* Internal routine for changing the size of the buffer of BytesIO objects.
52 The caller should ensure that the 'size' argument is non-negative. Returns
53 0 on success, -1 otherwise. */
54static int
55resize_buffer(BytesIOObject *self, size_t size)
56{
57 /* Here, unsigned types are used to avoid dealing with signed integer
58 overflow, which is undefined in C. */
59 size_t alloc = self->buf_size;
60 char *new_buf = NULL;
61
62 assert(self->buf != NULL);
63
64 /* For simplicity, stay in the range of the signed type. Anyway, Python
65 doesn't allow strings to be longer than this. */
66 if (size > PY_SSIZE_T_MAX)
67 goto overflow;
68
69 if (size < alloc / 2) {
70 /* Major downsize; resize down to exact size. */
71 alloc = size + 1;
72 }
73 else if (size < alloc) {
74 /* Within allocated size; quick exit */
75 return 0;
76 }
77 else if (size <= alloc * 1.125) {
78 /* Moderate upsize; overallocate similar to list_resize() */
79 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
80 }
81 else {
82 /* Major upsize; resize up to exact size */
83 alloc = size + 1;
84 }
85
86 if (alloc > ((size_t)-1) / sizeof(char))
87 goto overflow;
88 new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char));
89 if (new_buf == NULL) {
90 PyErr_NoMemory();
91 return -1;
92 }
93 self->buf_size = alloc;
94 self->buf = new_buf;
95
96 return 0;
97
98 overflow:
99 PyErr_SetString(PyExc_OverflowError,
100 "new buffer size too large");
101 return -1;
102}
103
104/* Internal routine for writing a string of bytes to the buffer of a BytesIO
105 object. Returns the number of bytes wrote, or -1 on error. */
106static Py_ssize_t
107write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len)
108{
109 assert(self->buf != NULL);
110 assert(self->pos >= 0);
111 assert(len >= 0);
112
113 /* This overflow check is not strictly necessary. However, it avoids us to
114 deal with funky things like comparing an unsigned and a signed
115 integer. */
116 if (self->pos > PY_SSIZE_T_MAX - len) {
117 PyErr_SetString(PyExc_OverflowError,
118 "new position too large");
119 return -1;
120 }
121 if (self->pos + len > self->buf_size) {
122 if (resize_buffer(self, self->pos + len) < 0)
123 return -1;
124 }
125
126 if (self->pos > self->string_size) {
127 /* In case of overseek, pad with null bytes the buffer region between
128 the end of stream and the current position.
129
130 0 lo string_size hi
131 | |<---used--->|<----------available----------->|
132 | | <--to pad-->|<---to write---> |
133 0 buf position
134 */
135 memset(self->buf + self->string_size, '\0',
136 (self->pos - self->string_size) * sizeof(char));
137 }
138
139 /* Copy the data to the internal buffer, overwriting some of the existing
140 data if self->pos < self->string_size. */
141 memcpy(self->buf + self->pos, bytes, len);
142 self->pos += len;
143
144 /* Set the new length of the internal string if it has changed. */
145 if (self->string_size < self->pos) {
146 self->string_size = self->pos;
147 }
148
149 return len;
150}
151
152static PyObject *
153bytesio_get_closed(BytesIOObject *self)
154{
155 if (self->buf == NULL)
156 Py_RETURN_TRUE;
157 else
158 Py_RETURN_FALSE;
159}
160
161/* Generic getter for the writable, readable and seekable properties */
162static PyObject *
163return_true(BytesIOObject *self)
164{
165 Py_RETURN_TRUE;
166}
167
168PyDoc_STRVAR(flush_doc,
169"flush() -> None. Does nothing.");
170
171static PyObject *
172bytesio_flush(BytesIOObject *self)
173{
174 Py_RETURN_NONE;
175}
176
177PyDoc_STRVAR(getval_doc,
178"getvalue() -> string.\n"
179"\n"
180"Retrieve the entire contents of the BytesIO object.");
181
182static PyObject *
183bytesio_getvalue(BytesIOObject *self)
184{
185 CHECK_CLOSED(self);
186 return PyString_FromStringAndSize(self->buf, self->string_size);
187}
188
189PyDoc_STRVAR(isatty_doc,
190"isatty() -> False.\n"
191"\n"
192"Always returns False since BytesIO objects are not connected\n"
193"to a tty-like device.");
194
195static PyObject *
196bytesio_isatty(BytesIOObject *self)
197{
198 CHECK_CLOSED(self);
199 Py_RETURN_FALSE;
200}
201
202PyDoc_STRVAR(tell_doc,
203"tell() -> current file position, an integer\n");
204
205static PyObject *
206bytesio_tell(BytesIOObject *self)
207{
208 CHECK_CLOSED(self);
209 return PyLong_FromSsize_t(self->pos);
210}
211
212PyDoc_STRVAR(read_doc,
213"read([size]) -> read at most size bytes, returned as a string.\n"
214"\n"
215"If the size argument is negative, read until EOF is reached.\n"
216"Return an empty string at EOF.");
217
218static PyObject *
219bytesio_read(BytesIOObject *self, PyObject *args)
220{
221 Py_ssize_t size, n;
222 char *output;
223 PyObject *arg = Py_None;
224
225 CHECK_CLOSED(self);
226
227 if (!PyArg_ParseTuple(args, "|O:read", &arg))
228 return NULL;
229
230 if (PyLong_Check(arg)) {
231 size = PyLong_AsSsize_t(arg);
232 }
233 else if (arg == Py_None) {
234 /* Read until EOF is reached, by default. */
235 size = -1;
236 }
237 else {
238 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
239 Py_TYPE(arg)->tp_name);
240 return NULL;
241 }
242
243 /* adjust invalid sizes */
244 n = self->string_size - self->pos;
245 if (size < 0 || size > n) {
246 size = n;
247 if (size < 0)
248 size = 0;
249 }
250
251 assert(self->buf != NULL);
252 output = self->buf + self->pos;
253 self->pos += size;
254
255 return PyString_FromStringAndSize(output, size);
256}
257
258
259PyDoc_STRVAR(read1_doc,
260"read1(size) -> read at most size bytes, returned as a string.\n"
261"\n"
262"If the size argument is negative or omitted, read until EOF is reached.\n"
263"Return an empty string at EOF.");
264
265static PyObject *
266bytesio_read1(BytesIOObject *self, PyObject *n)
267{
268 PyObject *arg, *res;
269
270 arg = PyTuple_Pack(1, n);
271 if (arg == NULL)
272 return NULL;
273 res = bytesio_read(self, arg);
274 Py_DECREF(arg);
275 return res;
276}
277
278PyDoc_STRVAR(readline_doc,
279"readline([size]) -> next line from the file, as a string.\n"
280"\n"
281"Retain newline. A non-negative size argument limits the maximum\n"
282"number of bytes to return (an incomplete line may be returned then).\n"
283"Return an empty string at EOF.\n");
284
285static PyObject *
286bytesio_readline(BytesIOObject *self, PyObject *args)
287{
288 Py_ssize_t size, n;
289 char *output;
290 PyObject *arg = Py_None;
291
292 CHECK_CLOSED(self);
293
294 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
295 return NULL;
296
297 if (PyLong_Check(arg)) {
298 size = PyLong_AsSsize_t(arg);
299 }
300 else if (arg == Py_None) {
301 /* No size limit, by default. */
302 size = -1;
303 }
304 else {
305 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
306 Py_TYPE(arg)->tp_name);
307 return NULL;
308 }
309
310 n = get_line(self, &output);
311
312 if (size >= 0 && size < n) {
313 size = n - size;
314 n -= size;
315 self->pos -= size;
316 }
317
318 return PyString_FromStringAndSize(output, n);
319}
320
321PyDoc_STRVAR(readlines_doc,
322"readlines([size]) -> list of strings, each a line from the file.\n"
323"\n"
324"Call readline() repeatedly and return a list of the lines so read.\n"
325"The optional size argument, if given, is an approximate bound on the\n"
326"total number of bytes in the lines returned.\n");
327
328static PyObject *
329bytesio_readlines(BytesIOObject *self, PyObject *args)
330{
331 Py_ssize_t maxsize, size, n;
332 PyObject *result, *line;
333 char *output;
334 PyObject *arg = Py_None;
335
336 CHECK_CLOSED(self);
337
338 if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
339 return NULL;
340
341 if (PyLong_Check(arg)) {
342 maxsize = PyLong_AsSsize_t(arg);
343 }
344 else if (arg == Py_None) {
345 /* No size limit, by default. */
346 maxsize = -1;
347 }
348 else {
349 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
350 Py_TYPE(arg)->tp_name);
351 return NULL;
352 }
353
354 size = 0;
355 result = PyList_New(0);
356 if (!result)
357 return NULL;
358
359 while ((n = get_line(self, &output)) != 0) {
360 line = PyString_FromStringAndSize(output, n);
361 if (!line)
362 goto on_error;
363 if (PyList_Append(result, line) == -1) {
364 Py_DECREF(line);
365 goto on_error;
366 }
367 Py_DECREF(line);
368 size += n;
369 if (maxsize > 0 && size >= maxsize)
370 break;
371 }
372 return result;
373
374 on_error:
375 Py_DECREF(result);
376 return NULL;
377}
378
379PyDoc_STRVAR(readinto_doc,
380"readinto(bytes) -> int. Read up to len(b) bytes into b.\n"
381"\n"
382"Returns number of bytes read (0 for EOF), or None if the object\n"
383"is set not to block as has no data to read.");
384
385static PyObject *
386bytesio_readinto(BytesIOObject *self, PyObject *buffer)
387{
388 void *raw_buffer;
389 Py_ssize_t len;
390
391 CHECK_CLOSED(self);
392
393 if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1)
394 return NULL;
395
396 if (self->pos + len > self->string_size)
397 len = self->string_size - self->pos;
398
399 memcpy(raw_buffer, self->buf + self->pos, len);
400 assert(self->pos + len < PY_SSIZE_T_MAX);
401 assert(len >= 0);
402 self->pos += len;
403
404 return PyLong_FromSsize_t(len);
405}
406
407PyDoc_STRVAR(truncate_doc,
408"truncate([size]) -> int. Truncate the file to at most size bytes.\n"
409"\n"
410"Size defaults to the current file position, as returned by tell().\n"
411"Returns the new size. Imply an absolute seek to the position size.");
412
413static PyObject *
414bytesio_truncate(BytesIOObject *self, PyObject *args)
415{
416 Py_ssize_t size;
417 PyObject *arg = Py_None;
418
419 CHECK_CLOSED(self);
420
421 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
422 return NULL;
423
424 if (PyLong_Check(arg)) {
425 size = PyLong_AsSsize_t(arg);
426 }
427 else if (arg == Py_None) {
428 /* Truncate to current position if no argument is passed. */
429 size = self->pos;
430 }
431 else {
432 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
433 Py_TYPE(arg)->tp_name);
434 return NULL;
435 }
436
437 if (size < 0) {
438 PyErr_Format(PyExc_ValueError,
439 "negative size value %zd", size);
440 return NULL;
441 }
442
443 if (size < self->string_size) {
444 self->string_size = size;
445 if (resize_buffer(self, size) < 0)
446 return NULL;
447 }
448 self->pos = size;
449
450 return PyLong_FromSsize_t(size);
451}
452
453static PyObject *
454bytesio_iternext(BytesIOObject *self)
455{
456 char *next;
457 Py_ssize_t n;
458
459 CHECK_CLOSED(self);
460
461 n = get_line(self, &next);
462
463 if (!next || n == 0)
464 return NULL;
465
466 return PyString_FromStringAndSize(next, n);
467}
468
469PyDoc_STRVAR(seek_doc,
470"seek(pos, whence=0) -> int. Change stream position.\n"
471"\n"
472"Seek to byte offset pos relative to position indicated by whence:\n"
473" 0 Start of stream (the default). pos should be >= 0;\n"
474" 1 Current position - pos may be negative;\n"
475" 2 End of stream - pos usually negative.\n"
476"Returns the new absolute position.");
477
478static PyObject *
479bytesio_seek(BytesIOObject *self, PyObject *args)
480{
481 Py_ssize_t pos;
482 int mode = 0;
483
484 CHECK_CLOSED(self);
485
486 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
487 return NULL;
488
489 if (pos < 0 && mode == 0) {
490 PyErr_Format(PyExc_ValueError,
491 "negative seek value %zd", pos);
492 return NULL;
493 }
494
495 /* mode 0: offset relative to beginning of the string.
496 mode 1: offset relative to current position.
497 mode 2: offset relative the end of the string. */
498 if (mode == 1) {
499 if (pos > PY_SSIZE_T_MAX - self->pos) {
500 PyErr_SetString(PyExc_OverflowError,
501 "new position too large");
502 return NULL;
503 }
504 pos += self->pos;
505 }
506 else if (mode == 2) {
507 if (pos > PY_SSIZE_T_MAX - self->string_size) {
508 PyErr_SetString(PyExc_OverflowError,
509 "new position too large");
510 return NULL;
511 }
512 pos += self->string_size;
513 }
514 else if (mode != 0) {
515 PyErr_Format(PyExc_ValueError,
516 "invalid whence (%i, should be 0, 1 or 2)", mode);
517 return NULL;
518 }
519
520 if (pos < 0)
521 pos = 0;
522 self->pos = pos;
523
524 return PyLong_FromSsize_t(self->pos);
525}
526
527PyDoc_STRVAR(write_doc,
528"write(str) -> int. Write string str to file.\n"
529"\n"
530"Return the number of bytes written.");
531
532static PyObject *
533bytesio_write(BytesIOObject *self, PyObject *obj)
534{
535 const char *bytes;
536 Py_ssize_t size;
537 Py_ssize_t n = 0;
538
539 CHECK_CLOSED(self);
540
541 if (PyObject_AsReadBuffer(obj, (void *)&bytes, &size) < 0)
542 return NULL;
543
544 if (size != 0) {
545 n = write_bytes(self, bytes, size);
546 if (n < 0)
547 return NULL;
548 }
549
550 return PyLong_FromSsize_t(n);
551}
552
553PyDoc_STRVAR(writelines_doc,
554"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
555"\n"
556"Note that newlines are not added. The sequence can be any iterable object\n"
557"producing strings. This is equivalent to calling write() for each string.");
558
559static PyObject *
560bytesio_writelines(BytesIOObject *self, PyObject *v)
561{
562 PyObject *it, *item;
563 PyObject *ret;
564
565 CHECK_CLOSED(self);
566
567 it = PyObject_GetIter(v);
568 if (it == NULL)
569 return NULL;
570
571 while ((item = PyIter_Next(it)) != NULL) {
572 ret = bytesio_write(self, item);
573 Py_DECREF(item);
574 if (ret == NULL) {
575 Py_DECREF(it);
576 return NULL;
577 }
578 Py_DECREF(ret);
579 }
580 Py_DECREF(it);
581
582 /* See if PyIter_Next failed */
583 if (PyErr_Occurred())
584 return NULL;
585
586 Py_RETURN_NONE;
587}
588
589PyDoc_STRVAR(close_doc,
590"close() -> None. Disable all I/O operations.");
591
592static PyObject *
593bytesio_close(BytesIOObject *self)
594{
595 if (self->buf != NULL) {
596 PyMem_Free(self->buf);
597 self->buf = NULL;
598 }
599 Py_RETURN_NONE;
600}
601
602static void
603bytesio_dealloc(BytesIOObject *self)
604{
605 if (self->buf != NULL) {
606 PyMem_Free(self->buf);
607 self->buf = NULL;
608 }
609 Py_TYPE(self)->tp_free(self);
610}
611
612static PyObject *
613bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
614{
615 BytesIOObject *self;
616
617 assert(type != NULL && type->tp_alloc != NULL);
618 self = (BytesIOObject *)type->tp_alloc(type, 0);
619 if (self == NULL)
620 return NULL;
621
622 self->string_size = 0;
623 self->pos = 0;
624 self->buf_size = 0;
625 self->buf = (char *)PyMem_Malloc(0);
626 if (self->buf == NULL) {
627 Py_DECREF(self);
628 return PyErr_NoMemory();
629 }
630
631 return (PyObject *)self;
632}
633
634static int
635bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds)
636{
637 PyObject *initvalue = NULL;
638
639 if (!PyArg_ParseTuple(args, "|O:BytesIO", &initvalue))
640 return -1;
641
642 /* In case, __init__ is called multiple times. */
643 self->string_size = 0;
644 self->pos = 0;
645
646 if (initvalue && initvalue != Py_None) {
647 PyObject *res;
648 res = bytesio_write(self, initvalue);
649 if (res == NULL)
650 return -1;
651 Py_DECREF(res);
652 self->pos = 0;
653 }
654
655 return 0;
656}
657
658static PyGetSetDef bytesio_getsetlist[] = {
659 {"closed", (getter)bytesio_get_closed, NULL,
660 "True if the file is closed."},
661 {0}, /* sentinel */
662};
663
664static struct PyMethodDef bytesio_methods[] = {
665 {"readable", (PyCFunction)return_true, METH_NOARGS, NULL},
666 {"seekable", (PyCFunction)return_true, METH_NOARGS, NULL},
667 {"writable", (PyCFunction)return_true, METH_NOARGS, NULL},
668 {"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc},
669 {"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc},
670 {"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc},
671 {"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc},
672 {"write", (PyCFunction)bytesio_write, METH_O, write_doc},
673 {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc},
674 {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc},
675 {"readinto", (PyCFunction)bytesio_readinto, METH_O, readinto_doc},
676 {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
677 {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
678 {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
679 {"getvalue", (PyCFunction)bytesio_getvalue, METH_VARARGS, getval_doc},
680 {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
681 {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
682 {NULL, NULL} /* sentinel */
683};
684
685PyDoc_STRVAR(bytesio_doc,
686"BytesIO([buffer]) -> object\n"
687"\n"
688"Create a buffered I/O implementation using an in-memory bytes\n"
689"buffer, ready for reading and writing.");
690
691static PyTypeObject BytesIO_Type = {
692 PyVarObject_HEAD_INIT(NULL, 0)
693 "_bytesio._BytesIO", /*tp_name*/
694 sizeof(BytesIOObject), /*tp_basicsize*/
695 0, /*tp_itemsize*/
696 (destructor)bytesio_dealloc, /*tp_dealloc*/
697 0, /*tp_print*/
698 0, /*tp_getattr*/
699 0, /*tp_setattr*/
700 0, /*tp_compare*/
701 0, /*tp_repr*/
702 0, /*tp_as_number*/
703 0, /*tp_as_sequence*/
704 0, /*tp_as_mapping*/
705 0, /*tp_hash*/
706 0, /*tp_call*/
707 0, /*tp_str*/
708 0, /*tp_getattro*/
709 0, /*tp_setattro*/
710 0, /*tp_as_buffer*/
711 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
712 bytesio_doc, /*tp_doc*/
713 0, /*tp_traverse*/
714 0, /*tp_clear*/
715 0, /*tp_richcompare*/
716 0, /*tp_weaklistoffset*/
717 PyObject_SelfIter, /*tp_iter*/
718 (iternextfunc)bytesio_iternext, /*tp_iternext*/
719 bytesio_methods, /*tp_methods*/
720 0, /*tp_members*/
721 bytesio_getsetlist, /*tp_getset*/
722 0, /*tp_base*/
723 0, /*tp_dict*/
724 0, /*tp_descr_get*/
725 0, /*tp_descr_set*/
726 0, /*tp_dictoffset*/
727 (initproc)bytesio_init, /*tp_init*/
728 0, /*tp_alloc*/
729 bytesio_new, /*tp_new*/
730};
731
732PyMODINIT_FUNC
733init_bytesio(void)
734{
735 PyObject *m;
736
737 if (PyType_Ready(&BytesIO_Type) < 0)
738 return;
739 m = Py_InitModule("_bytesio", NULL);
740 if (m == NULL)
741 return;
742 Py_INCREF(&BytesIO_Type);
743 PyModule_AddObject(m, "_BytesIO", (PyObject *)&BytesIO_Type);
744}