blob: 8c5bb82de326bf3c3b00876ca715c236648e2b12 [file] [log] [blame]
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +00001#include "Python.h"
2
3typedef struct {
4 PyObject_HEAD
5 char *buf;
6 Py_ssize_t pos;
7 Py_ssize_t string_size;
8 size_t buf_size;
9} BytesIOObject;
10
/* Leave the enclosing function with a ValueError when the stream has been
   closed, i.e. when its buffer pointer is NULL.  The expansion executes
   `return NULL`, so it may only be used in functions returning a pointer.

   The body is wrapped in do { ... } while (0) so that `CHECK_CLOSED(x);`
   always behaves as exactly one statement and cannot swallow a following
   `else`. */
#define CHECK_CLOSED(self)                                      \
    do {                                                        \
        if ((self)->buf == NULL) {                              \
            PyErr_SetString(PyExc_ValueError,                   \
                            "I/O operation on closed file.");   \
            return NULL;                                        \
        }                                                       \
    } while (0)
17
18/* Internal routine to get a line from the buffer of a BytesIO
19 object. Returns the length between the current position to the
20 next newline character. */
21static Py_ssize_t
22get_line(BytesIOObject *self, char **output)
23{
24 char *n;
25 const char *str_end;
26 Py_ssize_t len;
27
28 assert(self->buf != NULL);
29
30 /* Move to the end of the line, up to the end of the string, s. */
31 str_end = self->buf + self->string_size;
32 for (n = self->buf + self->pos;
33 n < str_end && *n != '\n';
34 n++);
35
36 /* Skip the newline character */
37 if (n < str_end)
38 n++;
39
40 /* Get the length from the current position to the end of the line. */
41 len = n - (self->buf + self->pos);
42 *output = self->buf + self->pos;
43
44 assert(len >= 0);
45 assert(self->pos < PY_SSIZE_T_MAX - len);
46 self->pos += len;
47
48 return len;
49}
50
51/* Internal routine for changing the size of the buffer of BytesIO objects.
52 The caller should ensure that the 'size' argument is non-negative. Returns
53 0 on success, -1 otherwise. */
54static int
55resize_buffer(BytesIOObject *self, size_t size)
56{
57 /* Here, unsigned types are used to avoid dealing with signed integer
58 overflow, which is undefined in C. */
59 size_t alloc = self->buf_size;
60 char *new_buf = NULL;
61
62 assert(self->buf != NULL);
63
64 /* For simplicity, stay in the range of the signed type. Anyway, Python
65 doesn't allow strings to be longer than this. */
66 if (size > PY_SSIZE_T_MAX)
67 goto overflow;
68
69 if (size < alloc / 2) {
70 /* Major downsize; resize down to exact size. */
71 alloc = size + 1;
72 }
73 else if (size < alloc) {
74 /* Within allocated size; quick exit */
75 return 0;
76 }
77 else if (size <= alloc * 1.125) {
78 /* Moderate upsize; overallocate similar to list_resize() */
79 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
80 }
81 else {
82 /* Major upsize; resize up to exact size */
83 alloc = size + 1;
84 }
85
86 if (alloc > ((size_t)-1) / sizeof(char))
87 goto overflow;
88 new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char));
89 if (new_buf == NULL) {
90 PyErr_NoMemory();
91 return -1;
92 }
93 self->buf_size = alloc;
94 self->buf = new_buf;
95
96 return 0;
97
98 overflow:
99 PyErr_SetString(PyExc_OverflowError,
100 "new buffer size too large");
101 return -1;
102}
103
104/* Internal routine for writing a string of bytes to the buffer of a BytesIO
105 object. Returns the number of bytes wrote, or -1 on error. */
106static Py_ssize_t
107write_bytes(BytesIOObject *self, const char *bytes, Py_ssize_t len)
108{
109 assert(self->buf != NULL);
110 assert(self->pos >= 0);
111 assert(len >= 0);
112
113 if ((size_t)self->pos + len > self->buf_size) {
114 if (resize_buffer(self, (size_t)self->pos + len) < 0)
115 return -1;
116 }
117
118 if (self->pos > self->string_size) {
119 /* In case of overseek, pad with null bytes the buffer region between
120 the end of stream and the current position.
121
122 0 lo string_size hi
123 | |<---used--->|<----------available----------->|
124 | | <--to pad-->|<---to write---> |
125 0 buf position
126 */
127 memset(self->buf + self->string_size, '\0',
128 (self->pos - self->string_size) * sizeof(char));
129 }
130
131 /* Copy the data to the internal buffer, overwriting some of the existing
132 data if self->pos < self->string_size. */
133 memcpy(self->buf + self->pos, bytes, len);
134 self->pos += len;
135
136 /* Set the new length of the internal string if it has changed. */
137 if (self->string_size < self->pos) {
138 self->string_size = self->pos;
139 }
140
141 return len;
142}
143
144static PyObject *
145bytesio_get_closed(BytesIOObject *self)
146{
147 if (self->buf == NULL)
148 Py_RETURN_TRUE;
149 else
150 Py_RETURN_FALSE;
151}
152
153/* Generic getter for the writable, readable and seekable properties */
154static PyObject *
155return_true(BytesIOObject *self)
156{
157 Py_RETURN_TRUE;
158}
159
160PyDoc_STRVAR(flush_doc,
161"flush() -> None. Does nothing.");
162
163static PyObject *
164bytesio_flush(BytesIOObject *self)
165{
166 Py_RETURN_NONE;
167}
168
169PyDoc_STRVAR(getval_doc,
170"getvalue() -> bytes.\n"
171"\n"
172"Retrieve the entire contents of the BytesIO object.");
173
174static PyObject *
175bytesio_getvalue(BytesIOObject *self)
176{
177 CHECK_CLOSED(self);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000178 return PyString_FromStringAndSize(self->buf, self->string_size);
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +0000179}
180
181PyDoc_STRVAR(isatty_doc,
182"isatty() -> False.\n"
183"\n"
184"Always returns False since BytesIO objects are not connected\n"
185"to a tty-like device.");
186
187static PyObject *
188bytesio_isatty(BytesIOObject *self)
189{
190 CHECK_CLOSED(self);
191 Py_RETURN_FALSE;
192}
193
194PyDoc_STRVAR(tell_doc,
195"tell() -> current file position, an integer\n");
196
197static PyObject *
198bytesio_tell(BytesIOObject *self)
199{
200 CHECK_CLOSED(self);
201 return PyInt_FromSsize_t(self->pos);
202}
203
204PyDoc_STRVAR(read_doc,
205"read([size]) -> read at most size bytes, returned as a string.\n"
206"\n"
207"If the size argument is negative, read until EOF is reached.\n"
208"Return an empty string at EOF.");
209
210static PyObject *
211bytesio_read(BytesIOObject *self, PyObject *args)
212{
213 Py_ssize_t size, n;
214 char *output;
215 PyObject *arg = Py_None;
216
217 CHECK_CLOSED(self);
218
219 if (!PyArg_ParseTuple(args, "|O:read", &arg))
220 return NULL;
221
222 if (PyInt_Check(arg)) {
223 size = PyInt_AsSsize_t(arg);
224 }
225 else if (arg == Py_None) {
226 /* Read until EOF is reached, by default. */
227 size = -1;
228 }
229 else {
230 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
231 Py_TYPE(arg)->tp_name);
232 return NULL;
233 }
234
235 /* adjust invalid sizes */
236 n = self->string_size - self->pos;
237 if (size < 0 || size > n) {
238 size = n;
239 if (size < 0)
240 size = 0;
241 }
242
243 assert(self->buf != NULL);
244 output = self->buf + self->pos;
245 self->pos += size;
246
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000247 return PyString_FromStringAndSize(output, size);
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +0000248}
249
250
251PyDoc_STRVAR(read1_doc,
252"read1(size) -> read at most size bytes, returned as a string.\n"
253"\n"
254"If the size argument is negative or omitted, read until EOF is reached.\n"
255"Return an empty string at EOF.");
256
257static PyObject *
258bytesio_read1(BytesIOObject *self, PyObject *n)
259{
260 PyObject *arg, *res;
261
262 arg = PyTuple_Pack(1, n);
263 if (arg == NULL)
264 return NULL;
265 res = bytesio_read(self, arg);
266 Py_DECREF(arg);
267 return res;
268}
269
270PyDoc_STRVAR(readline_doc,
271"readline([size]) -> next line from the file, as a string.\n"
272"\n"
273"Retain newline. A non-negative size argument limits the maximum\n"
274"number of bytes to return (an incomplete line may be returned then).\n"
275"Return an empty string at EOF.\n");
276
277static PyObject *
278bytesio_readline(BytesIOObject *self, PyObject *args)
279{
280 Py_ssize_t size, n;
281 char *output;
282 PyObject *arg = Py_None;
283
284 CHECK_CLOSED(self);
285
286 if (!PyArg_ParseTuple(args, "|O:readline", &arg))
287 return NULL;
288
289 if (PyInt_Check(arg)) {
290 size = PyInt_AsSsize_t(arg);
291 }
292 else if (arg == Py_None) {
293 /* No size limit, by default. */
294 size = -1;
295 }
296 else {
297 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
298 Py_TYPE(arg)->tp_name);
299 return NULL;
300 }
301
302 n = get_line(self, &output);
303
304 if (size >= 0 && size < n) {
305 size = n - size;
306 n -= size;
307 self->pos -= size;
308 }
309
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000310 return PyString_FromStringAndSize(output, n);
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +0000311}
312
313PyDoc_STRVAR(readlines_doc,
314"readlines([size]) -> list of strings, each a line from the file.\n"
315"\n"
316"Call readline() repeatedly and return a list of the lines so read.\n"
317"The optional size argument, if given, is an approximate bound on the\n"
318"total number of bytes in the lines returned.\n");
319
320static PyObject *
321bytesio_readlines(BytesIOObject *self, PyObject *args)
322{
323 Py_ssize_t maxsize, size, n;
324 PyObject *result, *line;
325 char *output;
326 PyObject *arg = Py_None;
327
328 CHECK_CLOSED(self);
329
330 if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
331 return NULL;
332
333 if (PyInt_Check(arg)) {
334 maxsize = PyInt_AsSsize_t(arg);
335 }
336 else if (arg == Py_None) {
337 /* No size limit, by default. */
338 maxsize = -1;
339 }
340 else {
341 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
342 Py_TYPE(arg)->tp_name);
343 return NULL;
344 }
345
346 size = 0;
347 result = PyList_New(0);
348 if (!result)
349 return NULL;
350
351 while ((n = get_line(self, &output)) != 0) {
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000352 line = PyString_FromStringAndSize(output, n);
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +0000353 if (!line)
354 goto on_error;
355 if (PyList_Append(result, line) == -1) {
356 Py_DECREF(line);
357 goto on_error;
358 }
359 Py_DECREF(line);
360 size += n;
361 if (maxsize > 0 && size >= maxsize)
362 break;
363 }
364 return result;
365
366 on_error:
367 Py_DECREF(result);
368 return NULL;
369}
370
371PyDoc_STRVAR(readinto_doc,
372"readinto(bytearray) -> int. Read up to len(b) bytes into b.\n"
373"\n"
374"Returns number of bytes read (0 for EOF), or None if the object\n"
375"is set not to block as has no data to read.");
376
377static PyObject *
378bytesio_readinto(BytesIOObject *self, PyObject *buffer)
379{
380 void *raw_buffer;
381 Py_ssize_t len;
382
383 CHECK_CLOSED(self);
384
385 if (PyObject_AsWriteBuffer(buffer, &raw_buffer, &len) == -1)
386 return NULL;
387
388 if (self->pos + len > self->string_size)
389 len = self->string_size - self->pos;
390
391 memcpy(raw_buffer, self->buf + self->pos, len);
392 assert(self->pos + len < PY_SSIZE_T_MAX);
393 assert(len >= 0);
394 self->pos += len;
395
396 return PyInt_FromSsize_t(len);
397}
398
399PyDoc_STRVAR(truncate_doc,
400"truncate([size]) -> int. Truncate the file to at most size bytes.\n"
401"\n"
402"Size defaults to the current file position, as returned by tell().\n"
403"Returns the new size. Imply an absolute seek to the position size.");
404
405static PyObject *
406bytesio_truncate(BytesIOObject *self, PyObject *args)
407{
408 Py_ssize_t size;
409 PyObject *arg = Py_None;
410
411 CHECK_CLOSED(self);
412
413 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
414 return NULL;
415
416 if (PyInt_Check(arg)) {
417 size = PyInt_AsSsize_t(arg);
418 }
419 else if (arg == Py_None) {
420 /* Truncate to current position if no argument is passed. */
421 size = self->pos;
422 }
423 else {
424 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
425 Py_TYPE(arg)->tp_name);
426 return NULL;
427 }
428
429 if (size < 0) {
430 PyErr_Format(PyExc_ValueError,
431 "negative size value %zd", size);
432 return NULL;
433 }
434
435 if (size < self->string_size) {
436 self->string_size = size;
437 if (resize_buffer(self, size) < 0)
438 return NULL;
439 }
440 self->pos = size;
441
442 return PyInt_FromSsize_t(size);
443}
444
445static PyObject *
446bytesio_iternext(BytesIOObject *self)
447{
448 char *next;
449 Py_ssize_t n;
450
451 CHECK_CLOSED(self);
452
453 n = get_line(self, &next);
454
455 if (!next || n == 0)
456 return NULL;
457
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000458 return PyString_FromStringAndSize(next, n);
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +0000459}
460
461PyDoc_STRVAR(seek_doc,
462"seek(pos, whence=0) -> int. Change stream position.\n"
463"\n"
464"Seek to byte offset pos relative to position indicated by whence:\n"
465" 0 Start of stream (the default). pos should be >= 0;\n"
466" 1 Current position - pos may be negative;\n"
467" 2 End of stream - pos usually negative.\n"
468"Returns the new absolute position.");
469
470static PyObject *
471bytesio_seek(BytesIOObject *self, PyObject *args)
472{
473 PyObject *pos_obj, *mode_obj;
474 Py_ssize_t pos;
475 int mode = 0;
476
477 CHECK_CLOSED(self);
478
479 /* Special-case for 2.x to prevent floats from passing through.
480 This only needed to make a test in test_io succeed. */
481 if (!PyArg_UnpackTuple(args, "seek", 1, 2, &pos_obj, &mode_obj))
482 return NULL;
483 if (PyFloat_Check(pos_obj)) {
484 PyErr_SetString(PyExc_TypeError,
485 "position argument must be an integer");
486 return NULL;
487 }
488
489 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
490 return NULL;
491
492 if (pos < 0 && mode == 0) {
493 PyErr_Format(PyExc_ValueError,
494 "negative seek value %zd", pos);
495 return NULL;
496 }
497
498 /* mode 0: offset relative to beginning of the string.
499 mode 1: offset relative to current position.
500 mode 2: offset relative the end of the string. */
501 if (mode == 1) {
502 if (pos > PY_SSIZE_T_MAX - self->pos) {
503 PyErr_SetString(PyExc_OverflowError,
504 "new position too large");
505 return NULL;
506 }
507 pos += self->pos;
508 }
509 else if (mode == 2) {
510 if (pos > PY_SSIZE_T_MAX - self->string_size) {
511 PyErr_SetString(PyExc_OverflowError,
512 "new position too large");
513 return NULL;
514 }
515 pos += self->string_size;
516 }
517 else if (mode != 0) {
518 PyErr_Format(PyExc_ValueError,
519 "invalid whence (%i, should be 0, 1 or 2)", mode);
520 return NULL;
521 }
522
523 if (pos < 0)
524 pos = 0;
525 self->pos = pos;
526
527 return PyInt_FromSsize_t(self->pos);
528}
529
530PyDoc_STRVAR(write_doc,
531"write(bytes) -> int. Write bytes to file.\n"
532"\n"
533"Return the number of bytes written.");
534
535static PyObject *
536bytesio_write(BytesIOObject *self, PyObject *obj)
537{
538 const char *bytes;
539 Py_ssize_t size;
540 Py_ssize_t n = 0;
541
542 CHECK_CLOSED(self);
543
544 /* Special-case in 2.x to prevent unicode objects to pass through. */
545 if (PyUnicode_Check(obj)) {
546 PyErr_SetString(PyExc_TypeError,
547 "expecting a bytes object, got unicode");
548 return NULL;
549 }
550
551 if (PyObject_AsReadBuffer(obj, (void *)&bytes, &size) < 0)
552 return NULL;
553
554 if (size != 0) {
555 n = write_bytes(self, bytes, size);
556 if (n < 0)
557 return NULL;
558 }
559
560 return PyInt_FromSsize_t(n);
561}
562
563PyDoc_STRVAR(writelines_doc,
564"writelines(sequence_of_strings) -> None. Write strings to the file.\n"
565"\n"
566"Note that newlines are not added. The sequence can be any iterable\n"
567"object producing strings. This is equivalent to calling write() for\n"
568"each string.");
569
570static PyObject *
571bytesio_writelines(BytesIOObject *self, PyObject *v)
572{
573 PyObject *it, *item;
574 PyObject *ret;
575
576 CHECK_CLOSED(self);
577
578 it = PyObject_GetIter(v);
579 if (it == NULL)
580 return NULL;
581
582 while ((item = PyIter_Next(it)) != NULL) {
583 ret = bytesio_write(self, item);
584 Py_DECREF(item);
585 if (ret == NULL) {
586 Py_DECREF(it);
587 return NULL;
588 }
589 Py_DECREF(ret);
590 }
591 Py_DECREF(it);
592
593 /* See if PyIter_Next failed */
594 if (PyErr_Occurred())
595 return NULL;
596
597 Py_RETURN_NONE;
598}
599
600PyDoc_STRVAR(close_doc,
601"close() -> None. Disable all I/O operations.");
602
603static PyObject *
604bytesio_close(BytesIOObject *self)
605{
606 if (self->buf != NULL) {
607 PyMem_Free(self->buf);
608 self->buf = NULL;
609 }
610 Py_RETURN_NONE;
611}
612
613static void
614bytesio_dealloc(BytesIOObject *self)
615{
616 if (self->buf != NULL) {
617 PyMem_Free(self->buf);
618 self->buf = NULL;
619 }
620 Py_TYPE(self)->tp_free(self);
621}
622
623static PyObject *
624bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
625{
626 BytesIOObject *self;
627
628 assert(type != NULL && type->tp_alloc != NULL);
629 self = (BytesIOObject *)type->tp_alloc(type, 0);
630 if (self == NULL)
631 return NULL;
632
633 self->string_size = 0;
634 self->pos = 0;
635 self->buf_size = 0;
636 self->buf = (char *)PyMem_Malloc(0);
637 if (self->buf == NULL) {
638 Py_DECREF(self);
639 return PyErr_NoMemory();
640 }
641
642 return (PyObject *)self;
643}
644
645static int
646bytesio_init(BytesIOObject *self, PyObject *args, PyObject *kwds)
647{
648 PyObject *initvalue = NULL;
649
650 if (!PyArg_ParseTuple(args, "|O:BytesIO", &initvalue))
651 return -1;
652
653 /* In case, __init__ is called multiple times. */
654 self->string_size = 0;
655 self->pos = 0;
656
657 if (initvalue && initvalue != Py_None) {
658 PyObject *res;
659 res = bytesio_write(self, initvalue);
660 if (res == NULL)
661 return -1;
662 Py_DECREF(res);
663 self->pos = 0;
664 }
665
666 return 0;
667}
668
669static PyGetSetDef bytesio_getsetlist[] = {
670 {"closed", (getter)bytesio_get_closed, NULL,
671 "True if the file is closed."},
672 {0}, /* sentinel */
673};
674
675static struct PyMethodDef bytesio_methods[] = {
676 {"readable", (PyCFunction)return_true, METH_NOARGS, NULL},
677 {"seekable", (PyCFunction)return_true, METH_NOARGS, NULL},
678 {"writable", (PyCFunction)return_true, METH_NOARGS, NULL},
679 {"close", (PyCFunction)bytesio_close, METH_NOARGS, close_doc},
680 {"flush", (PyCFunction)bytesio_flush, METH_NOARGS, flush_doc},
681 {"isatty", (PyCFunction)bytesio_isatty, METH_NOARGS, isatty_doc},
682 {"tell", (PyCFunction)bytesio_tell, METH_NOARGS, tell_doc},
683 {"write", (PyCFunction)bytesio_write, METH_O, write_doc},
684 {"writelines", (PyCFunction)bytesio_writelines, METH_O, writelines_doc},
685 {"read1", (PyCFunction)bytesio_read1, METH_O, read1_doc},
686 {"readinto", (PyCFunction)bytesio_readinto, METH_O, readinto_doc},
687 {"readline", (PyCFunction)bytesio_readline, METH_VARARGS, readline_doc},
688 {"readlines", (PyCFunction)bytesio_readlines, METH_VARARGS, readlines_doc},
689 {"read", (PyCFunction)bytesio_read, METH_VARARGS, read_doc},
690 {"getvalue", (PyCFunction)bytesio_getvalue, METH_VARARGS, getval_doc},
691 {"seek", (PyCFunction)bytesio_seek, METH_VARARGS, seek_doc},
692 {"truncate", (PyCFunction)bytesio_truncate, METH_VARARGS, truncate_doc},
693 {NULL, NULL} /* sentinel */
694};
695
696PyDoc_STRVAR(bytesio_doc,
697"BytesIO([buffer]) -> object\n"
698"\n"
699"Create a buffered I/O implementation using an in-memory bytes\n"
700"buffer, ready for reading and writing.");
701
702static PyTypeObject BytesIO_Type = {
703 PyVarObject_HEAD_INIT(NULL, 0)
704 "_bytesio._BytesIO", /*tp_name*/
705 sizeof(BytesIOObject), /*tp_basicsize*/
706 0, /*tp_itemsize*/
707 (destructor)bytesio_dealloc, /*tp_dealloc*/
708 0, /*tp_print*/
709 0, /*tp_getattr*/
710 0, /*tp_setattr*/
711 0, /*tp_compare*/
712 0, /*tp_repr*/
713 0, /*tp_as_number*/
714 0, /*tp_as_sequence*/
715 0, /*tp_as_mapping*/
716 0, /*tp_hash*/
717 0, /*tp_call*/
718 0, /*tp_str*/
719 0, /*tp_getattro*/
720 0, /*tp_setattro*/
721 0, /*tp_as_buffer*/
722 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
723 bytesio_doc, /*tp_doc*/
724 0, /*tp_traverse*/
725 0, /*tp_clear*/
726 0, /*tp_richcompare*/
727 0, /*tp_weaklistoffset*/
728 PyObject_SelfIter, /*tp_iter*/
729 (iternextfunc)bytesio_iternext, /*tp_iternext*/
730 bytesio_methods, /*tp_methods*/
731 0, /*tp_members*/
732 bytesio_getsetlist, /*tp_getset*/
733 0, /*tp_base*/
734 0, /*tp_dict*/
735 0, /*tp_descr_get*/
736 0, /*tp_descr_set*/
737 0, /*tp_dictoffset*/
738 (initproc)bytesio_init, /*tp_init*/
739 0, /*tp_alloc*/
740 bytesio_new, /*tp_new*/
741};
742
743PyMODINIT_FUNC
744init_bytesio(void)
745{
746 PyObject *m;
747
748 if (PyType_Ready(&BytesIO_Type) < 0)
749 return;
750 m = Py_InitModule("_bytesio", NULL);
751 if (m == NULL)
752 return;
753 Py_INCREF(&BytesIO_Type);
754 PyModule_AddObject(m, "_BytesIO", (PyObject *)&BytesIO_Type);
755}