| #include "Python.h" |
| |
| /* This module is a stripped down version of _bytesio.c with a Py_UNICODE |
| buffer. Most of the functionality is provided by subclassing _StringIO. */ |
| |
| |
| typedef struct { |
| PyObject_HEAD |
| Py_UNICODE *buf; |
| Py_ssize_t pos; |
| Py_ssize_t string_size; |
| size_t buf_size; |
| } StringIOObject; |
| |
| |
| /* Internal routine for changing the size, in terms of characters, of the |
| buffer of StringIO objects. The caller should ensure that the 'size' |
| argument is non-negative. Returns 0 on success, -1 otherwise. */ |
| static int |
| resize_buffer(StringIOObject *self, size_t size) |
| { |
| /* Here, unsigned types are used to avoid dealing with signed integer |
| overflow, which is undefined in C. */ |
| size_t alloc = self->buf_size; |
| Py_UNICODE *new_buf = NULL; |
| |
| assert(self->buf != NULL); |
| |
| /* For simplicity, stay in the range of the signed type. Anyway, Python |
| doesn't allow strings to be longer than this. */ |
| if (size > PY_SSIZE_T_MAX) |
| goto overflow; |
| |
| if (size < alloc / 2) { |
| /* Major downsize; resize down to exact size. */ |
| alloc = size + 1; |
| } |
| else if (size < alloc) { |
| /* Within allocated size; quick exit */ |
| return 0; |
| } |
| else if (size <= alloc * 1.125) { |
| /* Moderate upsize; overallocate similar to list_resize() */ |
| alloc = size + (size >> 3) + (size < 9 ? 3 : 6); |
| } |
| else { |
| /* Major upsize; resize up to exact size */ |
| alloc = size + 1; |
| } |
| |
| if (alloc > ((size_t)-1) / sizeof(Py_UNICODE)) |
| goto overflow; |
| new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf, |
| alloc * sizeof(Py_UNICODE)); |
| if (new_buf == NULL) { |
| PyErr_NoMemory(); |
| return -1; |
| } |
| self->buf_size = alloc; |
| self->buf = new_buf; |
| |
| return 0; |
| |
| overflow: |
| PyErr_SetString(PyExc_OverflowError, |
| "new buffer size too large"); |
| return -1; |
| } |
| |
| /* Internal routine for writing a string of characters to the buffer of a |
| StringIO object. Returns the number of bytes wrote, or -1 on error. */ |
| static Py_ssize_t |
| write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len) |
| { |
| assert(self->buf != NULL); |
| assert(self->pos >= 0); |
| assert(len >= 0); |
| |
| /* This overflow check is not strictly necessary. However, it avoids us to |
| deal with funky things like comparing an unsigned and a signed |
| integer. */ |
| if (self->pos > PY_SSIZE_T_MAX - len) { |
| PyErr_SetString(PyExc_OverflowError, |
| "new position too large"); |
| return -1; |
| } |
| if (self->pos + len > self->string_size) { |
| if (resize_buffer(self, self->pos + len) < 0) |
| return -1; |
| } |
| |
| if (self->pos > self->string_size) { |
| /* In case of overseek, pad with null bytes the buffer region between |
| the end of stream and the current position. |
| |
| 0 lo string_size hi |
| | |<---used--->|<----------available----------->| |
| | | <--to pad-->|<---to write---> | |
| 0 buf positon |
| |
| */ |
| memset(self->buf + self->string_size, '\0', |
| (self->pos - self->string_size) * sizeof(Py_UNICODE)); |
| } |
| |
| /* Copy the data to the internal buffer, overwriting some of the |
| existing data if self->pos < self->string_size. */ |
| memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE)); |
| self->pos += len; |
| |
| /* Set the new length of the internal string if it has changed */ |
| if (self->string_size < self->pos) { |
| self->string_size = self->pos; |
| } |
| |
| return len; |
| } |
| |
| static PyObject * |
| stringio_getvalue(StringIOObject *self) |
| { |
| return PyUnicode_FromUnicode(self->buf, self->string_size); |
| } |
| |
| static PyObject * |
| stringio_tell(StringIOObject *self) |
| { |
| return PyLong_FromSsize_t(self->pos); |
| } |
| |
| static PyObject * |
| stringio_read(StringIOObject *self, PyObject *args) |
| { |
| Py_ssize_t size, n; |
| Py_UNICODE *output; |
| PyObject *arg = Py_None; |
| |
| if (!PyArg_ParseTuple(args, "|O:read", &arg)) |
| return NULL; |
| |
| if (PyLong_Check(arg)) { |
| size = PyLong_AsSsize_t(arg); |
| } |
| else if (arg == Py_None) { |
| /* Read until EOF is reached, by default. */ |
| size = -1; |
| } |
| else { |
| PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", |
| Py_TYPE(arg)->tp_name); |
| return NULL; |
| } |
| |
| /* adjust invalid sizes */ |
| n = self->string_size - self->pos; |
| if (size < 0 || size > n) { |
| size = n; |
| if (size < 0) |
| size = 0; |
| } |
| |
| assert(self->buf != NULL); |
| output = self->buf + self->pos; |
| self->pos += size; |
| |
| return PyUnicode_FromUnicode(output, size); |
| } |
| |
| static PyObject * |
| stringio_truncate(StringIOObject *self, PyObject *args) |
| { |
| Py_ssize_t size; |
| PyObject *arg = Py_None; |
| |
| if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) |
| return NULL; |
| |
| if (PyLong_Check(arg)) { |
| size = PyLong_AsSsize_t(arg); |
| } |
| else if (arg == Py_None) { |
| /* Truncate to current position if no argument is passed. */ |
| size = self->pos; |
| } |
| else { |
| PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", |
| Py_TYPE(arg)->tp_name); |
| return NULL; |
| } |
| |
| if (size < 0) { |
| PyErr_Format(PyExc_ValueError, |
| "Negative size value %zd", size); |
| return NULL; |
| } |
| |
| if (size < self->string_size) { |
| self->string_size = size; |
| if (resize_buffer(self, size) < 0) |
| return NULL; |
| } |
| self->pos = size; |
| |
| return PyLong_FromSsize_t(size); |
| } |
| |
| static PyObject * |
| stringio_seek(StringIOObject *self, PyObject *args) |
| { |
| Py_ssize_t pos; |
| int mode = 0; |
| |
| if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode)) |
| return NULL; |
| |
| if (mode != 0 && mode != 1 && mode != 2) { |
| PyErr_Format(PyExc_ValueError, |
| "Invalid whence (%i, should be 0, 1 or 2)", mode); |
| return NULL; |
| } |
| else if (pos < 0 && mode == 0) { |
| PyErr_Format(PyExc_ValueError, |
| "Negative seek position %zd", pos); |
| return NULL; |
| } |
| else if (mode != 0 && pos != 0) { |
| PyErr_SetString(PyExc_IOError, |
| "Can't do nonzero cur-relative seeks"); |
| return NULL; |
| } |
| |
| /* mode 0: offset relative to beginning of the string. |
| mode 1: no change to current position. |
| mode 2: change position to end of file. */ |
| if (mode == 1) { |
| pos = self->pos; |
| } |
| else if (mode == 2) { |
| pos = self->string_size; |
| } |
| |
| self->pos = pos; |
| |
| return PyLong_FromSsize_t(self->pos); |
| } |
| |
| static PyObject * |
| stringio_write(StringIOObject *self, PyObject *obj) |
| { |
| const Py_UNICODE *str; |
| Py_ssize_t size; |
| Py_ssize_t n = 0; |
| |
| if (PyUnicode_Check(obj)) { |
| str = PyUnicode_AsUnicode(obj); |
| size = PyUnicode_GetSize(obj); |
| } |
| else { |
| PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'", |
| Py_TYPE(obj)->tp_name); |
| return NULL; |
| } |
| |
| if (size != 0) { |
| n = write_str(self, str, size); |
| if (n < 0) |
| return NULL; |
| } |
| |
| return PyLong_FromSsize_t(n); |
| } |
| |
| static void |
| stringio_dealloc(StringIOObject *self) |
| { |
| PyMem_Free(self->buf); |
| Py_TYPE(self)->tp_free(self); |
| } |
| |
| static PyObject * |
| stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
| { |
| StringIOObject *self; |
| |
| assert(type != NULL && type->tp_alloc != NULL); |
| self = (StringIOObject *)type->tp_alloc(type, 0); |
| if (self == NULL) |
| return NULL; |
| |
| self->string_size = 0; |
| self->pos = 0; |
| self->buf_size = 0; |
| self->buf = (Py_UNICODE *)PyMem_Malloc(0); |
| if (self->buf == NULL) { |
| Py_DECREF(self); |
| return PyErr_NoMemory(); |
| } |
| |
| return (PyObject *)self; |
| } |
| |
| static struct PyMethodDef stringio_methods[] = { |
| {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL}, |
| {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL}, |
| {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL}, |
| {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL}, |
| {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL}, |
| {"write", (PyCFunction)stringio_write, METH_O, NULL}, |
| {NULL, NULL} /* sentinel */ |
| }; |
| |
| static PyTypeObject StringIO_Type = { |
| PyVarObject_HEAD_INIT(NULL, 0) |
| "_stringio._StringIO", /*tp_name*/ |
| sizeof(StringIOObject), /*tp_basicsize*/ |
| 0, /*tp_itemsize*/ |
| (destructor)stringio_dealloc, /*tp_dealloc*/ |
| 0, /*tp_print*/ |
| 0, /*tp_getattr*/ |
| 0, /*tp_setattr*/ |
| 0, /*tp_compare*/ |
| 0, /*tp_repr*/ |
| 0, /*tp_as_number*/ |
| 0, /*tp_as_sequence*/ |
| 0, /*tp_as_mapping*/ |
| 0, /*tp_hash*/ |
| 0, /*tp_call*/ |
| 0, /*tp_str*/ |
| 0, /*tp_getattro*/ |
| 0, /*tp_setattro*/ |
| 0, /*tp_as_buffer*/ |
| Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
| 0, /*tp_doc*/ |
| 0, /*tp_traverse*/ |
| 0, /*tp_clear*/ |
| 0, /*tp_richcompare*/ |
| 0, /*tp_weaklistoffset*/ |
| 0, /*tp_iter*/ |
| 0, /*tp_iternext*/ |
| stringio_methods, /*tp_methods*/ |
| 0, /*tp_members*/ |
| 0, /*tp_getset*/ |
| 0, /*tp_base*/ |
| 0, /*tp_dict*/ |
| 0, /*tp_descr_get*/ |
| 0, /*tp_descr_set*/ |
| 0, /*tp_dictoffset*/ |
| 0, /*tp_init*/ |
| 0, /*tp_alloc*/ |
| stringio_new, /*tp_new*/ |
| }; |
| |
| static struct PyModuleDef _stringiomodule = { |
| PyModuleDef_HEAD_INIT, |
| "_stringio", |
| NULL, |
| -1, |
| NULL, |
| NULL, |
| NULL, |
| NULL, |
| NULL |
| }; |
| |
| PyMODINIT_FUNC |
| PyInit__stringio(void) |
| { |
| PyObject *m; |
| |
| if (PyType_Ready(&StringIO_Type) < 0) |
| return NULL; |
| m = PyModule_Create(&_stringiomodule); |
| if (m == NULL) |
| return NULL; |
| Py_INCREF(&StringIO_Type); |
| if (PyModule_AddObject(m, "_StringIO", (PyObject *)&StringIO_Type) < 0) |
| return NULL; |
| return m; |
| } |