Alexandre Vassalotti | 794652d | 2008-06-11 22:58:36 +0000 | [diff] [blame] | 1 | #include "Python.h" |
| 2 | |
| 3 | /* This module is a stripped down version of _bytesio.c with a Py_UNICODE |
| 4 | buffer. Most of the functionality is provided by subclassing _StringIO. */ |
| 5 | |
| 6 | |
| 7 | typedef struct { |
| 8 | PyObject_HEAD |
| 9 | Py_UNICODE *buf; |
| 10 | Py_ssize_t pos; |
| 11 | Py_ssize_t string_size; |
| 12 | size_t buf_size; |
| 13 | } StringIOObject; |
| 14 | |
| 15 | |
| 16 | /* Internal routine for changing the size, in terms of characters, of the |
| 17 | buffer of StringIO objects. The caller should ensure that the 'size' |
| 18 | argument is non-negative. Returns 0 on success, -1 otherwise. */ |
| 19 | static int |
| 20 | resize_buffer(StringIOObject *self, size_t size) |
| 21 | { |
| 22 | /* Here, unsigned types are used to avoid dealing with signed integer |
| 23 | overflow, which is undefined in C. */ |
| 24 | size_t alloc = self->buf_size; |
| 25 | Py_UNICODE *new_buf = NULL; |
| 26 | |
| 27 | assert(self->buf != NULL); |
| 28 | |
| 29 | /* For simplicity, stay in the range of the signed type. Anyway, Python |
| 30 | doesn't allow strings to be longer than this. */ |
| 31 | if (size > PY_SSIZE_T_MAX) |
| 32 | goto overflow; |
| 33 | |
| 34 | if (size < alloc / 2) { |
| 35 | /* Major downsize; resize down to exact size. */ |
| 36 | alloc = size + 1; |
| 37 | } |
| 38 | else if (size < alloc) { |
| 39 | /* Within allocated size; quick exit */ |
| 40 | return 0; |
| 41 | } |
| 42 | else if (size <= alloc * 1.125) { |
| 43 | /* Moderate upsize; overallocate similar to list_resize() */ |
| 44 | alloc = size + (size >> 3) + (size < 9 ? 3 : 6); |
| 45 | } |
| 46 | else { |
| 47 | /* Major upsize; resize up to exact size */ |
| 48 | alloc = size + 1; |
| 49 | } |
| 50 | |
| 51 | if (alloc > ((size_t)-1) / sizeof(Py_UNICODE)) |
| 52 | goto overflow; |
| 53 | new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf, |
| 54 | alloc * sizeof(Py_UNICODE)); |
| 55 | if (new_buf == NULL) { |
| 56 | PyErr_NoMemory(); |
| 57 | return -1; |
| 58 | } |
| 59 | self->buf_size = alloc; |
| 60 | self->buf = new_buf; |
| 61 | |
| 62 | return 0; |
| 63 | |
| 64 | overflow: |
| 65 | PyErr_SetString(PyExc_OverflowError, |
| 66 | "new buffer size too large"); |
| 67 | return -1; |
| 68 | } |
| 69 | |
| 70 | /* Internal routine for writing a string of characters to the buffer of a |
| 71 | StringIO object. Returns the number of bytes wrote, or -1 on error. */ |
| 72 | static Py_ssize_t |
| 73 | write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len) |
| 74 | { |
| 75 | assert(self->buf != NULL); |
| 76 | assert(self->pos >= 0); |
| 77 | assert(len >= 0); |
| 78 | |
| 79 | /* This overflow check is not strictly necessary. However, it avoids us to |
| 80 | deal with funky things like comparing an unsigned and a signed |
| 81 | integer. */ |
| 82 | if (self->pos > PY_SSIZE_T_MAX - len) { |
| 83 | PyErr_SetString(PyExc_OverflowError, |
| 84 | "new position too large"); |
| 85 | return -1; |
| 86 | } |
| 87 | if (self->pos + len > self->string_size) { |
| 88 | if (resize_buffer(self, self->pos + len) < 0) |
| 89 | return -1; |
| 90 | } |
| 91 | |
| 92 | if (self->pos > self->string_size) { |
| 93 | /* In case of overseek, pad with null bytes the buffer region between |
| 94 | the end of stream and the current position. |
| 95 | |
| 96 | 0 lo string_size hi |
| 97 | | |<---used--->|<----------available----------->| |
| 98 | | | <--to pad-->|<---to write---> | |
| 99 | 0 buf positon |
| 100 | |
| 101 | */ |
| 102 | memset(self->buf + self->string_size, '\0', |
| 103 | (self->pos - self->string_size) * sizeof(Py_UNICODE)); |
| 104 | } |
| 105 | |
| 106 | /* Copy the data to the internal buffer, overwriting some of the |
| 107 | existing data if self->pos < self->string_size. */ |
| 108 | memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE)); |
| 109 | self->pos += len; |
| 110 | |
| 111 | /* Set the new length of the internal string if it has changed */ |
| 112 | if (self->string_size < self->pos) { |
| 113 | self->string_size = self->pos; |
| 114 | } |
| 115 | |
| 116 | return len; |
| 117 | } |
| 118 | |
| 119 | static PyObject * |
| 120 | stringio_getvalue(StringIOObject *self) |
| 121 | { |
| 122 | return PyUnicode_FromUnicode(self->buf, self->string_size); |
| 123 | } |
| 124 | |
| 125 | static PyObject * |
| 126 | stringio_tell(StringIOObject *self) |
| 127 | { |
| 128 | return PyLong_FromSsize_t(self->pos); |
| 129 | } |
| 130 | |
| 131 | static PyObject * |
| 132 | stringio_read(StringIOObject *self, PyObject *args) |
| 133 | { |
| 134 | Py_ssize_t size, n; |
| 135 | Py_UNICODE *output; |
| 136 | PyObject *arg = Py_None; |
| 137 | |
| 138 | if (!PyArg_ParseTuple(args, "|O:read", &arg)) |
| 139 | return NULL; |
| 140 | |
| 141 | if (PyLong_Check(arg)) { |
| 142 | size = PyLong_AsSsize_t(arg); |
Amaury Forgeot d'Arc | 58fb905 | 2008-09-30 20:22:44 +0000 | [diff] [blame] | 143 | if (size == -1 && PyErr_Occurred()) |
| 144 | return NULL; |
Alexandre Vassalotti | 794652d | 2008-06-11 22:58:36 +0000 | [diff] [blame] | 145 | } |
| 146 | else if (arg == Py_None) { |
| 147 | /* Read until EOF is reached, by default. */ |
| 148 | size = -1; |
| 149 | } |
| 150 | else { |
| 151 | PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", |
| 152 | Py_TYPE(arg)->tp_name); |
| 153 | return NULL; |
| 154 | } |
| 155 | |
| 156 | /* adjust invalid sizes */ |
| 157 | n = self->string_size - self->pos; |
| 158 | if (size < 0 || size > n) { |
| 159 | size = n; |
| 160 | if (size < 0) |
| 161 | size = 0; |
| 162 | } |
| 163 | |
| 164 | assert(self->buf != NULL); |
| 165 | output = self->buf + self->pos; |
| 166 | self->pos += size; |
| 167 | |
| 168 | return PyUnicode_FromUnicode(output, size); |
| 169 | } |
| 170 | |
| 171 | static PyObject * |
| 172 | stringio_truncate(StringIOObject *self, PyObject *args) |
| 173 | { |
| 174 | Py_ssize_t size; |
| 175 | PyObject *arg = Py_None; |
| 176 | |
| 177 | if (!PyArg_ParseTuple(args, "|O:truncate", &arg)) |
| 178 | return NULL; |
| 179 | |
| 180 | if (PyLong_Check(arg)) { |
| 181 | size = PyLong_AsSsize_t(arg); |
Benjamin Peterson | c9e435e | 2008-09-30 02:22:04 +0000 | [diff] [blame] | 182 | if (size == -1 && PyErr_Occurred()) |
| 183 | return NULL; |
Alexandre Vassalotti | 794652d | 2008-06-11 22:58:36 +0000 | [diff] [blame] | 184 | } |
| 185 | else if (arg == Py_None) { |
| 186 | /* Truncate to current position if no argument is passed. */ |
| 187 | size = self->pos; |
| 188 | } |
| 189 | else { |
| 190 | PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'", |
| 191 | Py_TYPE(arg)->tp_name); |
| 192 | return NULL; |
| 193 | } |
| 194 | |
| 195 | if (size < 0) { |
| 196 | PyErr_Format(PyExc_ValueError, |
| 197 | "Negative size value %zd", size); |
| 198 | return NULL; |
| 199 | } |
| 200 | |
| 201 | if (size < self->string_size) { |
| 202 | self->string_size = size; |
| 203 | if (resize_buffer(self, size) < 0) |
| 204 | return NULL; |
| 205 | } |
| 206 | self->pos = size; |
| 207 | |
| 208 | return PyLong_FromSsize_t(size); |
| 209 | } |
| 210 | |
| 211 | static PyObject * |
| 212 | stringio_seek(StringIOObject *self, PyObject *args) |
| 213 | { |
| 214 | Py_ssize_t pos; |
| 215 | int mode = 0; |
| 216 | |
| 217 | if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode)) |
| 218 | return NULL; |
| 219 | |
| 220 | if (mode != 0 && mode != 1 && mode != 2) { |
| 221 | PyErr_Format(PyExc_ValueError, |
| 222 | "Invalid whence (%i, should be 0, 1 or 2)", mode); |
| 223 | return NULL; |
| 224 | } |
| 225 | else if (pos < 0 && mode == 0) { |
| 226 | PyErr_Format(PyExc_ValueError, |
| 227 | "Negative seek position %zd", pos); |
| 228 | return NULL; |
| 229 | } |
| 230 | else if (mode != 0 && pos != 0) { |
| 231 | PyErr_SetString(PyExc_IOError, |
| 232 | "Can't do nonzero cur-relative seeks"); |
| 233 | return NULL; |
| 234 | } |
| 235 | |
| 236 | /* mode 0: offset relative to beginning of the string. |
| 237 | mode 1: no change to current position. |
| 238 | mode 2: change position to end of file. */ |
| 239 | if (mode == 1) { |
| 240 | pos = self->pos; |
| 241 | } |
| 242 | else if (mode == 2) { |
| 243 | pos = self->string_size; |
| 244 | } |
| 245 | |
| 246 | self->pos = pos; |
| 247 | |
| 248 | return PyLong_FromSsize_t(self->pos); |
| 249 | } |
| 250 | |
| 251 | static PyObject * |
| 252 | stringio_write(StringIOObject *self, PyObject *obj) |
| 253 | { |
| 254 | const Py_UNICODE *str; |
| 255 | Py_ssize_t size; |
| 256 | Py_ssize_t n = 0; |
| 257 | |
| 258 | if (PyUnicode_Check(obj)) { |
| 259 | str = PyUnicode_AsUnicode(obj); |
| 260 | size = PyUnicode_GetSize(obj); |
| 261 | } |
| 262 | else { |
| 263 | PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'", |
| 264 | Py_TYPE(obj)->tp_name); |
| 265 | return NULL; |
| 266 | } |
| 267 | |
| 268 | if (size != 0) { |
| 269 | n = write_str(self, str, size); |
| 270 | if (n < 0) |
| 271 | return NULL; |
| 272 | } |
| 273 | |
| 274 | return PyLong_FromSsize_t(n); |
| 275 | } |
| 276 | |
| 277 | static void |
| 278 | stringio_dealloc(StringIOObject *self) |
| 279 | { |
| 280 | PyMem_Free(self->buf); |
| 281 | Py_TYPE(self)->tp_free(self); |
| 282 | } |
| 283 | |
| 284 | static PyObject * |
| 285 | stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
| 286 | { |
| 287 | StringIOObject *self; |
| 288 | |
| 289 | assert(type != NULL && type->tp_alloc != NULL); |
| 290 | self = (StringIOObject *)type->tp_alloc(type, 0); |
| 291 | if (self == NULL) |
| 292 | return NULL; |
| 293 | |
| 294 | self->string_size = 0; |
| 295 | self->pos = 0; |
| 296 | self->buf_size = 0; |
| 297 | self->buf = (Py_UNICODE *)PyMem_Malloc(0); |
| 298 | if (self->buf == NULL) { |
| 299 | Py_DECREF(self); |
| 300 | return PyErr_NoMemory(); |
| 301 | } |
| 302 | |
| 303 | return (PyObject *)self; |
| 304 | } |
| 305 | |
| 306 | static struct PyMethodDef stringio_methods[] = { |
| 307 | {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL}, |
| 308 | {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL}, |
| 309 | {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL}, |
| 310 | {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL}, |
| 311 | {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL}, |
| 312 | {"write", (PyCFunction)stringio_write, METH_O, NULL}, |
| 313 | {NULL, NULL} /* sentinel */ |
| 314 | }; |
| 315 | |
| 316 | static PyTypeObject StringIO_Type = { |
| 317 | PyVarObject_HEAD_INIT(NULL, 0) |
| 318 | "_stringio._StringIO", /*tp_name*/ |
| 319 | sizeof(StringIOObject), /*tp_basicsize*/ |
| 320 | 0, /*tp_itemsize*/ |
| 321 | (destructor)stringio_dealloc, /*tp_dealloc*/ |
| 322 | 0, /*tp_print*/ |
| 323 | 0, /*tp_getattr*/ |
| 324 | 0, /*tp_setattr*/ |
| 325 | 0, /*tp_compare*/ |
| 326 | 0, /*tp_repr*/ |
| 327 | 0, /*tp_as_number*/ |
| 328 | 0, /*tp_as_sequence*/ |
| 329 | 0, /*tp_as_mapping*/ |
| 330 | 0, /*tp_hash*/ |
| 331 | 0, /*tp_call*/ |
| 332 | 0, /*tp_str*/ |
| 333 | 0, /*tp_getattro*/ |
| 334 | 0, /*tp_setattro*/ |
| 335 | 0, /*tp_as_buffer*/ |
| 336 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
| 337 | 0, /*tp_doc*/ |
| 338 | 0, /*tp_traverse*/ |
| 339 | 0, /*tp_clear*/ |
| 340 | 0, /*tp_richcompare*/ |
| 341 | 0, /*tp_weaklistoffset*/ |
| 342 | 0, /*tp_iter*/ |
| 343 | 0, /*tp_iternext*/ |
| 344 | stringio_methods, /*tp_methods*/ |
| 345 | 0, /*tp_members*/ |
| 346 | 0, /*tp_getset*/ |
| 347 | 0, /*tp_base*/ |
| 348 | 0, /*tp_dict*/ |
| 349 | 0, /*tp_descr_get*/ |
| 350 | 0, /*tp_descr_set*/ |
| 351 | 0, /*tp_dictoffset*/ |
| 352 | 0, /*tp_init*/ |
| 353 | 0, /*tp_alloc*/ |
| 354 | stringio_new, /*tp_new*/ |
| 355 | }; |
| 356 | |
| 357 | static struct PyModuleDef _stringiomodule = { |
| 358 | PyModuleDef_HEAD_INIT, |
| 359 | "_stringio", |
| 360 | NULL, |
| 361 | -1, |
| 362 | NULL, |
| 363 | NULL, |
| 364 | NULL, |
| 365 | NULL, |
| 366 | NULL |
| 367 | }; |
| 368 | |
| 369 | PyMODINIT_FUNC |
| 370 | PyInit__stringio(void) |
| 371 | { |
| 372 | PyObject *m; |
| 373 | |
| 374 | if (PyType_Ready(&StringIO_Type) < 0) |
| 375 | return NULL; |
| 376 | m = PyModule_Create(&_stringiomodule); |
| 377 | if (m == NULL) |
| 378 | return NULL; |
| 379 | Py_INCREF(&StringIO_Type); |
| 380 | if (PyModule_AddObject(m, "_StringIO", (PyObject *)&StringIO_Type) < 0) |
| 381 | return NULL; |
| 382 | return m; |
| 383 | } |