blob: e627258b1ce9cead7615979b0a62b4c003498048 [file] [log] [blame]
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00001#include "Python.h"
2
3/* This module is a stripped down version of _bytesio.c with a Py_UNICODE
4 buffer. Most of the functionality is provided by subclassing _StringIO. */
5
6
7typedef struct {
8 PyObject_HEAD
9 Py_UNICODE *buf;
10 Py_ssize_t pos;
11 Py_ssize_t string_size;
12 size_t buf_size;
13} StringIOObject;
14
15
16/* Internal routine for changing the size, in terms of characters, of the
17 buffer of StringIO objects. The caller should ensure that the 'size'
18 argument is non-negative. Returns 0 on success, -1 otherwise. */
19static int
20resize_buffer(StringIOObject *self, size_t size)
21{
22 /* Here, unsigned types are used to avoid dealing with signed integer
23 overflow, which is undefined in C. */
24 size_t alloc = self->buf_size;
25 Py_UNICODE *new_buf = NULL;
26
27 assert(self->buf != NULL);
28
29 /* For simplicity, stay in the range of the signed type. Anyway, Python
30 doesn't allow strings to be longer than this. */
31 if (size > PY_SSIZE_T_MAX)
32 goto overflow;
33
34 if (size < alloc / 2) {
35 /* Major downsize; resize down to exact size. */
36 alloc = size + 1;
37 }
38 else if (size < alloc) {
39 /* Within allocated size; quick exit */
40 return 0;
41 }
42 else if (size <= alloc * 1.125) {
43 /* Moderate upsize; overallocate similar to list_resize() */
44 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
45 }
46 else {
47 /* Major upsize; resize up to exact size */
48 alloc = size + 1;
49 }
50
51 if (alloc > ((size_t)-1) / sizeof(Py_UNICODE))
52 goto overflow;
53 new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf,
54 alloc * sizeof(Py_UNICODE));
55 if (new_buf == NULL) {
56 PyErr_NoMemory();
57 return -1;
58 }
59 self->buf_size = alloc;
60 self->buf = new_buf;
61
62 return 0;
63
64 overflow:
65 PyErr_SetString(PyExc_OverflowError,
66 "new buffer size too large");
67 return -1;
68}
69
70/* Internal routine for writing a string of characters to the buffer of a
71 StringIO object. Returns the number of bytes wrote, or -1 on error. */
72static Py_ssize_t
73write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
74{
75 assert(self->buf != NULL);
76 assert(self->pos >= 0);
77 assert(len >= 0);
78
79 /* This overflow check is not strictly necessary. However, it avoids us to
80 deal with funky things like comparing an unsigned and a signed
81 integer. */
82 if (self->pos > PY_SSIZE_T_MAX - len) {
83 PyErr_SetString(PyExc_OverflowError,
84 "new position too large");
85 return -1;
86 }
87 if (self->pos + len > self->string_size) {
88 if (resize_buffer(self, self->pos + len) < 0)
89 return -1;
90 }
91
92 if (self->pos > self->string_size) {
93 /* In case of overseek, pad with null bytes the buffer region between
94 the end of stream and the current position.
95
96 0 lo string_size hi
97 | |<---used--->|<----------available----------->|
98 | | <--to pad-->|<---to write---> |
99 0 buf positon
100
101 */
102 memset(self->buf + self->string_size, '\0',
103 (self->pos - self->string_size) * sizeof(Py_UNICODE));
104 }
105
106 /* Copy the data to the internal buffer, overwriting some of the
107 existing data if self->pos < self->string_size. */
108 memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
109 self->pos += len;
110
111 /* Set the new length of the internal string if it has changed */
112 if (self->string_size < self->pos) {
113 self->string_size = self->pos;
114 }
115
116 return len;
117}
118
119static PyObject *
120stringio_getvalue(StringIOObject *self)
121{
122 return PyUnicode_FromUnicode(self->buf, self->string_size);
123}
124
125static PyObject *
126stringio_tell(StringIOObject *self)
127{
128 return PyLong_FromSsize_t(self->pos);
129}
130
131static PyObject *
132stringio_read(StringIOObject *self, PyObject *args)
133{
134 Py_ssize_t size, n;
135 Py_UNICODE *output;
136 PyObject *arg = Py_None;
137
138 if (!PyArg_ParseTuple(args, "|O:read", &arg))
139 return NULL;
140
141 if (PyLong_Check(arg)) {
142 size = PyLong_AsSsize_t(arg);
Amaury Forgeot d'Arc58fb9052008-09-30 20:22:44 +0000143 if (size == -1 && PyErr_Occurred())
144 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000145 }
146 else if (arg == Py_None) {
147 /* Read until EOF is reached, by default. */
148 size = -1;
149 }
150 else {
151 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
152 Py_TYPE(arg)->tp_name);
153 return NULL;
154 }
155
156 /* adjust invalid sizes */
157 n = self->string_size - self->pos;
158 if (size < 0 || size > n) {
159 size = n;
160 if (size < 0)
161 size = 0;
162 }
163
164 assert(self->buf != NULL);
165 output = self->buf + self->pos;
166 self->pos += size;
167
168 return PyUnicode_FromUnicode(output, size);
169}
170
171static PyObject *
172stringio_truncate(StringIOObject *self, PyObject *args)
173{
174 Py_ssize_t size;
175 PyObject *arg = Py_None;
176
177 if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
178 return NULL;
179
180 if (PyLong_Check(arg)) {
181 size = PyLong_AsSsize_t(arg);
Benjamin Petersonc9e435e2008-09-30 02:22:04 +0000182 if (size == -1 && PyErr_Occurred())
183 return NULL;
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000184 }
185 else if (arg == Py_None) {
186 /* Truncate to current position if no argument is passed. */
187 size = self->pos;
188 }
189 else {
190 PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
191 Py_TYPE(arg)->tp_name);
192 return NULL;
193 }
194
195 if (size < 0) {
196 PyErr_Format(PyExc_ValueError,
197 "Negative size value %zd", size);
198 return NULL;
199 }
200
201 if (size < self->string_size) {
202 self->string_size = size;
203 if (resize_buffer(self, size) < 0)
204 return NULL;
205 }
206 self->pos = size;
207
208 return PyLong_FromSsize_t(size);
209}
210
211static PyObject *
212stringio_seek(StringIOObject *self, PyObject *args)
213{
214 Py_ssize_t pos;
215 int mode = 0;
216
217 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
218 return NULL;
219
220 if (mode != 0 && mode != 1 && mode != 2) {
221 PyErr_Format(PyExc_ValueError,
222 "Invalid whence (%i, should be 0, 1 or 2)", mode);
223 return NULL;
224 }
225 else if (pos < 0 && mode == 0) {
226 PyErr_Format(PyExc_ValueError,
227 "Negative seek position %zd", pos);
228 return NULL;
229 }
230 else if (mode != 0 && pos != 0) {
231 PyErr_SetString(PyExc_IOError,
232 "Can't do nonzero cur-relative seeks");
233 return NULL;
234 }
235
236 /* mode 0: offset relative to beginning of the string.
237 mode 1: no change to current position.
238 mode 2: change position to end of file. */
239 if (mode == 1) {
240 pos = self->pos;
241 }
242 else if (mode == 2) {
243 pos = self->string_size;
244 }
245
246 self->pos = pos;
247
248 return PyLong_FromSsize_t(self->pos);
249}
250
251static PyObject *
252stringio_write(StringIOObject *self, PyObject *obj)
253{
254 const Py_UNICODE *str;
255 Py_ssize_t size;
256 Py_ssize_t n = 0;
257
258 if (PyUnicode_Check(obj)) {
259 str = PyUnicode_AsUnicode(obj);
260 size = PyUnicode_GetSize(obj);
261 }
262 else {
263 PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
264 Py_TYPE(obj)->tp_name);
265 return NULL;
266 }
267
268 if (size != 0) {
269 n = write_str(self, str, size);
270 if (n < 0)
271 return NULL;
272 }
273
274 return PyLong_FromSsize_t(n);
275}
276
277static void
278stringio_dealloc(StringIOObject *self)
279{
280 PyMem_Free(self->buf);
281 Py_TYPE(self)->tp_free(self);
282}
283
284static PyObject *
285stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
286{
287 StringIOObject *self;
288
289 assert(type != NULL && type->tp_alloc != NULL);
290 self = (StringIOObject *)type->tp_alloc(type, 0);
291 if (self == NULL)
292 return NULL;
293
294 self->string_size = 0;
295 self->pos = 0;
296 self->buf_size = 0;
297 self->buf = (Py_UNICODE *)PyMem_Malloc(0);
298 if (self->buf == NULL) {
299 Py_DECREF(self);
300 return PyErr_NoMemory();
301 }
302
303 return (PyObject *)self;
304}
305
306static struct PyMethodDef stringio_methods[] = {
307 {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL},
308 {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL},
309 {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL},
310 {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL},
311 {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL},
312 {"write", (PyCFunction)stringio_write, METH_O, NULL},
313 {NULL, NULL} /* sentinel */
314};
315
316static PyTypeObject StringIO_Type = {
317 PyVarObject_HEAD_INIT(NULL, 0)
318 "_stringio._StringIO", /*tp_name*/
319 sizeof(StringIOObject), /*tp_basicsize*/
320 0, /*tp_itemsize*/
321 (destructor)stringio_dealloc, /*tp_dealloc*/
322 0, /*tp_print*/
323 0, /*tp_getattr*/
324 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +0000325 0, /*tp_reserved*/
Alexandre Vassalotti794652d2008-06-11 22:58:36 +0000326 0, /*tp_repr*/
327 0, /*tp_as_number*/
328 0, /*tp_as_sequence*/
329 0, /*tp_as_mapping*/
330 0, /*tp_hash*/
331 0, /*tp_call*/
332 0, /*tp_str*/
333 0, /*tp_getattro*/
334 0, /*tp_setattro*/
335 0, /*tp_as_buffer*/
336 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
337 0, /*tp_doc*/
338 0, /*tp_traverse*/
339 0, /*tp_clear*/
340 0, /*tp_richcompare*/
341 0, /*tp_weaklistoffset*/
342 0, /*tp_iter*/
343 0, /*tp_iternext*/
344 stringio_methods, /*tp_methods*/
345 0, /*tp_members*/
346 0, /*tp_getset*/
347 0, /*tp_base*/
348 0, /*tp_dict*/
349 0, /*tp_descr_get*/
350 0, /*tp_descr_set*/
351 0, /*tp_dictoffset*/
352 0, /*tp_init*/
353 0, /*tp_alloc*/
354 stringio_new, /*tp_new*/
355};
356
357static struct PyModuleDef _stringiomodule = {
358 PyModuleDef_HEAD_INIT,
359 "_stringio",
360 NULL,
361 -1,
362 NULL,
363 NULL,
364 NULL,
365 NULL,
366 NULL
367};
368
369PyMODINIT_FUNC
370PyInit__stringio(void)
371{
372 PyObject *m;
373
374 if (PyType_Ready(&StringIO_Type) < 0)
375 return NULL;
376 m = PyModule_Create(&_stringiomodule);
377 if (m == NULL)
378 return NULL;
379 Py_INCREF(&StringIO_Type);
380 if (PyModule_AddObject(m, "_StringIO", (PyObject *)&StringIO_Type) < 0)
381 return NULL;
382 return m;
383}