blob: 25fec6d9be8ed881f2df7e9f14465cd47f2fdb35 [file] [log] [blame]
/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
Christian Heimes3497f942008-05-26 12:29:14 +00008static PyByteArrayObject *nullbytes = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00009
10void
Christian Heimes3497f942008-05-26 12:29:14 +000011PyByteArray_Fini(void)
Christian Heimes1a6387e2008-03-26 12:49:49 +000012{
13 Py_CLEAR(nullbytes);
14}
15
16int
Christian Heimes3497f942008-05-26 12:29:14 +000017PyByteArray_Init(void)
Christian Heimes1a6387e2008-03-26 12:49:49 +000018{
Christian Heimes3497f942008-05-26 12:29:14 +000019 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
Christian Heimes1a6387e2008-03-26 12:49:49 +000020 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
/* end nullbytes support */
29
/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
37 if (PyInt_Check(arg)) {
38 face_value = PyInt_AsLong(arg);
39 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
42 }
43 }
44 else if (PyString_CheckExact(arg)) {
45 if (Py_SIZE(arg) != 1) {
46 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
47 return 0;
48 }
49 face_value = Py_CHARMASK(((PyStringObject*)arg)->ob_sval[0]);
50 }
51 else {
52 PyErr_Format(PyExc_TypeError, "an integer or string of size 1 is required");
53 return 0;
54 }
55
56 *value = face_value;
57 return 1;
58}
59
60static Py_ssize_t
Christian Heimes3497f942008-05-26 12:29:14 +000061bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes1a6387e2008-03-26 12:49:49 +000062{
63 if ( index != 0 ) {
64 PyErr_SetString(PyExc_SystemError,
65 "accessing non-existent bytes segment");
66 return -1;
67 }
68 *ptr = (void *)self->ob_bytes;
69 return Py_SIZE(self);
70}
71
72static Py_ssize_t
Christian Heimes3497f942008-05-26 12:29:14 +000073bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes1a6387e2008-03-26 12:49:49 +000074{
75 if ( index != 0 ) {
76 PyErr_SetString(PyExc_SystemError,
77 "accessing non-existent bytes segment");
78 return -1;
79 }
80 *ptr = (void *)self->ob_bytes;
81 return Py_SIZE(self);
82}
83
84static Py_ssize_t
Christian Heimes3497f942008-05-26 12:29:14 +000085bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
Christian Heimes1a6387e2008-03-26 12:49:49 +000086{
87 if ( lenp )
88 *lenp = Py_SIZE(self);
89 return 1;
90}
91
92static Py_ssize_t
Christian Heimes3497f942008-05-26 12:29:14 +000093bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes1a6387e2008-03-26 12:49:49 +000094{
95 if ( index != 0 ) {
96 PyErr_SetString(PyExc_SystemError,
97 "accessing non-existent bytes segment");
98 return -1;
99 }
100 *ptr = self->ob_bytes;
101 return Py_SIZE(self);
102}
103
104static int
Christian Heimes3497f942008-05-26 12:29:14 +0000105bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000106{
107 int ret;
108 void *ptr;
109 if (view == NULL) {
110 obj->ob_exports++;
111 return 0;
112 }
113 if (obj->ob_bytes == NULL)
114 ptr = "";
115 else
116 ptr = obj->ob_bytes;
117 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
118 if (ret >= 0) {
119 obj->ob_exports++;
120 }
121 return ret;
122}
123
124static void
Christian Heimes3497f942008-05-26 12:29:14 +0000125bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000126{
127 obj->ob_exports--;
128}
129
130static Py_ssize_t
131_getbuffer(PyObject *obj, Py_buffer *view)
132{
133 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
134
135 if (buffer == NULL || buffer->bf_getbuffer == NULL)
136 {
137 PyErr_Format(PyExc_TypeError,
138 "Type %.100s doesn't support the buffer API",
139 Py_TYPE(obj)->tp_name);
140 return -1;
141 }
142
143 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
144 return -1;
145 return view->len;
146}
147
/* Direct API functions */
149
150PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000151PyByteArray_FromObject(PyObject *input)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000152{
Christian Heimes3497f942008-05-26 12:29:14 +0000153 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
Christian Heimes1a6387e2008-03-26 12:49:49 +0000154 input, NULL);
155}
156
157PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000158PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000159{
Christian Heimes3497f942008-05-26 12:29:14 +0000160 PyByteArrayObject *new;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000161 Py_ssize_t alloc;
162
Gregory P. Smithc00eb732008-04-09 23:16:37 +0000163 if (size < 0) {
164 PyErr_SetString(PyExc_SystemError,
Christian Heimes3497f942008-05-26 12:29:14 +0000165 "Negative size passed to PyByteArray_FromStringAndSize");
Gregory P. Smithc00eb732008-04-09 23:16:37 +0000166 return NULL;
167 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000168
Christian Heimes3497f942008-05-26 12:29:14 +0000169 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000170 if (new == NULL)
171 return NULL;
172
173 if (size == 0) {
174 new->ob_bytes = NULL;
175 alloc = 0;
176 }
177 else {
178 alloc = size + 1;
179 new->ob_bytes = PyMem_Malloc(alloc);
180 if (new->ob_bytes == NULL) {
181 Py_DECREF(new);
182 return PyErr_NoMemory();
183 }
184 if (bytes != NULL)
185 memcpy(new->ob_bytes, bytes, size);
186 new->ob_bytes[size] = '\0'; /* Trailing null byte */
187 }
188 Py_SIZE(new) = size;
189 new->ob_alloc = alloc;
190 new->ob_exports = 0;
191
192 return (PyObject *)new;
193}
194
195Py_ssize_t
Christian Heimes3497f942008-05-26 12:29:14 +0000196PyByteArray_Size(PyObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000197{
198 assert(self != NULL);
Christian Heimes3497f942008-05-26 12:29:14 +0000199 assert(PyByteArray_Check(self));
Christian Heimes1a6387e2008-03-26 12:49:49 +0000200
Christian Heimes3497f942008-05-26 12:29:14 +0000201 return PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000202}
203
204char *
Christian Heimes3497f942008-05-26 12:29:14 +0000205PyByteArray_AsString(PyObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000206{
207 assert(self != NULL);
Christian Heimes3497f942008-05-26 12:29:14 +0000208 assert(PyByteArray_Check(self));
Christian Heimes1a6387e2008-03-26 12:49:49 +0000209
Christian Heimes3497f942008-05-26 12:29:14 +0000210 return PyByteArray_AS_STRING(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000211}
212
213int
Christian Heimes3497f942008-05-26 12:29:14 +0000214PyByteArray_Resize(PyObject *self, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000215{
216 void *sval;
Christian Heimes3497f942008-05-26 12:29:14 +0000217 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000218
219 assert(self != NULL);
Christian Heimes3497f942008-05-26 12:29:14 +0000220 assert(PyByteArray_Check(self));
Christian Heimes1a6387e2008-03-26 12:49:49 +0000221 assert(size >= 0);
222
223 if (size < alloc / 2) {
224 /* Major downsize; resize down to exact size */
225 alloc = size + 1;
226 }
227 else if (size < alloc) {
228 /* Within allocated size; quick exit */
229 Py_SIZE(self) = size;
Christian Heimes3497f942008-05-26 12:29:14 +0000230 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Christian Heimes1a6387e2008-03-26 12:49:49 +0000231 return 0;
232 }
233 else if (size <= alloc * 1.125) {
234 /* Moderate upsize; overallocate similar to list_resize() */
235 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
236 }
237 else {
238 /* Major upsize; resize up to exact size */
239 alloc = size + 1;
240 }
241
Christian Heimes3497f942008-05-26 12:29:14 +0000242 if (((PyByteArrayObject *)self)->ob_exports > 0) {
Christian Heimes1a6387e2008-03-26 12:49:49 +0000243 /*
Christian Heimes3497f942008-05-26 12:29:14 +0000244 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
245 ((PyByteArrayObject *)self)->ob_bytes);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000246 */
247 PyErr_SetString(PyExc_BufferError,
248 "Existing exports of data: object cannot be re-sized");
249 return -1;
250 }
251
Christian Heimes3497f942008-05-26 12:29:14 +0000252 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000253 if (sval == NULL) {
254 PyErr_NoMemory();
255 return -1;
256 }
257
Christian Heimes3497f942008-05-26 12:29:14 +0000258 ((PyByteArrayObject *)self)->ob_bytes = sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000259 Py_SIZE(self) = size;
Christian Heimes3497f942008-05-26 12:29:14 +0000260 ((PyByteArrayObject *)self)->ob_alloc = alloc;
261 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
Christian Heimes1a6387e2008-03-26 12:49:49 +0000262
263 return 0;
264}
265
266PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000267PyByteArray_Concat(PyObject *a, PyObject *b)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000268{
269 Py_ssize_t size;
270 Py_buffer va, vb;
Christian Heimes3497f942008-05-26 12:29:14 +0000271 PyByteArrayObject *result = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000272
273 va.len = -1;
274 vb.len = -1;
275 if (_getbuffer(a, &va) < 0 ||
276 _getbuffer(b, &vb) < 0) {
277 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
278 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
279 goto done;
280 }
281
282 size = va.len + vb.len;
283 if (size < 0) {
284 return PyErr_NoMemory();
285 goto done;
286 }
287
Christian Heimes3497f942008-05-26 12:29:14 +0000288 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000289 if (result != NULL) {
290 memcpy(result->ob_bytes, va.buf, va.len);
291 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
292 }
293
294 done:
295 if (va.len != -1)
296 PyObject_ReleaseBuffer(a, &va);
297 if (vb.len != -1)
298 PyObject_ReleaseBuffer(b, &vb);
299 return (PyObject *)result;
300}
301
/* Functions stuffed into the type object */
303
304static Py_ssize_t
Christian Heimes3497f942008-05-26 12:29:14 +0000305bytes_length(PyByteArrayObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000306{
307 return Py_SIZE(self);
308}
309
310static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000311bytes_iconcat(PyByteArrayObject *self, PyObject *other)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000312{
313 Py_ssize_t mysize;
314 Py_ssize_t size;
315 Py_buffer vo;
316
317 if (_getbuffer(other, &vo) < 0) {
Alexandre Vassalotti1aed6242008-05-09 21:49:43 +0000318 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
319 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000320 return NULL;
321 }
322
323 mysize = Py_SIZE(self);
324 size = mysize + vo.len;
325 if (size < 0) {
326 PyObject_ReleaseBuffer(other, &vo);
327 return PyErr_NoMemory();
328 }
329 if (size < self->ob_alloc) {
330 Py_SIZE(self) = size;
331 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
332 }
Christian Heimes3497f942008-05-26 12:29:14 +0000333 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
Christian Heimes1a6387e2008-03-26 12:49:49 +0000334 PyObject_ReleaseBuffer(other, &vo);
335 return NULL;
336 }
337 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
338 PyObject_ReleaseBuffer(other, &vo);
339 Py_INCREF(self);
340 return (PyObject *)self;
341}
342
343static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000344bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000345{
Christian Heimes3497f942008-05-26 12:29:14 +0000346 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000347 Py_ssize_t mysize;
348 Py_ssize_t size;
349
350 if (count < 0)
351 count = 0;
352 mysize = Py_SIZE(self);
353 size = mysize * count;
354 if (count != 0 && size / count != mysize)
355 return PyErr_NoMemory();
Christian Heimes3497f942008-05-26 12:29:14 +0000356 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000357 if (result != NULL && size != 0) {
358 if (mysize == 1)
359 memset(result->ob_bytes, self->ob_bytes[0], size);
360 else {
361 Py_ssize_t i;
362 for (i = 0; i < count; i++)
363 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
364 }
365 }
366 return (PyObject *)result;
367}
368
369static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000370bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000371{
372 Py_ssize_t mysize;
373 Py_ssize_t size;
374
375 if (count < 0)
376 count = 0;
377 mysize = Py_SIZE(self);
378 size = mysize * count;
379 if (count != 0 && size / count != mysize)
380 return PyErr_NoMemory();
381 if (size < self->ob_alloc) {
382 Py_SIZE(self) = size;
383 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
384 }
Christian Heimes3497f942008-05-26 12:29:14 +0000385 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000386 return NULL;
387
388 if (mysize == 1)
389 memset(self->ob_bytes, self->ob_bytes[0], size);
390 else {
391 Py_ssize_t i;
392 for (i = 1; i < count; i++)
393 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
394 }
395
396 Py_INCREF(self);
397 return (PyObject *)self;
398}
399
400static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000401bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000402{
403 if (i < 0)
404 i += Py_SIZE(self);
405 if (i < 0 || i >= Py_SIZE(self)) {
406 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
407 return NULL;
408 }
409 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
410}
411
412static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000413bytes_subscript(PyByteArrayObject *self, PyObject *item)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000414{
415 if (PyIndex_Check(item)) {
416 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
417
418 if (i == -1 && PyErr_Occurred())
419 return NULL;
420
421 if (i < 0)
Christian Heimes3497f942008-05-26 12:29:14 +0000422 i += PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000423
424 if (i < 0 || i >= Py_SIZE(self)) {
425 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
426 return NULL;
427 }
428 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
429 }
430 else if (PySlice_Check(item)) {
431 Py_ssize_t start, stop, step, slicelength, cur, i;
432 if (PySlice_GetIndicesEx((PySliceObject *)item,
Christian Heimes3497f942008-05-26 12:29:14 +0000433 PyByteArray_GET_SIZE(self),
Christian Heimes1a6387e2008-03-26 12:49:49 +0000434 &start, &stop, &step, &slicelength) < 0) {
435 return NULL;
436 }
437
438 if (slicelength <= 0)
Christian Heimes3497f942008-05-26 12:29:14 +0000439 return PyByteArray_FromStringAndSize("", 0);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 else if (step == 1) {
Christian Heimes3497f942008-05-26 12:29:14 +0000441 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
Christian Heimes1a6387e2008-03-26 12:49:49 +0000442 slicelength);
443 }
444 else {
Christian Heimes3497f942008-05-26 12:29:14 +0000445 char *source_buf = PyByteArray_AS_STRING(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000446 char *result_buf = (char *)PyMem_Malloc(slicelength);
447 PyObject *result;
448
449 if (result_buf == NULL)
450 return PyErr_NoMemory();
451
452 for (cur = start, i = 0; i < slicelength;
453 cur += step, i++) {
454 result_buf[i] = source_buf[cur];
455 }
Christian Heimes3497f942008-05-26 12:29:14 +0000456 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000457 PyMem_Free(result_buf);
458 return result;
459 }
460 }
461 else {
462 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
463 return NULL;
464 }
465}
466
467static int
Christian Heimes3497f942008-05-26 12:29:14 +0000468bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
Christian Heimes1a6387e2008-03-26 12:49:49 +0000469 PyObject *values)
470{
471 Py_ssize_t avail, needed;
472 void *bytes;
473 Py_buffer vbytes;
474 int res = 0;
475
476 vbytes.len = -1;
477 if (values == (PyObject *)self) {
478 /* Make a copy and call this function recursively */
479 int err;
Christian Heimes3497f942008-05-26 12:29:14 +0000480 values = PyByteArray_FromObject(values);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000481 if (values == NULL)
482 return -1;
483 err = bytes_setslice(self, lo, hi, values);
484 Py_DECREF(values);
485 return err;
486 }
487 if (values == NULL) {
488 /* del b[lo:hi] */
489 bytes = NULL;
490 needed = 0;
491 }
492 else {
493 if (_getbuffer(values, &vbytes) < 0) {
494 PyErr_Format(PyExc_TypeError,
495 "can't set bytes slice from %.100s",
496 Py_TYPE(values)->tp_name);
497 return -1;
498 }
499 needed = vbytes.len;
500 bytes = vbytes.buf;
501 }
502
503 if (lo < 0)
504 lo = 0;
505 if (hi < lo)
506 hi = lo;
507 if (hi > Py_SIZE(self))
508 hi = Py_SIZE(self);
509
510 avail = hi - lo;
511 if (avail < 0)
512 lo = hi = avail = 0;
513
514 if (avail != needed) {
515 if (avail > needed) {
516 /*
517 0 lo hi old_size
518 | |<----avail----->|<-----tomove------>|
519 | |<-needed->|<-----tomove------>|
520 0 lo new_hi new_size
521 */
522 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
523 Py_SIZE(self) - hi);
524 }
525 /* XXX(nnorwitz): need to verify this can't overflow! */
Christian Heimes3497f942008-05-26 12:29:14 +0000526 if (PyByteArray_Resize((PyObject *)self,
Christian Heimes1a6387e2008-03-26 12:49:49 +0000527 Py_SIZE(self) + needed - avail) < 0) {
528 res = -1;
529 goto finish;
530 }
531 if (avail < needed) {
532 /*
533 0 lo hi old_size
534 | |<-avail->|<-----tomove------>|
535 | |<----needed---->|<-----tomove------>|
536 0 lo new_hi new_size
537 */
538 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
539 Py_SIZE(self) - lo - needed);
540 }
541 }
542
543 if (needed > 0)
544 memcpy(self->ob_bytes + lo, bytes, needed);
545
546
547 finish:
548 if (vbytes.len != -1)
549 PyObject_ReleaseBuffer(values, &vbytes);
550 return res;
551}
552
553static int
Christian Heimes3497f942008-05-26 12:29:14 +0000554bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000555{
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000556 int ival;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000557
558 if (i < 0)
559 i += Py_SIZE(self);
560
561 if (i < 0 || i >= Py_SIZE(self)) {
562 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
563 return -1;
564 }
565
566 if (value == NULL)
567 return bytes_setslice(self, i, i+1, NULL);
568
569 if (!_getbytevalue(value, &ival))
570 return -1;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000571
572 self->ob_bytes[i] = ival;
573 return 0;
574}
575
576static int
Christian Heimes3497f942008-05-26 12:29:14 +0000577bytes_ass_subscript(PyByteArrayObject *self, PyObject *item, PyObject *values)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
579 Py_ssize_t start, stop, step, slicelen, needed;
580 char *bytes;
581
582 if (PyIndex_Check(item)) {
583 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
584
585 if (i == -1 && PyErr_Occurred())
586 return -1;
587
588 if (i < 0)
Christian Heimes3497f942008-05-26 12:29:14 +0000589 i += PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000590
591 if (i < 0 || i >= Py_SIZE(self)) {
592 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
593 return -1;
594 }
595
596 if (values == NULL) {
597 /* Fall through to slice assignment */
598 start = i;
599 stop = i + 1;
600 step = 1;
601 slicelen = 1;
602 }
603 else {
604 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
605 if (ival == -1 && PyErr_Occurred()) {
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000606 int int_value;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000607 /* Also accept str of size 1 in 2.x */
608 PyErr_Clear();
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000609 if (!_getbytevalue(values, &int_value))
Christian Heimes1a6387e2008-03-26 12:49:49 +0000610 return -1;
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000611 ival = (int) int_value;
612 } else if (ival < 0 || ival >= 256) {
Christian Heimes1a6387e2008-03-26 12:49:49 +0000613 PyErr_SetString(PyExc_ValueError,
614 "byte must be in range(0, 256)");
615 return -1;
616 }
617 self->ob_bytes[i] = (char)ival;
618 return 0;
619 }
620 }
621 else if (PySlice_Check(item)) {
622 if (PySlice_GetIndicesEx((PySliceObject *)item,
Christian Heimes3497f942008-05-26 12:29:14 +0000623 PyByteArray_GET_SIZE(self),
Christian Heimes1a6387e2008-03-26 12:49:49 +0000624 &start, &stop, &step, &slicelen) < 0) {
625 return -1;
626 }
627 }
628 else {
629 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
630 return -1;
631 }
632
633 if (values == NULL) {
634 bytes = NULL;
635 needed = 0;
636 }
Christian Heimes3497f942008-05-26 12:29:14 +0000637 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
Christian Heimes1a6387e2008-03-26 12:49:49 +0000638 /* Make a copy an call this function recursively */
639 int err;
Christian Heimes3497f942008-05-26 12:29:14 +0000640 values = PyByteArray_FromObject(values);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000641 if (values == NULL)
642 return -1;
643 err = bytes_ass_subscript(self, item, values);
644 Py_DECREF(values);
645 return err;
646 }
647 else {
Christian Heimes3497f942008-05-26 12:29:14 +0000648 assert(PyByteArray_Check(values));
649 bytes = ((PyByteArrayObject *)values)->ob_bytes;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000650 needed = Py_SIZE(values);
651 }
652 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
653 if ((step < 0 && start < stop) ||
654 (step > 0 && start > stop))
655 stop = start;
656 if (step == 1) {
657 if (slicelen != needed) {
658 if (slicelen > needed) {
659 /*
660 0 start stop old_size
661 | |<---slicelen--->|<-----tomove------>|
662 | |<-needed->|<-----tomove------>|
663 0 lo new_hi new_size
664 */
665 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
666 Py_SIZE(self) - stop);
667 }
Christian Heimes3497f942008-05-26 12:29:14 +0000668 if (PyByteArray_Resize((PyObject *)self,
Christian Heimes1a6387e2008-03-26 12:49:49 +0000669 Py_SIZE(self) + needed - slicelen) < 0)
670 return -1;
671 if (slicelen < needed) {
672 /*
673 0 lo hi old_size
674 | |<-avail->|<-----tomove------>|
675 | |<----needed---->|<-----tomove------>|
676 0 lo new_hi new_size
677 */
678 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
679 Py_SIZE(self) - start - needed);
680 }
681 }
682
683 if (needed > 0)
684 memcpy(self->ob_bytes + start, bytes, needed);
685
686 return 0;
687 }
688 else {
689 if (needed == 0) {
690 /* Delete slice */
691 Py_ssize_t cur, i;
692
693 if (step < 0) {
694 stop = start + 1;
695 start = stop + step * (slicelen - 1) - 1;
696 step = -step;
697 }
698 for (cur = start, i = 0;
699 i < slicelen; cur += step, i++) {
700 Py_ssize_t lim = step - 1;
701
Christian Heimes3497f942008-05-26 12:29:14 +0000702 if (cur + step >= PyByteArray_GET_SIZE(self))
703 lim = PyByteArray_GET_SIZE(self) - cur - 1;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000704
705 memmove(self->ob_bytes + cur - i,
706 self->ob_bytes + cur + 1, lim);
707 }
708 /* Move the tail of the bytes, in one chunk */
709 cur = start + slicelen*step;
Christian Heimes3497f942008-05-26 12:29:14 +0000710 if (cur < PyByteArray_GET_SIZE(self)) {
Christian Heimes1a6387e2008-03-26 12:49:49 +0000711 memmove(self->ob_bytes + cur - slicelen,
712 self->ob_bytes + cur,
Christian Heimes3497f942008-05-26 12:29:14 +0000713 PyByteArray_GET_SIZE(self) - cur);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000714 }
Christian Heimes3497f942008-05-26 12:29:14 +0000715 if (PyByteArray_Resize((PyObject *)self,
716 PyByteArray_GET_SIZE(self) - slicelen) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000717 return -1;
718
719 return 0;
720 }
721 else {
722 /* Assign slice */
723 Py_ssize_t cur, i;
724
725 if (needed != slicelen) {
726 PyErr_Format(PyExc_ValueError,
727 "attempt to assign bytes of size %zd "
728 "to extended slice of size %zd",
729 needed, slicelen);
730 return -1;
731 }
732 for (cur = start, i = 0; i < slicelen; cur += step, i++)
733 self->ob_bytes[cur] = bytes[i];
734 return 0;
735 }
736 }
737}
738
739static int
Christian Heimes3497f942008-05-26 12:29:14 +0000740bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000741{
742 static char *kwlist[] = {"source", "encoding", "errors", 0};
743 PyObject *arg = NULL;
744 const char *encoding = NULL;
745 const char *errors = NULL;
746 Py_ssize_t count;
747 PyObject *it;
748 PyObject *(*iternext)(PyObject *);
749
750 if (Py_SIZE(self) != 0) {
751 /* Empty previous contents (yes, do this first of all!) */
Christian Heimes3497f942008-05-26 12:29:14 +0000752 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000753 return -1;
754 }
755
756 /* Parse arguments */
757 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
758 &arg, &encoding, &errors))
759 return -1;
760
761 /* Make a quick exit if no first argument */
762 if (arg == NULL) {
763 if (encoding != NULL || errors != NULL) {
764 PyErr_SetString(PyExc_TypeError,
765 "encoding or errors without sequence argument");
766 return -1;
767 }
768 return 0;
769 }
770
771 if (PyString_Check(arg)) {
772 PyObject *new, *encoded;
773 if (encoding != NULL) {
774 encoded = PyCodec_Encode(arg, encoding, errors);
775 if (encoded == NULL)
776 return -1;
777 assert(PyString_Check(encoded));
778 }
779 else {
780 encoded = arg;
781 Py_INCREF(arg);
782 }
783 new = bytes_iconcat(self, arg);
784 Py_DECREF(encoded);
785 if (new == NULL)
786 return -1;
787 Py_DECREF(new);
788 return 0;
789 }
790
791 if (PyUnicode_Check(arg)) {
792 /* Encode via the codec registry */
793 PyObject *encoded, *new;
794 if (encoding == NULL) {
795 PyErr_SetString(PyExc_TypeError,
796 "unicode argument without an encoding");
797 return -1;
798 }
799 encoded = PyCodec_Encode(arg, encoding, errors);
800 if (encoded == NULL)
801 return -1;
802 assert(PyString_Check(encoded));
803 new = bytes_iconcat(self, encoded);
804 Py_DECREF(encoded);
805 if (new == NULL)
806 return -1;
807 Py_DECREF(new);
808 return 0;
809 }
810
811 /* If it's not unicode, there can't be encoding or errors */
812 if (encoding != NULL || errors != NULL) {
813 PyErr_SetString(PyExc_TypeError,
814 "encoding or errors without a string argument");
815 return -1;
816 }
817
818 /* Is it an int? */
819 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
820 if (count == -1 && PyErr_Occurred())
821 PyErr_Clear();
822 else {
823 if (count < 0) {
824 PyErr_SetString(PyExc_ValueError, "negative count");
825 return -1;
826 }
827 if (count > 0) {
Christian Heimes3497f942008-05-26 12:29:14 +0000828 if (PyByteArray_Resize((PyObject *)self, count))
Christian Heimes1a6387e2008-03-26 12:49:49 +0000829 return -1;
830 memset(self->ob_bytes, 0, count);
831 }
832 return 0;
833 }
834
835 /* Use the buffer API */
836 if (PyObject_CheckBuffer(arg)) {
837 Py_ssize_t size;
838 Py_buffer view;
839 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
840 return -1;
841 size = view.len;
Christian Heimes3497f942008-05-26 12:29:14 +0000842 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000843 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
844 goto fail;
845 PyObject_ReleaseBuffer(arg, &view);
846 return 0;
847 fail:
848 PyObject_ReleaseBuffer(arg, &view);
849 return -1;
850 }
851
852 /* XXX Optimize this if the arguments is a list, tuple */
853
854 /* Get the iterator */
855 it = PyObject_GetIter(arg);
856 if (it == NULL)
857 return -1;
858 iternext = *Py_TYPE(it)->tp_iternext;
859
860 /* Run the iterator to exhaustion */
861 for (;;) {
862 PyObject *item;
863 Py_ssize_t value;
864
865 /* Get the next item */
866 item = iternext(it);
867 if (item == NULL) {
868 if (PyErr_Occurred()) {
869 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
870 goto error;
871 PyErr_Clear();
872 }
873 break;
874 }
875
876 /* Interpret it as an int (__index__) */
877 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
878 Py_DECREF(item);
879 if (value == -1 && PyErr_Occurred())
880 goto error;
881
882 /* Range check */
883 if (value < 0 || value >= 256) {
884 PyErr_SetString(PyExc_ValueError,
885 "bytes must be in range(0, 256)");
886 goto error;
887 }
888
889 /* Append the byte */
890 if (Py_SIZE(self) < self->ob_alloc)
891 Py_SIZE(self)++;
Christian Heimes3497f942008-05-26 12:29:14 +0000892 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000893 goto error;
894 self->ob_bytes[Py_SIZE(self)-1] = value;
895 }
896
897 /* Clean up and return success */
898 Py_DECREF(it);
899 return 0;
900
901 error:
902 /* Error handling when it != NULL */
903 Py_DECREF(it);
904 return -1;
905}
906
907/* Mostly copied from string_repr, but without the
908 "smart quote" functionality. */
909static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +0000910bytes_repr(PyByteArrayObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000911{
912 static const char *hexdigits = "0123456789abcdef";
913 const char *quote_prefix = "bytearray(b";
914 const char *quote_postfix = ")";
915 Py_ssize_t length = Py_SIZE(self);
916 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
917 size_t newsize = 14 + 4 * length;
918 PyObject *v;
919 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
920 PyErr_SetString(PyExc_OverflowError,
921 "bytearray object is too large to make repr");
922 return NULL;
923 }
924 v = PyUnicode_FromUnicode(NULL, newsize);
925 if (v == NULL) {
926 return NULL;
927 }
928 else {
929 register Py_ssize_t i;
930 register Py_UNICODE c;
931 register Py_UNICODE *p;
932 int quote;
933
934 /* Figure out which quote to use; single is preferred */
935 quote = '\'';
936 {
937 char *test, *start;
Christian Heimes3497f942008-05-26 12:29:14 +0000938 start = PyByteArray_AS_STRING(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000939 for (test = start; test < start+length; ++test) {
940 if (*test == '"') {
941 quote = '\''; /* back to single */
942 goto decided;
943 }
944 else if (*test == '\'')
945 quote = '"';
946 }
947 decided:
948 ;
949 }
950
951 p = PyUnicode_AS_UNICODE(v);
952 while (*quote_prefix)
953 *p++ = *quote_prefix++;
954 *p++ = quote;
955
956 for (i = 0; i < length; i++) {
957 /* There's at least enough room for a hex escape
958 and a closing quote. */
959 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
960 c = self->ob_bytes[i];
961 if (c == '\'' || c == '\\')
962 *p++ = '\\', *p++ = c;
963 else if (c == '\t')
964 *p++ = '\\', *p++ = 't';
965 else if (c == '\n')
966 *p++ = '\\', *p++ = 'n';
967 else if (c == '\r')
968 *p++ = '\\', *p++ = 'r';
969 else if (c == 0)
970 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
971 else if (c < ' ' || c >= 0x7f) {
972 *p++ = '\\';
973 *p++ = 'x';
974 *p++ = hexdigits[(c & 0xf0) >> 4];
975 *p++ = hexdigits[c & 0xf];
976 }
977 else
978 *p++ = c;
979 }
980 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
981 *p++ = quote;
982 while (*quote_postfix) {
983 *p++ = *quote_postfix++;
984 }
985 *p = '\0';
986 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
987 Py_DECREF(v);
988 return NULL;
989 }
990 return v;
991 }
992}
993
994static PyObject *
995bytes_str(PyObject *op)
996{
997#if 0
998 if (Py_BytesWarningFlag) {
999 if (PyErr_WarnEx(PyExc_BytesWarning,
1000 "str() on a bytearray instance", 1))
1001 return NULL;
1002 }
Christian Heimes3497f942008-05-26 12:29:14 +00001003 return bytes_repr((PyByteArrayObject*)op);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001004#endif
Christian Heimes3497f942008-05-26 12:29:14 +00001005 return PyString_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001006}
1007
1008static PyObject *
1009bytes_richcompare(PyObject *self, PyObject *other, int op)
1010{
1011 Py_ssize_t self_size, other_size;
1012 Py_buffer self_bytes, other_bytes;
1013 PyObject *res;
1014 Py_ssize_t minsize;
1015 int cmp;
1016
1017 /* Bytes can be compared to anything that supports the (binary)
1018 buffer API. Except that a comparison with Unicode is always an
1019 error, even if the comparison is for equality. */
1020 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1021 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1022 if (Py_BytesWarningFlag && op == Py_EQ) {
1023 if (PyErr_WarnEx(PyExc_BytesWarning,
1024 "Comparsion between bytearray and string", 1))
1025 return NULL;
1026 }
1027
1028 Py_INCREF(Py_NotImplemented);
1029 return Py_NotImplemented;
1030 }
1031
1032 self_size = _getbuffer(self, &self_bytes);
1033 if (self_size < 0) {
1034 PyErr_Clear();
1035 Py_INCREF(Py_NotImplemented);
1036 return Py_NotImplemented;
1037 }
1038
1039 other_size = _getbuffer(other, &other_bytes);
1040 if (other_size < 0) {
1041 PyErr_Clear();
1042 PyObject_ReleaseBuffer(self, &self_bytes);
1043 Py_INCREF(Py_NotImplemented);
1044 return Py_NotImplemented;
1045 }
1046
1047 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1048 /* Shortcut: if the lengths differ, the objects differ */
1049 cmp = (op == Py_NE);
1050 }
1051 else {
1052 minsize = self_size;
1053 if (other_size < minsize)
1054 minsize = other_size;
1055
1056 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1057 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1058
1059 if (cmp == 0) {
1060 if (self_size < other_size)
1061 cmp = -1;
1062 else if (self_size > other_size)
1063 cmp = 1;
1064 }
1065
1066 switch (op) {
1067 case Py_LT: cmp = cmp < 0; break;
1068 case Py_LE: cmp = cmp <= 0; break;
1069 case Py_EQ: cmp = cmp == 0; break;
1070 case Py_NE: cmp = cmp != 0; break;
1071 case Py_GT: cmp = cmp > 0; break;
1072 case Py_GE: cmp = cmp >= 0; break;
1073 }
1074 }
1075
1076 res = cmp ? Py_True : Py_False;
1077 PyObject_ReleaseBuffer(self, &self_bytes);
1078 PyObject_ReleaseBuffer(other, &other_bytes);
1079 Py_INCREF(res);
1080 return res;
1081}
1082
1083static void
Christian Heimes3497f942008-05-26 12:29:14 +00001084bytes_dealloc(PyByteArrayObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001085{
1086 if (self->ob_bytes != 0) {
1087 PyMem_Free(self->ob_bytes);
1088 }
1089 Py_TYPE(self)->tp_free((PyObject *)self);
1090}
1091
1092
1093/* -------------------------------------------------------------------- */
1094/* Methods */
1095
1096#define STRINGLIB_CHAR char
1097#define STRINGLIB_CMP memcmp
Christian Heimes3497f942008-05-26 12:29:14 +00001098#define STRINGLIB_LEN PyByteArray_GET_SIZE
1099#define STRINGLIB_STR PyByteArray_AS_STRING
1100#define STRINGLIB_NEW PyByteArray_FromStringAndSize
Christian Heimes1a6387e2008-03-26 12:49:49 +00001101#define STRINGLIB_EMPTY nullbytes
Christian Heimes3497f942008-05-26 12:29:14 +00001102#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
Christian Heimes1a6387e2008-03-26 12:49:49 +00001103#define STRINGLIB_MUTABLE 1
1104
1105#include "stringlib/fastsearch.h"
1106#include "stringlib/count.h"
1107#include "stringlib/find.h"
1108#include "stringlib/partition.h"
1109#include "stringlib/ctype.h"
1110#include "stringlib/transmogrify.h"
1111
1112
1113/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1114were copied from the old char* style string object. */
1115
1116Py_LOCAL_INLINE(void)
1117_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1118{
1119 if (*end > len)
1120 *end = len;
1121 else if (*end < 0)
1122 *end += len;
1123 if (*end < 0)
1124 *end = 0;
1125 if (*start < 0)
1126 *start += len;
1127 if (*start < 0)
1128 *start = 0;
1129}
1130
1131
1132Py_LOCAL_INLINE(Py_ssize_t)
Christian Heimes3497f942008-05-26 12:29:14 +00001133bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001134{
1135 PyObject *subobj;
1136 Py_buffer subbuf;
1137 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1138 Py_ssize_t res;
1139
1140 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1141 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1142 return -2;
1143 if (_getbuffer(subobj, &subbuf) < 0)
1144 return -2;
1145 if (dir > 0)
1146 res = stringlib_find_slice(
Christian Heimes3497f942008-05-26 12:29:14 +00001147 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
Christian Heimes1a6387e2008-03-26 12:49:49 +00001148 subbuf.buf, subbuf.len, start, end);
1149 else
1150 res = stringlib_rfind_slice(
Christian Heimes3497f942008-05-26 12:29:14 +00001151 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
Christian Heimes1a6387e2008-03-26 12:49:49 +00001152 subbuf.buf, subbuf.len, start, end);
1153 PyObject_ReleaseBuffer(subobj, &subbuf);
1154 return res;
1155}
1156
1157PyDoc_STRVAR(find__doc__,
1158"B.find(sub [,start [,end]]) -> int\n\
1159\n\
1160Return the lowest index in B where subsection sub is found,\n\
1161such that sub is contained within s[start,end]. Optional\n\
1162arguments start and end are interpreted as in slice notation.\n\
1163\n\
1164Return -1 on failure.");
1165
1166static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001167bytes_find(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001168{
1169 Py_ssize_t result = bytes_find_internal(self, args, +1);
1170 if (result == -2)
1171 return NULL;
1172 return PyInt_FromSsize_t(result);
1173}
1174
1175PyDoc_STRVAR(count__doc__,
1176"B.count(sub [,start [,end]]) -> int\n\
1177\n\
1178Return the number of non-overlapping occurrences of subsection sub in\n\
1179bytes B[start:end]. Optional arguments start and end are interpreted\n\
1180as in slice notation.");
1181
1182static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001183bytes_count(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001184{
1185 PyObject *sub_obj;
Christian Heimes3497f942008-05-26 12:29:14 +00001186 const char *str = PyByteArray_AS_STRING(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001187 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1188 Py_buffer vsub;
1189 PyObject *count_obj;
1190
1191 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1192 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1193 return NULL;
1194
1195 if (_getbuffer(sub_obj, &vsub) < 0)
1196 return NULL;
1197
Christian Heimes3497f942008-05-26 12:29:14 +00001198 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001199
1200 count_obj = PyInt_FromSsize_t(
1201 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1202 );
1203 PyObject_ReleaseBuffer(sub_obj, &vsub);
1204 return count_obj;
1205}
1206
1207
1208PyDoc_STRVAR(index__doc__,
1209"B.index(sub [,start [,end]]) -> int\n\
1210\n\
1211Like B.find() but raise ValueError when the subsection is not found.");
1212
1213static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001214bytes_index(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001215{
1216 Py_ssize_t result = bytes_find_internal(self, args, +1);
1217 if (result == -2)
1218 return NULL;
1219 if (result == -1) {
1220 PyErr_SetString(PyExc_ValueError,
1221 "subsection not found");
1222 return NULL;
1223 }
1224 return PyInt_FromSsize_t(result);
1225}
1226
1227
1228PyDoc_STRVAR(rfind__doc__,
1229"B.rfind(sub [,start [,end]]) -> int\n\
1230\n\
1231Return the highest index in B where subsection sub is found,\n\
1232such that sub is contained within s[start,end]. Optional\n\
1233arguments start and end are interpreted as in slice notation.\n\
1234\n\
1235Return -1 on failure.");
1236
1237static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001238bytes_rfind(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001239{
1240 Py_ssize_t result = bytes_find_internal(self, args, -1);
1241 if (result == -2)
1242 return NULL;
1243 return PyInt_FromSsize_t(result);
1244}
1245
1246
1247PyDoc_STRVAR(rindex__doc__,
1248"B.rindex(sub [,start [,end]]) -> int\n\
1249\n\
1250Like B.rfind() but raise ValueError when the subsection is not found.");
1251
1252static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001253bytes_rindex(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001254{
1255 Py_ssize_t result = bytes_find_internal(self, args, -1);
1256 if (result == -2)
1257 return NULL;
1258 if (result == -1) {
1259 PyErr_SetString(PyExc_ValueError,
1260 "subsection not found");
1261 return NULL;
1262 }
1263 return PyInt_FromSsize_t(result);
1264}
1265
1266
1267static int
1268bytes_contains(PyObject *self, PyObject *arg)
1269{
1270 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1271 if (ival == -1 && PyErr_Occurred()) {
1272 Py_buffer varg;
1273 int pos;
1274 PyErr_Clear();
1275 if (_getbuffer(arg, &varg) < 0)
1276 return -1;
Christian Heimes3497f942008-05-26 12:29:14 +00001277 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
Christian Heimes1a6387e2008-03-26 12:49:49 +00001278 varg.buf, varg.len, 0);
1279 PyObject_ReleaseBuffer(arg, &varg);
1280 return pos >= 0;
1281 }
1282 if (ival < 0 || ival >= 256) {
1283 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1284 return -1;
1285 }
1286
Christian Heimes3497f942008-05-26 12:29:14 +00001287 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001288}
1289
1290
1291/* Matches the end (direction >= 0) or start (direction < 0) of self
1292 * against substr, using the start and end arguments. Returns
1293 * -1 on error, 0 if not found and 1 if found.
1294 */
1295Py_LOCAL(int)
Christian Heimes3497f942008-05-26 12:29:14 +00001296_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001297 Py_ssize_t end, int direction)
1298{
Christian Heimes3497f942008-05-26 12:29:14 +00001299 Py_ssize_t len = PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001300 const char* str;
1301 Py_buffer vsubstr;
1302 int rv = 0;
1303
Christian Heimes3497f942008-05-26 12:29:14 +00001304 str = PyByteArray_AS_STRING(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001305
1306 if (_getbuffer(substr, &vsubstr) < 0)
1307 return -1;
1308
1309 _adjust_indices(&start, &end, len);
1310
1311 if (direction < 0) {
1312 /* startswith */
1313 if (start+vsubstr.len > len) {
1314 goto done;
1315 }
1316 } else {
1317 /* endswith */
1318 if (end-start < vsubstr.len || start > len) {
1319 goto done;
1320 }
1321
1322 if (end-vsubstr.len > start)
1323 start = end - vsubstr.len;
1324 }
1325 if (end-start >= vsubstr.len)
1326 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1327
1328done:
1329 PyObject_ReleaseBuffer(substr, &vsubstr);
1330 return rv;
1331}
1332
1333
1334PyDoc_STRVAR(startswith__doc__,
1335"B.startswith(prefix [,start [,end]]) -> bool\n\
1336\n\
1337Return True if B starts with the specified prefix, False otherwise.\n\
1338With optional start, test B beginning at that position.\n\
1339With optional end, stop comparing B at that position.\n\
1340prefix can also be a tuple of strings to try.");
1341
1342static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001343bytes_startswith(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001344{
1345 Py_ssize_t start = 0;
1346 Py_ssize_t end = PY_SSIZE_T_MAX;
1347 PyObject *subobj;
1348 int result;
1349
1350 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1351 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1352 return NULL;
1353 if (PyTuple_Check(subobj)) {
1354 Py_ssize_t i;
1355 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1356 result = _bytes_tailmatch(self,
1357 PyTuple_GET_ITEM(subobj, i),
1358 start, end, -1);
1359 if (result == -1)
1360 return NULL;
1361 else if (result) {
1362 Py_RETURN_TRUE;
1363 }
1364 }
1365 Py_RETURN_FALSE;
1366 }
1367 result = _bytes_tailmatch(self, subobj, start, end, -1);
1368 if (result == -1)
1369 return NULL;
1370 else
1371 return PyBool_FromLong(result);
1372}
1373
1374PyDoc_STRVAR(endswith__doc__,
1375"B.endswith(suffix [,start [,end]]) -> bool\n\
1376\n\
1377Return True if B ends with the specified suffix, False otherwise.\n\
1378With optional start, test B beginning at that position.\n\
1379With optional end, stop comparing B at that position.\n\
1380suffix can also be a tuple of strings to try.");
1381
1382static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001383bytes_endswith(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001384{
1385 Py_ssize_t start = 0;
1386 Py_ssize_t end = PY_SSIZE_T_MAX;
1387 PyObject *subobj;
1388 int result;
1389
1390 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1391 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1392 return NULL;
1393 if (PyTuple_Check(subobj)) {
1394 Py_ssize_t i;
1395 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1396 result = _bytes_tailmatch(self,
1397 PyTuple_GET_ITEM(subobj, i),
1398 start, end, +1);
1399 if (result == -1)
1400 return NULL;
1401 else if (result) {
1402 Py_RETURN_TRUE;
1403 }
1404 }
1405 Py_RETURN_FALSE;
1406 }
1407 result = _bytes_tailmatch(self, subobj, start, end, +1);
1408 if (result == -1)
1409 return NULL;
1410 else
1411 return PyBool_FromLong(result);
1412}
1413
1414
1415PyDoc_STRVAR(translate__doc__,
1416"B.translate(table[, deletechars]) -> bytearray\n\
1417\n\
1418Return a copy of B, where all characters occurring in the\n\
1419optional argument deletechars are removed, and the remaining\n\
1420characters have been mapped through the given translation\n\
1421table, which must be a bytes object of length 256.");
1422
1423static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00001424bytes_translate(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001425{
1426 register char *input, *output;
1427 register const char *table;
1428 register Py_ssize_t i, c, changed = 0;
1429 PyObject *input_obj = (PyObject*)self;
1430 const char *output_start;
1431 Py_ssize_t inlen;
1432 PyObject *result;
1433 int trans_table[256];
1434 PyObject *tableobj, *delobj = NULL;
1435 Py_buffer vtable, vdel;
1436
1437 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1438 &tableobj, &delobj))
1439 return NULL;
1440
1441 if (_getbuffer(tableobj, &vtable) < 0)
1442 return NULL;
1443
1444 if (vtable.len != 256) {
1445 PyErr_SetString(PyExc_ValueError,
1446 "translation table must be 256 characters long");
1447 result = NULL;
1448 goto done;
1449 }
1450
1451 if (delobj != NULL) {
1452 if (_getbuffer(delobj, &vdel) < 0) {
1453 result = NULL;
1454 goto done;
1455 }
1456 }
1457 else {
1458 vdel.buf = NULL;
1459 vdel.len = 0;
1460 }
1461
1462 table = (const char *)vtable.buf;
Christian Heimes3497f942008-05-26 12:29:14 +00001463 inlen = PyByteArray_GET_SIZE(input_obj);
1464 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465 if (result == NULL)
1466 goto done;
Christian Heimes3497f942008-05-26 12:29:14 +00001467 output_start = output = PyByteArray_AsString(result);
1468 input = PyByteArray_AS_STRING(input_obj);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001469
1470 if (vdel.len == 0) {
1471 /* If no deletions are required, use faster code */
1472 for (i = inlen; --i >= 0; ) {
1473 c = Py_CHARMASK(*input++);
1474 if (Py_CHARMASK((*output++ = table[c])) != c)
1475 changed = 1;
1476 }
Christian Heimes3497f942008-05-26 12:29:14 +00001477 if (changed || !PyByteArray_CheckExact(input_obj))
Christian Heimes1a6387e2008-03-26 12:49:49 +00001478 goto done;
1479 Py_DECREF(result);
1480 Py_INCREF(input_obj);
1481 result = input_obj;
1482 goto done;
1483 }
1484
1485 for (i = 0; i < 256; i++)
1486 trans_table[i] = Py_CHARMASK(table[i]);
1487
1488 for (i = 0; i < vdel.len; i++)
1489 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1490
1491 for (i = inlen; --i >= 0; ) {
1492 c = Py_CHARMASK(*input++);
1493 if (trans_table[c] != -1)
1494 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1495 continue;
1496 changed = 1;
1497 }
Christian Heimes3497f942008-05-26 12:29:14 +00001498 if (!changed && PyByteArray_CheckExact(input_obj)) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00001499 Py_DECREF(result);
1500 Py_INCREF(input_obj);
1501 result = input_obj;
1502 goto done;
1503 }
1504 /* Fix the size of the resulting string */
1505 if (inlen > 0)
Christian Heimes3497f942008-05-26 12:29:14 +00001506 PyByteArray_Resize(result, output - output_start);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001507
1508done:
1509 PyObject_ReleaseBuffer(tableobj, &vtable);
1510 if (delobj != NULL)
1511 PyObject_ReleaseBuffer(delobj, &vdel);
1512 return result;
1513}
1514
1515
/* Search directions accepted by findstring()/countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in target[0:target_len],
   or NULL when it does not occur. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs in target at offset.  The first and
   last bytes are checked before memcmp() looks at the bytes in between. */
/* Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                    \
    ((target)[(offset)] == (pattern)[0] &&                                  \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&              \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1529
1530
1531/* Bytes ops must return a string. */
1532/* If the object is subclass of bytes, create a copy */
Christian Heimes3497f942008-05-26 12:29:14 +00001533Py_LOCAL(PyByteArrayObject *)
1534return_self(PyByteArrayObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001535{
Christian Heimes3497f942008-05-26 12:29:14 +00001536 if (PyByteArray_CheckExact(self)) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00001537 Py_INCREF(self);
Christian Heimes3497f942008-05-26 12:29:14 +00001538 return (PyByteArrayObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001539 }
Christian Heimes3497f942008-05-26 12:29:14 +00001540 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1541 PyByteArray_AS_STRING(self),
1542 PyByteArray_GET_SIZE(self));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001543}
1544
1545Py_LOCAL_INLINE(Py_ssize_t)
1546countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1547{
1548 Py_ssize_t count=0;
1549 const char *start=target;
1550 const char *end=target+target_len;
1551
1552 while ( (start=findchar(start, end-start, c)) != NULL ) {
1553 count++;
1554 if (count >= maxcount)
1555 break;
1556 start += 1;
1557 }
1558 return count;
1559}
1560
1561Py_LOCAL(Py_ssize_t)
1562findstring(const char *target, Py_ssize_t target_len,
1563 const char *pattern, Py_ssize_t pattern_len,
1564 Py_ssize_t start,
1565 Py_ssize_t end,
1566 int direction)
1567{
1568 if (start < 0) {
1569 start += target_len;
1570 if (start < 0)
1571 start = 0;
1572 }
1573 if (end > target_len) {
1574 end = target_len;
1575 } else if (end < 0) {
1576 end += target_len;
1577 if (end < 0)
1578 end = 0;
1579 }
1580
1581 /* zero-length substrings always match at the first attempt */
1582 if (pattern_len == 0)
1583 return (direction > 0) ? start : end;
1584
1585 end -= pattern_len;
1586
1587 if (direction < 0) {
1588 for (; end >= start; end--)
1589 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1590 return end;
1591 } else {
1592 for (; start <= end; start++)
1593 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1594 return start;
1595 }
1596 return -1;
1597}
1598
1599Py_LOCAL_INLINE(Py_ssize_t)
1600countstring(const char *target, Py_ssize_t target_len,
1601 const char *pattern, Py_ssize_t pattern_len,
1602 Py_ssize_t start,
1603 Py_ssize_t end,
1604 int direction, Py_ssize_t maxcount)
1605{
1606 Py_ssize_t count=0;
1607
1608 if (start < 0) {
1609 start += target_len;
1610 if (start < 0)
1611 start = 0;
1612 }
1613 if (end > target_len) {
1614 end = target_len;
1615 } else if (end < 0) {
1616 end += target_len;
1617 if (end < 0)
1618 end = 0;
1619 }
1620
1621 /* zero-length substrings match everywhere */
1622 if (pattern_len == 0 || maxcount == 0) {
1623 if (target_len+1 < maxcount)
1624 return target_len+1;
1625 return maxcount;
1626 }
1627
1628 end -= pattern_len;
1629 if (direction < 0) {
1630 for (; (end >= start); end--)
1631 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1632 count++;
1633 if (--maxcount <= 0) break;
1634 end -= pattern_len-1;
1635 }
1636 } else {
1637 for (; (start <= end); start++)
1638 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1639 count++;
1640 if (--maxcount <= 0)
1641 break;
1642 start += pattern_len-1;
1643 }
1644 }
1645 return count;
1646}
1647
1648
1649/* Algorithms for different cases of string replacement */
1650
1651/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Christian Heimes3497f942008-05-26 12:29:14 +00001652Py_LOCAL(PyByteArrayObject *)
1653replace_interleave(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001654 const char *to_s, Py_ssize_t to_len,
1655 Py_ssize_t maxcount)
1656{
1657 char *self_s, *result_s;
1658 Py_ssize_t self_len, result_len;
1659 Py_ssize_t count, i, product;
Christian Heimes3497f942008-05-26 12:29:14 +00001660 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001661
Christian Heimes3497f942008-05-26 12:29:14 +00001662 self_len = PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001663
1664 /* 1 at the end plus 1 after every character */
1665 count = self_len+1;
1666 if (maxcount < count)
1667 count = maxcount;
1668
1669 /* Check for overflow */
1670 /* result_len = count * to_len + self_len; */
1671 product = count * to_len;
1672 if (product / to_len != count) {
1673 PyErr_SetString(PyExc_OverflowError,
1674 "replace string is too long");
1675 return NULL;
1676 }
1677 result_len = product + self_len;
1678 if (result_len < 0) {
1679 PyErr_SetString(PyExc_OverflowError,
1680 "replace string is too long");
1681 return NULL;
1682 }
1683
Christian Heimes3497f942008-05-26 12:29:14 +00001684 if (! (result = (PyByteArrayObject *)
1685 PyByteArray_FromStringAndSize(NULL, result_len)) )
Christian Heimes1a6387e2008-03-26 12:49:49 +00001686 return NULL;
1687
Christian Heimes3497f942008-05-26 12:29:14 +00001688 self_s = PyByteArray_AS_STRING(self);
1689 result_s = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001690
1691 /* TODO: special case single character, which doesn't need memcpy */
1692
1693 /* Lay the first one down (guaranteed this will occur) */
1694 Py_MEMCPY(result_s, to_s, to_len);
1695 result_s += to_len;
1696 count -= 1;
1697
1698 for (i=0; i<count; i++) {
1699 *result_s++ = *self_s++;
1700 Py_MEMCPY(result_s, to_s, to_len);
1701 result_s += to_len;
1702 }
1703
1704 /* Copy the rest of the original string */
1705 Py_MEMCPY(result_s, self_s, self_len-i);
1706
1707 return result;
1708}
1709
1710/* Special case for deleting a single character */
1711/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Christian Heimes3497f942008-05-26 12:29:14 +00001712Py_LOCAL(PyByteArrayObject *)
1713replace_delete_single_character(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001714 char from_c, Py_ssize_t maxcount)
1715{
1716 char *self_s, *result_s;
1717 char *start, *next, *end;
1718 Py_ssize_t self_len, result_len;
1719 Py_ssize_t count;
Christian Heimes3497f942008-05-26 12:29:14 +00001720 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001721
Christian Heimes3497f942008-05-26 12:29:14 +00001722 self_len = PyByteArray_GET_SIZE(self);
1723 self_s = PyByteArray_AS_STRING(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001724
1725 count = countchar(self_s, self_len, from_c, maxcount);
1726 if (count == 0) {
1727 return return_self(self);
1728 }
1729
1730 result_len = self_len - count; /* from_len == 1 */
1731 assert(result_len>=0);
1732
Christian Heimes3497f942008-05-26 12:29:14 +00001733 if ( (result = (PyByteArrayObject *)
1734 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001735 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00001736 result_s = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001737
1738 start = self_s;
1739 end = self_s + self_len;
1740 while (count-- > 0) {
1741 next = findchar(start, end-start, from_c);
1742 if (next == NULL)
1743 break;
1744 Py_MEMCPY(result_s, start, next-start);
1745 result_s += (next-start);
1746 start = next+1;
1747 }
1748 Py_MEMCPY(result_s, start, end-start);
1749
1750 return result;
1751}
1752
1753/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1754
Christian Heimes3497f942008-05-26 12:29:14 +00001755Py_LOCAL(PyByteArrayObject *)
1756replace_delete_substring(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001757 const char *from_s, Py_ssize_t from_len,
1758 Py_ssize_t maxcount)
1759{
1760 char *self_s, *result_s;
1761 char *start, *next, *end;
1762 Py_ssize_t self_len, result_len;
1763 Py_ssize_t count, offset;
Christian Heimes3497f942008-05-26 12:29:14 +00001764 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001765
Christian Heimes3497f942008-05-26 12:29:14 +00001766 self_len = PyByteArray_GET_SIZE(self);
1767 self_s = PyByteArray_AS_STRING(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001768
1769 count = countstring(self_s, self_len,
1770 from_s, from_len,
1771 0, self_len, 1,
1772 maxcount);
1773
1774 if (count == 0) {
1775 /* no matches */
1776 return return_self(self);
1777 }
1778
1779 result_len = self_len - (count * from_len);
1780 assert (result_len>=0);
1781
Christian Heimes3497f942008-05-26 12:29:14 +00001782 if ( (result = (PyByteArrayObject *)
1783 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
Christian Heimes1a6387e2008-03-26 12:49:49 +00001784 return NULL;
1785
Christian Heimes3497f942008-05-26 12:29:14 +00001786 result_s = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001787
1788 start = self_s;
1789 end = self_s + self_len;
1790 while (count-- > 0) {
1791 offset = findstring(start, end-start,
1792 from_s, from_len,
1793 0, end-start, FORWARD);
1794 if (offset == -1)
1795 break;
1796 next = start + offset;
1797
1798 Py_MEMCPY(result_s, start, next-start);
1799
1800 result_s += (next-start);
1801 start = next+from_len;
1802 }
1803 Py_MEMCPY(result_s, start, end-start);
1804 return result;
1805}
1806
1807/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Christian Heimes3497f942008-05-26 12:29:14 +00001808Py_LOCAL(PyByteArrayObject *)
1809replace_single_character_in_place(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001810 char from_c, char to_c,
1811 Py_ssize_t maxcount)
1812{
1813 char *self_s, *result_s, *start, *end, *next;
1814 Py_ssize_t self_len;
Christian Heimes3497f942008-05-26 12:29:14 +00001815 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001816
1817 /* The result string will be the same size */
Christian Heimes3497f942008-05-26 12:29:14 +00001818 self_s = PyByteArray_AS_STRING(self);
1819 self_len = PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001820
1821 next = findchar(self_s, self_len, from_c);
1822
1823 if (next == NULL) {
1824 /* No matches; return the original bytes */
1825 return return_self(self);
1826 }
1827
1828 /* Need to make a new bytes */
Christian Heimes3497f942008-05-26 12:29:14 +00001829 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001830 if (result == NULL)
1831 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00001832 result_s = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001833 Py_MEMCPY(result_s, self_s, self_len);
1834
1835 /* change everything in-place, starting with this one */
1836 start = result_s + (next-self_s);
1837 *start = to_c;
1838 start++;
1839 end = result_s + self_len;
1840
1841 while (--maxcount > 0) {
1842 next = findchar(start, end-start, from_c);
1843 if (next == NULL)
1844 break;
1845 *next = to_c;
1846 start = next+1;
1847 }
1848
1849 return result;
1850}
1851
1852/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Christian Heimes3497f942008-05-26 12:29:14 +00001853Py_LOCAL(PyByteArrayObject *)
1854replace_substring_in_place(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001855 const char *from_s, Py_ssize_t from_len,
1856 const char *to_s, Py_ssize_t to_len,
1857 Py_ssize_t maxcount)
1858{
1859 char *result_s, *start, *end;
1860 char *self_s;
1861 Py_ssize_t self_len, offset;
Christian Heimes3497f942008-05-26 12:29:14 +00001862 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001863
1864 /* The result bytes will be the same size */
1865
Christian Heimes3497f942008-05-26 12:29:14 +00001866 self_s = PyByteArray_AS_STRING(self);
1867 self_len = PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001868
1869 offset = findstring(self_s, self_len,
1870 from_s, from_len,
1871 0, self_len, FORWARD);
1872 if (offset == -1) {
1873 /* No matches; return the original bytes */
1874 return return_self(self);
1875 }
1876
1877 /* Need to make a new bytes */
Christian Heimes3497f942008-05-26 12:29:14 +00001878 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001879 if (result == NULL)
1880 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00001881 result_s = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001882 Py_MEMCPY(result_s, self_s, self_len);
1883
1884 /* change everything in-place, starting with this one */
1885 start = result_s + offset;
1886 Py_MEMCPY(start, to_s, from_len);
1887 start += from_len;
1888 end = result_s + self_len;
1889
1890 while ( --maxcount > 0) {
1891 offset = findstring(start, end-start,
1892 from_s, from_len,
1893 0, end-start, FORWARD);
1894 if (offset==-1)
1895 break;
1896 Py_MEMCPY(start+offset, to_s, from_len);
1897 start += offset+from_len;
1898 }
1899
1900 return result;
1901}
1902
1903/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Christian Heimes3497f942008-05-26 12:29:14 +00001904Py_LOCAL(PyByteArrayObject *)
1905replace_single_character(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001906 char from_c,
1907 const char *to_s, Py_ssize_t to_len,
1908 Py_ssize_t maxcount)
1909{
1910 char *self_s, *result_s;
1911 char *start, *next, *end;
1912 Py_ssize_t self_len, result_len;
1913 Py_ssize_t count, product;
Christian Heimes3497f942008-05-26 12:29:14 +00001914 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001915
Christian Heimes3497f942008-05-26 12:29:14 +00001916 self_s = PyByteArray_AS_STRING(self);
1917 self_len = PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001918
1919 count = countchar(self_s, self_len, from_c, maxcount);
1920 if (count == 0) {
1921 /* no matches, return unchanged */
1922 return return_self(self);
1923 }
1924
1925 /* use the difference between current and new, hence the "-1" */
1926 /* result_len = self_len + count * (to_len-1) */
1927 product = count * (to_len-1);
1928 if (product / (to_len-1) != count) {
1929 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1930 return NULL;
1931 }
1932 result_len = self_len + product;
1933 if (result_len < 0) {
1934 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1935 return NULL;
1936 }
1937
Christian Heimes3497f942008-05-26 12:29:14 +00001938 if ( (result = (PyByteArrayObject *)
1939 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001940 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00001941 result_s = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001942
1943 start = self_s;
1944 end = self_s + self_len;
1945 while (count-- > 0) {
1946 next = findchar(start, end-start, from_c);
1947 if (next == NULL)
1948 break;
1949
1950 if (next == start) {
1951 /* replace with the 'to' */
1952 Py_MEMCPY(result_s, to_s, to_len);
1953 result_s += to_len;
1954 start += 1;
1955 } else {
1956 /* copy the unchanged old then the 'to' */
1957 Py_MEMCPY(result_s, start, next-start);
1958 result_s += (next-start);
1959 Py_MEMCPY(result_s, to_s, to_len);
1960 result_s += to_len;
1961 start = next+1;
1962 }
1963 }
1964 /* Copy the remainder of the remaining bytes */
1965 Py_MEMCPY(result_s, start, end-start);
1966
1967 return result;
1968}
1969
1970/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Christian Heimes3497f942008-05-26 12:29:14 +00001971Py_LOCAL(PyByteArrayObject *)
1972replace_substring(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00001973 const char *from_s, Py_ssize_t from_len,
1974 const char *to_s, Py_ssize_t to_len,
1975 Py_ssize_t maxcount)
1976{
1977 char *self_s, *result_s;
1978 char *start, *next, *end;
1979 Py_ssize_t self_len, result_len;
1980 Py_ssize_t count, offset, product;
Christian Heimes3497f942008-05-26 12:29:14 +00001981 PyByteArrayObject *result;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001982
Christian Heimes3497f942008-05-26 12:29:14 +00001983 self_s = PyByteArray_AS_STRING(self);
1984 self_len = PyByteArray_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001985
1986 count = countstring(self_s, self_len,
1987 from_s, from_len,
1988 0, self_len, FORWARD, maxcount);
1989 if (count == 0) {
1990 /* no matches, return unchanged */
1991 return return_self(self);
1992 }
1993
1994 /* Check for overflow */
1995 /* result_len = self_len + count * (to_len-from_len) */
1996 product = count * (to_len-from_len);
1997 if (product / (to_len-from_len) != count) {
1998 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1999 return NULL;
2000 }
2001 result_len = self_len + product;
2002 if (result_len < 0) {
2003 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2004 return NULL;
2005 }
2006
Christian Heimes3497f942008-05-26 12:29:14 +00002007 if ( (result = (PyByteArrayObject *)
2008 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002009 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00002010 result_s = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002011
2012 start = self_s;
2013 end = self_s + self_len;
2014 while (count-- > 0) {
2015 offset = findstring(start, end-start,
2016 from_s, from_len,
2017 0, end-start, FORWARD);
2018 if (offset == -1)
2019 break;
2020 next = start+offset;
2021 if (next == start) {
2022 /* replace with the 'to' */
2023 Py_MEMCPY(result_s, to_s, to_len);
2024 result_s += to_len;
2025 start += from_len;
2026 } else {
2027 /* copy the unchanged old then the 'to' */
2028 Py_MEMCPY(result_s, start, next-start);
2029 result_s += (next-start);
2030 Py_MEMCPY(result_s, to_s, to_len);
2031 result_s += to_len;
2032 start = next+from_len;
2033 }
2034 }
2035 /* Copy the remainder of the remaining bytes */
2036 Py_MEMCPY(result_s, start, end-start);
2037
2038 return result;
2039}
2040
2041
Christian Heimes3497f942008-05-26 12:29:14 +00002042Py_LOCAL(PyByteArrayObject *)
2043replace(PyByteArrayObject *self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00002044 const char *from_s, Py_ssize_t from_len,
2045 const char *to_s, Py_ssize_t to_len,
2046 Py_ssize_t maxcount)
2047{
2048 if (maxcount < 0) {
2049 maxcount = PY_SSIZE_T_MAX;
Christian Heimes3497f942008-05-26 12:29:14 +00002050 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00002051 /* nothing to do; return the original bytes */
2052 return return_self(self);
2053 }
2054
2055 if (maxcount == 0 ||
2056 (from_len == 0 && to_len == 0)) {
2057 /* nothing to do; return the original bytes */
2058 return return_self(self);
2059 }
2060
2061 /* Handle zero-length special cases */
2062
2063 if (from_len == 0) {
2064 /* insert the 'to' bytes everywhere. */
2065 /* >>> "Python".replace("", ".") */
2066 /* '.P.y.t.h.o.n.' */
2067 return replace_interleave(self, to_s, to_len, maxcount);
2068 }
2069
2070 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2071 /* point for an empty self bytes to generate a non-empty bytes */
2072 /* Special case so the remaining code always gets a non-empty bytes */
Christian Heimes3497f942008-05-26 12:29:14 +00002073 if (PyByteArray_GET_SIZE(self) == 0) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00002074 return return_self(self);
2075 }
2076
2077 if (to_len == 0) {
2078 /* delete all occurances of 'from' bytes */
2079 if (from_len == 1) {
2080 return replace_delete_single_character(
2081 self, from_s[0], maxcount);
2082 } else {
2083 return replace_delete_substring(self, from_s, from_len, maxcount);
2084 }
2085 }
2086
2087 /* Handle special case where both bytes have the same length */
2088
2089 if (from_len == to_len) {
2090 if (from_len == 1) {
2091 return replace_single_character_in_place(
2092 self,
2093 from_s[0],
2094 to_s[0],
2095 maxcount);
2096 } else {
2097 return replace_substring_in_place(
2098 self, from_s, from_len, to_s, to_len, maxcount);
2099 }
2100 }
2101
2102 /* Otherwise use the more generic algorithms */
2103 if (from_len == 1) {
2104 return replace_single_character(self, from_s[0],
2105 to_s, to_len, maxcount);
2106 } else {
2107 /* len('from')>=2, len('to')>=1 */
2108 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2109 }
2110}
2111
2112
2113PyDoc_STRVAR(replace__doc__,
2114"B.replace(old, new[, count]) -> bytes\n\
2115\n\
2116Return a copy of B with all occurrences of subsection\n\
2117old replaced by new. If the optional argument count is\n\
2118given, only the first count occurrences are replaced.");
2119
2120static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002121bytes_replace(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002122{
2123 Py_ssize_t count = -1;
2124 PyObject *from, *to, *res;
2125 Py_buffer vfrom, vto;
2126
2127 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2128 return NULL;
2129
2130 if (_getbuffer(from, &vfrom) < 0)
2131 return NULL;
2132 if (_getbuffer(to, &vto) < 0) {
2133 PyObject_ReleaseBuffer(from, &vfrom);
2134 return NULL;
2135 }
2136
Christian Heimes3497f942008-05-26 12:29:14 +00002137 res = (PyObject *)replace((PyByteArrayObject *) self,
Christian Heimes1a6387e2008-03-26 12:49:49 +00002138 vfrom.buf, vfrom.len,
2139 vto.buf, vto.len, count);
2140
2141 PyObject_ReleaseBuffer(from, &vfrom);
2142 PyObject_ReleaseBuffer(to, &vto);
2143 return res;
2144}
2145
2146
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to maxsplit splits: 5 splits gives
   6 elements, capped at MAX_PREALLOC.  The argument is parenthesized so
   that expression arguments expand correctly; it may be evaluated twice,
   so it must be free of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Create a bytearray from data[left:right] and append it to `list`.
   Requires locals `str` and `list` and an `onError` label in the expanding
   function.  Expands to several statements, so it must not be used as the
   unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Create a bytearray from data[left:right] and add it to `list` as item
   number `count`, then increment `count`.  Requires locals `str`, `list`
   and `count` and an `onError` label in the expanding function.  While
   count < MAX_PREALLOC the item is stored with PyList_SET_ITEM (no DECREF
   in that branch); after that it is added with PyList_Append. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2191
2192
2193Py_LOCAL_INLINE(PyObject *)
2194split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2195{
2196 register Py_ssize_t i, j, count = 0;
2197 PyObject *str;
2198 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2199
2200 if (list == NULL)
2201 return NULL;
2202
2203 i = j = 0;
2204 while ((j < len) && (maxcount-- > 0)) {
2205 for(; j < len; j++) {
2206 /* I found that using memchr makes no difference */
2207 if (s[j] == ch) {
2208 SPLIT_ADD(s, i, j);
2209 i = j = j + 1;
2210 break;
2211 }
2212 }
2213 }
2214 if (i <= len) {
2215 SPLIT_ADD(s, i, len);
2216 }
2217 FIX_PREALLOC_SIZE(list);
2218 return list;
2219
2220 onError:
2221 Py_DECREF(list);
2222 return NULL;
2223}
2224
2225
2226Py_LOCAL_INLINE(PyObject *)
2227split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2228{
2229 register Py_ssize_t i, j, count = 0;
2230 PyObject *str;
2231 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2232
2233 if (list == NULL)
2234 return NULL;
2235
2236 for (i = j = 0; i < len; ) {
2237 /* find a token */
2238 while (i < len && ISSPACE(s[i]))
2239 i++;
2240 j = i;
2241 while (i < len && !ISSPACE(s[i]))
2242 i++;
2243 if (j < i) {
2244 if (maxcount-- <= 0)
2245 break;
2246 SPLIT_ADD(s, j, i);
2247 while (i < len && ISSPACE(s[i]))
2248 i++;
2249 j = i;
2250 }
2251 }
2252 if (j < len) {
2253 SPLIT_ADD(s, j, len);
2254 }
2255 FIX_PREALLOC_SIZE(list);
2256 return list;
2257
2258 onError:
2259 Py_DECREF(list);
2260 return NULL;
2261}
2262
2263PyDoc_STRVAR(split__doc__,
2264"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2265\n\
2266Return a list of the sections in B, using sep as the delimiter.\n\
2267If sep is not given, B is split on ASCII whitespace characters\n\
2268(space, tab, return, newline, formfeed, vertical tab).\n\
2269If maxsplit is given, at most maxsplit splits are done.");
2270
2271static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002272bytes_split(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002273{
Christian Heimes3497f942008-05-26 12:29:14 +00002274 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002275 Py_ssize_t maxsplit = -1, count = 0;
Christian Heimes3497f942008-05-26 12:29:14 +00002276 const char *s = PyByteArray_AS_STRING(self), *sub;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002277 PyObject *list, *str, *subobj = Py_None;
2278 Py_buffer vsub;
2279#ifdef USE_FAST
2280 Py_ssize_t pos;
2281#endif
2282
2283 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2284 return NULL;
2285 if (maxsplit < 0)
2286 maxsplit = PY_SSIZE_T_MAX;
2287
2288 if (subobj == Py_None)
2289 return split_whitespace(s, len, maxsplit);
2290
2291 if (_getbuffer(subobj, &vsub) < 0)
2292 return NULL;
2293 sub = vsub.buf;
2294 n = vsub.len;
2295
2296 if (n == 0) {
2297 PyErr_SetString(PyExc_ValueError, "empty separator");
2298 PyObject_ReleaseBuffer(subobj, &vsub);
2299 return NULL;
2300 }
2301 if (n == 1)
2302 return split_char(s, len, sub[0], maxsplit);
2303
2304 list = PyList_New(PREALLOC_SIZE(maxsplit));
2305 if (list == NULL) {
2306 PyObject_ReleaseBuffer(subobj, &vsub);
2307 return NULL;
2308 }
2309
2310#ifdef USE_FAST
2311 i = j = 0;
2312 while (maxsplit-- > 0) {
2313 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2314 if (pos < 0)
2315 break;
2316 j = i+pos;
2317 SPLIT_ADD(s, i, j);
2318 i = j + n;
2319 }
2320#else
2321 i = j = 0;
2322 while ((j+n <= len) && (maxsplit-- > 0)) {
2323 for (; j+n <= len; j++) {
2324 if (Py_STRING_MATCH(s, j, sub, n)) {
2325 SPLIT_ADD(s, i, j);
2326 i = j = j + n;
2327 break;
2328 }
2329 }
2330 }
2331#endif
2332 SPLIT_ADD(s, i, len);
2333 FIX_PREALLOC_SIZE(list);
2334 PyObject_ReleaseBuffer(subobj, &vsub);
2335 return list;
2336
2337 onError:
2338 Py_DECREF(list);
2339 PyObject_ReleaseBuffer(subobj, &vsub);
2340 return NULL;
2341}
2342
2343/* stringlib's partition shares nullbytes in some cases.
2344 undo this, we don't want the nullbytes to be shared. */
2345static PyObject *
2346make_nullbytes_unique(PyObject *result)
2347{
2348 if (result != NULL) {
2349 int i;
2350 assert(PyTuple_Check(result));
2351 assert(PyTuple_GET_SIZE(result) == 3);
2352 for (i = 0; i < 3; i++) {
2353 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
Christian Heimes3497f942008-05-26 12:29:14 +00002354 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002355 if (new == NULL) {
2356 Py_DECREF(result);
2357 result = NULL;
2358 break;
2359 }
2360 Py_DECREF(nullbytes);
2361 PyTuple_SET_ITEM(result, i, new);
2362 }
2363 }
2364 }
2365 return result;
2366}
2367
2368PyDoc_STRVAR(partition__doc__,
2369"B.partition(sep) -> (head, sep, tail)\n\
2370\n\
2371Searches for the separator sep in B, and returns the part before it,\n\
2372the separator itself, and the part after it. If the separator is not\n\
2373found, returns B and two empty bytearray objects.");
2374
2375static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002376bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002377{
2378 PyObject *bytesep, *result;
2379
Christian Heimes3497f942008-05-26 12:29:14 +00002380 bytesep = PyByteArray_FromObject(sep_obj);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002381 if (! bytesep)
2382 return NULL;
2383
2384 result = stringlib_partition(
2385 (PyObject*) self,
Christian Heimes3497f942008-05-26 12:29:14 +00002386 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
Christian Heimes1a6387e2008-03-26 12:49:49 +00002387 bytesep,
Christian Heimes3497f942008-05-26 12:29:14 +00002388 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002389 );
2390
2391 Py_DECREF(bytesep);
2392 return make_nullbytes_unique(result);
2393}
2394
2395PyDoc_STRVAR(rpartition__doc__,
2396"B.rpartition(sep) -> (tail, sep, head)\n\
2397\n\
2398Searches for the separator sep in B, starting at the end of B,\n\
2399and returns the part before it, the separator itself, and the\n\
2400part after it. If the separator is not found, returns two empty\n\
2401bytearray objects and B.");
2402
2403static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002404bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002405{
2406 PyObject *bytesep, *result;
2407
Christian Heimes3497f942008-05-26 12:29:14 +00002408 bytesep = PyByteArray_FromObject(sep_obj);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002409 if (! bytesep)
2410 return NULL;
2411
2412 result = stringlib_rpartition(
2413 (PyObject*) self,
Christian Heimes3497f942008-05-26 12:29:14 +00002414 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
Christian Heimes1a6387e2008-03-26 12:49:49 +00002415 bytesep,
Christian Heimes3497f942008-05-26 12:29:14 +00002416 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002417 );
2418
2419 Py_DECREF(bytesep);
2420 return make_nullbytes_unique(result);
2421}
2422
2423Py_LOCAL_INLINE(PyObject *)
2424rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2425{
2426 register Py_ssize_t i, j, count=0;
2427 PyObject *str;
2428 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2429
2430 if (list == NULL)
2431 return NULL;
2432
2433 i = j = len - 1;
2434 while ((i >= 0) && (maxcount-- > 0)) {
2435 for (; i >= 0; i--) {
2436 if (s[i] == ch) {
2437 SPLIT_ADD(s, i + 1, j + 1);
2438 j = i = i - 1;
2439 break;
2440 }
2441 }
2442 }
2443 if (j >= -1) {
2444 SPLIT_ADD(s, 0, j + 1);
2445 }
2446 FIX_PREALLOC_SIZE(list);
2447 if (PyList_Reverse(list) < 0)
2448 goto onError;
2449
2450 return list;
2451
2452 onError:
2453 Py_DECREF(list);
2454 return NULL;
2455}
2456
2457Py_LOCAL_INLINE(PyObject *)
2458rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2459{
2460 register Py_ssize_t i, j, count = 0;
2461 PyObject *str;
2462 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2463
2464 if (list == NULL)
2465 return NULL;
2466
2467 for (i = j = len - 1; i >= 0; ) {
2468 /* find a token */
2469 while (i >= 0 && ISSPACE(s[i]))
2470 i--;
2471 j = i;
2472 while (i >= 0 && !ISSPACE(s[i]))
2473 i--;
2474 if (j > i) {
2475 if (maxcount-- <= 0)
2476 break;
2477 SPLIT_ADD(s, i + 1, j + 1);
2478 while (i >= 0 && ISSPACE(s[i]))
2479 i--;
2480 j = i;
2481 }
2482 }
2483 if (j >= 0) {
2484 SPLIT_ADD(s, 0, j + 1);
2485 }
2486 FIX_PREALLOC_SIZE(list);
2487 if (PyList_Reverse(list) < 0)
2488 goto onError;
2489
2490 return list;
2491
2492 onError:
2493 Py_DECREF(list);
2494 return NULL;
2495}
2496
2497PyDoc_STRVAR(rsplit__doc__,
2498"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2499\n\
2500Return a list of the sections in B, using sep as the delimiter,\n\
2501starting at the end of B and working to the front.\n\
2502If sep is not given, B is split on ASCII whitespace characters\n\
2503(space, tab, return, newline, formfeed, vertical tab).\n\
2504If maxsplit is given, at most maxsplit splits are done.");
2505
2506static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002507bytes_rsplit(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002508{
Christian Heimes3497f942008-05-26 12:29:14 +00002509 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002510 Py_ssize_t maxsplit = -1, count = 0;
Christian Heimes3497f942008-05-26 12:29:14 +00002511 const char *s = PyByteArray_AS_STRING(self), *sub;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002512 PyObject *list, *str, *subobj = Py_None;
2513 Py_buffer vsub;
2514
2515 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2516 return NULL;
2517 if (maxsplit < 0)
2518 maxsplit = PY_SSIZE_T_MAX;
2519
2520 if (subobj == Py_None)
2521 return rsplit_whitespace(s, len, maxsplit);
2522
2523 if (_getbuffer(subobj, &vsub) < 0)
2524 return NULL;
2525 sub = vsub.buf;
2526 n = vsub.len;
2527
2528 if (n == 0) {
2529 PyErr_SetString(PyExc_ValueError, "empty separator");
2530 PyObject_ReleaseBuffer(subobj, &vsub);
2531 return NULL;
2532 }
2533 else if (n == 1)
2534 return rsplit_char(s, len, sub[0], maxsplit);
2535
2536 list = PyList_New(PREALLOC_SIZE(maxsplit));
2537 if (list == NULL) {
2538 PyObject_ReleaseBuffer(subobj, &vsub);
2539 return NULL;
2540 }
2541
2542 j = len;
2543 i = j - n;
2544
2545 while ( (i >= 0) && (maxsplit-- > 0) ) {
2546 for (; i>=0; i--) {
2547 if (Py_STRING_MATCH(s, i, sub, n)) {
2548 SPLIT_ADD(s, i + n, j);
2549 j = i;
2550 i -= n;
2551 break;
2552 }
2553 }
2554 }
2555 SPLIT_ADD(s, 0, j);
2556 FIX_PREALLOC_SIZE(list);
2557 if (PyList_Reverse(list) < 0)
2558 goto onError;
2559 PyObject_ReleaseBuffer(subobj, &vsub);
2560 return list;
2561
2562onError:
2563 Py_DECREF(list);
2564 PyObject_ReleaseBuffer(subobj, &vsub);
2565 return NULL;
2566}
2567
2568PyDoc_STRVAR(reverse__doc__,
2569"B.reverse() -> None\n\
2570\n\
2571Reverse the order of the values in B in place.");
2572static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002573bytes_reverse(PyByteArrayObject *self, PyObject *unused)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002574{
2575 char swap, *head, *tail;
2576 Py_ssize_t i, j, n = Py_SIZE(self);
2577
2578 j = n / 2;
2579 head = self->ob_bytes;
2580 tail = head + n - 1;
2581 for (i = 0; i < j; i++) {
2582 swap = *head;
2583 *head++ = *tail;
2584 *tail-- = swap;
2585 }
2586
2587 Py_RETURN_NONE;
2588}
2589
2590PyDoc_STRVAR(insert__doc__,
2591"B.insert(index, int) -> None\n\
2592\n\
2593Insert a single item into the bytearray before the given index.");
2594static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002595bytes_insert(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002596{
2597 int value;
2598 Py_ssize_t where, n = Py_SIZE(self);
2599
2600 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2601 return NULL;
2602
2603 if (n == PY_SSIZE_T_MAX) {
2604 PyErr_SetString(PyExc_OverflowError,
2605 "cannot add more objects to bytes");
2606 return NULL;
2607 }
2608 if (value < 0 || value >= 256) {
2609 PyErr_SetString(PyExc_ValueError,
2610 "byte must be in range(0, 256)");
2611 return NULL;
2612 }
Christian Heimes3497f942008-05-26 12:29:14 +00002613 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002614 return NULL;
2615
2616 if (where < 0) {
2617 where += n;
2618 if (where < 0)
2619 where = 0;
2620 }
2621 if (where > n)
2622 where = n;
2623 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2624 self->ob_bytes[where] = value;
2625
2626 Py_RETURN_NONE;
2627}
2628
2629PyDoc_STRVAR(append__doc__,
2630"B.append(int) -> None\n\
2631\n\
2632Append a single item to the end of B.");
2633static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002634bytes_append(PyByteArrayObject *self, PyObject *arg)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002635{
2636 int value;
2637 Py_ssize_t n = Py_SIZE(self);
2638
2639 if (! _getbytevalue(arg, &value))
2640 return NULL;
2641 if (n == PY_SSIZE_T_MAX) {
2642 PyErr_SetString(PyExc_OverflowError,
2643 "cannot add more objects to bytes");
2644 return NULL;
2645 }
Christian Heimes3497f942008-05-26 12:29:14 +00002646 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002647 return NULL;
2648
2649 self->ob_bytes[n] = value;
2650
2651 Py_RETURN_NONE;
2652}
2653
2654PyDoc_STRVAR(extend__doc__,
2655"B.extend(iterable int) -> None\n\
2656\n\
2657Append all the elements from the iterator or sequence to the\n\
2658end of B.");
2659static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002660bytes_extend(PyByteArrayObject *self, PyObject *arg)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002661{
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002662 PyObject *it, *item, *bytes_obj;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002663 Py_ssize_t buf_size = 0, len = 0;
2664 int value;
2665 char *buf;
2666
2667 /* bytes_setslice code only accepts something supporting PEP 3118. */
2668 if (PyObject_CheckBuffer(arg)) {
2669 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2670 return NULL;
2671
2672 Py_RETURN_NONE;
2673 }
2674
2675 it = PyObject_GetIter(arg);
2676 if (it == NULL)
2677 return NULL;
2678
2679 /* Try to determine the length of the argument. 32 is abitrary. */
2680 buf_size = _PyObject_LengthHint(arg, 32);
2681
Christian Heimes3497f942008-05-26 12:29:14 +00002682 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002683 if (bytes_obj == NULL)
2684 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00002685 buf = PyByteArray_AS_STRING(bytes_obj);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002686
2687 while ((item = PyIter_Next(it)) != NULL) {
2688 if (! _getbytevalue(item, &value)) {
2689 Py_DECREF(item);
2690 Py_DECREF(it);
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002691 Py_DECREF(bytes_obj);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002692 return NULL;
2693 }
2694 buf[len++] = value;
2695 Py_DECREF(item);
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002696
Christian Heimes1a6387e2008-03-26 12:49:49 +00002697 if (len >= buf_size) {
2698 buf_size = len + (len >> 1) + 1;
Christian Heimes3497f942008-05-26 12:29:14 +00002699 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00002700 Py_DECREF(it);
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002701 Py_DECREF(bytes_obj);
2702 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002703 }
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002704 /* Recompute the `buf' pointer, since the resizing operation may
2705 have invalidated it. */
Christian Heimes3497f942008-05-26 12:29:14 +00002706 buf = PyByteArray_AS_STRING(bytes_obj);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002707 }
2708 }
2709 Py_DECREF(it);
2710
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002711 /* Resize down to exact size. */
Christian Heimes3497f942008-05-26 12:29:14 +00002712 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002713 Py_DECREF(bytes_obj);
2714 return NULL;
2715 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00002716
Alexandre Vassalottibcdc4682008-04-14 22:40:08 +00002717 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2718 return NULL;
2719 Py_DECREF(bytes_obj);
2720
2721 Py_RETURN_NONE;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002722}
2723
2724PyDoc_STRVAR(pop__doc__,
2725"B.pop([index]) -> int\n\
2726\n\
2727Remove and return a single item from B. If no index\n\
2728argument is give, will pop the last value.");
2729static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002730bytes_pop(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002731{
2732 int value;
2733 Py_ssize_t where = -1, n = Py_SIZE(self);
2734
2735 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2736 return NULL;
2737
2738 if (n == 0) {
2739 PyErr_SetString(PyExc_OverflowError,
2740 "cannot pop an empty bytes");
2741 return NULL;
2742 }
2743 if (where < 0)
2744 where += Py_SIZE(self);
2745 if (where < 0 || where >= Py_SIZE(self)) {
2746 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2747 return NULL;
2748 }
2749
2750 value = self->ob_bytes[where];
2751 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
Christian Heimes3497f942008-05-26 12:29:14 +00002752 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002753 return NULL;
2754
2755 return PyInt_FromLong(value);
2756}
2757
2758PyDoc_STRVAR(remove__doc__,
2759"B.remove(int) -> None\n\
2760\n\
2761Remove the first occurance of a value in B.");
2762static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002763bytes_remove(PyByteArrayObject *self, PyObject *arg)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002764{
2765 int value;
2766 Py_ssize_t where, n = Py_SIZE(self);
2767
2768 if (! _getbytevalue(arg, &value))
2769 return NULL;
2770
2771 for (where = 0; where < n; where++) {
2772 if (self->ob_bytes[where] == value)
2773 break;
2774 }
2775 if (where == n) {
2776 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2777 return NULL;
2778 }
2779
2780 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
Christian Heimes3497f942008-05-26 12:29:14 +00002781 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002782 return NULL;
2783
2784 Py_RETURN_NONE;
2785}
2786
2787/* XXX These two helpers could be optimized if argsize == 1 */
2788
2789static Py_ssize_t
2790lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2791 void *argptr, Py_ssize_t argsize)
2792{
2793 Py_ssize_t i = 0;
2794 while (i < mysize && memchr(argptr, myptr[i], argsize))
2795 i++;
2796 return i;
2797}
2798
2799static Py_ssize_t
2800rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2801 void *argptr, Py_ssize_t argsize)
2802{
2803 Py_ssize_t i = mysize - 1;
2804 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2805 i--;
2806 return i + 1;
2807}
2808
2809PyDoc_STRVAR(strip__doc__,
2810"B.strip([bytes]) -> bytearray\n\
2811\n\
2812Strip leading and trailing bytes contained in the argument.\n\
2813If the argument is omitted, strip ASCII whitespace.");
2814static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002815bytes_strip(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002816{
2817 Py_ssize_t left, right, mysize, argsize;
2818 void *myptr, *argptr;
2819 PyObject *arg = Py_None;
2820 Py_buffer varg;
2821 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2822 return NULL;
2823 if (arg == Py_None) {
2824 argptr = "\t\n\r\f\v ";
2825 argsize = 6;
2826 }
2827 else {
2828 if (_getbuffer(arg, &varg) < 0)
2829 return NULL;
2830 argptr = varg.buf;
2831 argsize = varg.len;
2832 }
2833 myptr = self->ob_bytes;
2834 mysize = Py_SIZE(self);
2835 left = lstrip_helper(myptr, mysize, argptr, argsize);
2836 if (left == mysize)
2837 right = left;
2838 else
2839 right = rstrip_helper(myptr, mysize, argptr, argsize);
2840 if (arg != Py_None)
2841 PyObject_ReleaseBuffer(arg, &varg);
Christian Heimes3497f942008-05-26 12:29:14 +00002842 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002843}
2844
2845PyDoc_STRVAR(lstrip__doc__,
2846"B.lstrip([bytes]) -> bytearray\n\
2847\n\
2848Strip leading bytes contained in the argument.\n\
2849If the argument is omitted, strip leading ASCII whitespace.");
2850static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002851bytes_lstrip(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002852{
2853 Py_ssize_t left, right, mysize, argsize;
2854 void *myptr, *argptr;
2855 PyObject *arg = Py_None;
2856 Py_buffer varg;
2857 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2858 return NULL;
2859 if (arg == Py_None) {
2860 argptr = "\t\n\r\f\v ";
2861 argsize = 6;
2862 }
2863 else {
2864 if (_getbuffer(arg, &varg) < 0)
2865 return NULL;
2866 argptr = varg.buf;
2867 argsize = varg.len;
2868 }
2869 myptr = self->ob_bytes;
2870 mysize = Py_SIZE(self);
2871 left = lstrip_helper(myptr, mysize, argptr, argsize);
2872 right = mysize;
2873 if (arg != Py_None)
2874 PyObject_ReleaseBuffer(arg, &varg);
Christian Heimes3497f942008-05-26 12:29:14 +00002875 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002876}
2877
2878PyDoc_STRVAR(rstrip__doc__,
2879"B.rstrip([bytes]) -> bytearray\n\
2880\n\
2881Strip trailing bytes contained in the argument.\n\
2882If the argument is omitted, strip trailing ASCII whitespace.");
2883static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002884bytes_rstrip(PyByteArrayObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002885{
2886 Py_ssize_t left, right, mysize, argsize;
2887 void *myptr, *argptr;
2888 PyObject *arg = Py_None;
2889 Py_buffer varg;
2890 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2891 return NULL;
2892 if (arg == Py_None) {
2893 argptr = "\t\n\r\f\v ";
2894 argsize = 6;
2895 }
2896 else {
2897 if (_getbuffer(arg, &varg) < 0)
2898 return NULL;
2899 argptr = varg.buf;
2900 argsize = varg.len;
2901 }
2902 myptr = self->ob_bytes;
2903 mysize = Py_SIZE(self);
2904 left = 0;
2905 right = rstrip_helper(myptr, mysize, argptr, argsize);
2906 if (arg != Py_None)
2907 PyObject_ReleaseBuffer(arg, &varg);
Christian Heimes3497f942008-05-26 12:29:14 +00002908 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002909}
2910
2911PyDoc_STRVAR(decode_doc,
2912"B.decode([encoding[, errors]]) -> unicode object.\n\
2913\n\
2914Decodes B using the codec registered for encoding. encoding defaults\n\
2915to the default encoding. errors may be given to set a different error\n\
2916handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2917a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2918as well as any other name registered with codecs.register_error that is\n\
2919able to handle UnicodeDecodeErrors.");
2920
2921static PyObject *
2922bytes_decode(PyObject *self, PyObject *args)
2923{
2924 const char *encoding = NULL;
2925 const char *errors = NULL;
2926
2927 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2928 return NULL;
2929 if (encoding == NULL)
2930 encoding = PyUnicode_GetDefaultEncoding();
2931 return PyCodec_Decode(self, encoding, errors);
2932}
2933
2934PyDoc_STRVAR(alloc_doc,
2935"B.__alloc__() -> int\n\
2936\n\
2937Returns the number of bytes actually allocated.");
2938
2939static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002940bytes_alloc(PyByteArrayObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002941{
2942 return PyInt_FromSsize_t(self->ob_alloc);
2943}
2944
2945PyDoc_STRVAR(join_doc,
2946"B.join(iterable_of_bytes) -> bytes\n\
2947\n\
2948Concatenates any number of bytearray objects, with B in between each pair.");
2949
2950static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00002951bytes_join(PyByteArrayObject *self, PyObject *it)
Christian Heimes1a6387e2008-03-26 12:49:49 +00002952{
2953 PyObject *seq;
2954 Py_ssize_t mysize = Py_SIZE(self);
2955 Py_ssize_t i;
2956 Py_ssize_t n;
2957 PyObject **items;
2958 Py_ssize_t totalsize = 0;
2959 PyObject *result;
2960 char *dest;
2961
2962 seq = PySequence_Fast(it, "can only join an iterable");
2963 if (seq == NULL)
2964 return NULL;
2965 n = PySequence_Fast_GET_SIZE(seq);
2966 items = PySequence_Fast_ITEMS(seq);
2967
2968 /* Compute the total size, and check that they are all bytes */
2969 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2970 for (i = 0; i < n; i++) {
2971 PyObject *obj = items[i];
Christian Heimes3497f942008-05-26 12:29:14 +00002972 if (!PyByteArray_Check(obj) && !PyString_Check(obj)) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00002973 PyErr_Format(PyExc_TypeError,
2974 "can only join an iterable of bytes "
2975 "(item %ld has type '%.100s')",
2976 /* XXX %ld isn't right on Win64 */
2977 (long)i, Py_TYPE(obj)->tp_name);
2978 goto error;
2979 }
2980 if (i > 0)
2981 totalsize += mysize;
2982 totalsize += Py_SIZE(obj);
2983 if (totalsize < 0) {
2984 PyErr_NoMemory();
2985 goto error;
2986 }
2987 }
2988
2989 /* Allocate the result, and copy the bytes */
Christian Heimes3497f942008-05-26 12:29:14 +00002990 result = PyByteArray_FromStringAndSize(NULL, totalsize);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002991 if (result == NULL)
2992 goto error;
Christian Heimes3497f942008-05-26 12:29:14 +00002993 dest = PyByteArray_AS_STRING(result);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002994 for (i = 0; i < n; i++) {
2995 PyObject *obj = items[i];
2996 Py_ssize_t size = Py_SIZE(obj);
2997 char *buf;
Christian Heimes3497f942008-05-26 12:29:14 +00002998 if (PyByteArray_Check(obj))
2999 buf = PyByteArray_AS_STRING(obj);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003000 else
3001 buf = PyString_AS_STRING(obj);
3002 if (i) {
3003 memcpy(dest, self->ob_bytes, mysize);
3004 dest += mysize;
3005 }
3006 memcpy(dest, buf, size);
3007 dest += size;
3008 }
3009
3010 /* Done */
3011 Py_DECREF(seq);
3012 return result;
3013
3014 /* Error handling */
3015 error:
3016 Py_DECREF(seq);
3017 return NULL;
3018}
3019
3020PyDoc_STRVAR(fromhex_doc,
3021"bytearray.fromhex(string) -> bytearray\n\
3022\n\
3023Create a bytearray object from a string of hexadecimal numbers.\n\
3024Spaces between two numbers are accepted.\n\
3025Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3026
3027static int
3028hex_digit_to_int(Py_UNICODE c)
3029{
3030 if (c >= 128)
3031 return -1;
3032 if (ISDIGIT(c))
3033 return c - '0';
3034 else {
3035 if (ISUPPER(c))
3036 c = TOLOWER(c);
3037 if (c >= 'a' && c <= 'f')
3038 return c - 'a' + 10;
3039 }
3040 return -1;
3041}
3042
3043static PyObject *
3044bytes_fromhex(PyObject *cls, PyObject *args)
3045{
3046 PyObject *newbytes, *hexobj;
3047 char *buf;
3048 Py_UNICODE *hex;
3049 Py_ssize_t hexlen, byteslen, i, j;
3050 int top, bot;
3051
3052 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3053 return NULL;
3054 assert(PyUnicode_Check(hexobj));
3055 hexlen = PyUnicode_GET_SIZE(hexobj);
3056 hex = PyUnicode_AS_UNICODE(hexobj);
3057 byteslen = hexlen/2; /* This overestimates if there are spaces */
Christian Heimes3497f942008-05-26 12:29:14 +00003058 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003059 if (!newbytes)
3060 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00003061 buf = PyByteArray_AS_STRING(newbytes);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003062 for (i = j = 0; i < hexlen; i += 2) {
3063 /* skip over spaces in the input */
3064 while (hex[i] == ' ')
3065 i++;
3066 if (i >= hexlen)
3067 break;
3068 top = hex_digit_to_int(hex[i]);
3069 bot = hex_digit_to_int(hex[i+1]);
3070 if (top == -1 || bot == -1) {
3071 PyErr_Format(PyExc_ValueError,
3072 "non-hexadecimal number found in "
3073 "fromhex() arg at position %zd", i);
3074 goto error;
3075 }
3076 buf[j++] = (top << 4) + bot;
3077 }
Christian Heimes3497f942008-05-26 12:29:14 +00003078 if (PyByteArray_Resize(newbytes, j) < 0)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003079 goto error;
3080 return newbytes;
3081
3082 error:
3083 Py_DECREF(newbytes);
3084 return NULL;
3085}
3086
3087PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3088
3089static PyObject *
Christian Heimes3497f942008-05-26 12:29:14 +00003090bytes_reduce(PyByteArrayObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003091{
3092 PyObject *latin1, *dict;
3093 if (self->ob_bytes)
3094 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3095 Py_SIZE(self), NULL);
3096 else
3097 latin1 = PyUnicode_FromString("");
3098
3099 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3100 if (dict == NULL) {
3101 PyErr_Clear();
3102 dict = Py_None;
3103 Py_INCREF(dict);
3104 }
3105
3106 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3107}
3108
3109static PySequenceMethods bytes_as_sequence = {
3110 (lenfunc)bytes_length, /* sq_length */
Christian Heimes3497f942008-05-26 12:29:14 +00003111 (binaryfunc)PyByteArray_Concat, /* sq_concat */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003112 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3113 (ssizeargfunc)bytes_getitem, /* sq_item */
3114 0, /* sq_slice */
3115 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3116 0, /* sq_ass_slice */
3117 (objobjproc)bytes_contains, /* sq_contains */
3118 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3119 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3120};
3121
3122static PyMappingMethods bytes_as_mapping = {
3123 (lenfunc)bytes_length,
3124 (binaryfunc)bytes_subscript,
3125 (objobjargproc)bytes_ass_subscript,
3126};
3127
3128static PyBufferProcs bytes_as_buffer = {
3129 (readbufferproc)bytes_buffer_getreadbuf,
3130 (writebufferproc)bytes_buffer_getwritebuf,
3131 (segcountproc)bytes_buffer_getsegcount,
3132 (charbufferproc)bytes_buffer_getcharbuf,
3133 (getbufferproc)bytes_getbuffer,
3134 (releasebufferproc)bytes_releasebuffer,
3135};
3136
3137static PyMethodDef
3138bytes_methods[] = {
3139 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3140 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3141 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3142 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3143 _Py_capitalize__doc__},
3144 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3145 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3146 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3147 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3148 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3149 expandtabs__doc__},
3150 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3151 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3152 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3153 fromhex_doc},
3154 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3155 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3156 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3157 _Py_isalnum__doc__},
3158 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3159 _Py_isalpha__doc__},
3160 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3161 _Py_isdigit__doc__},
3162 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3163 _Py_islower__doc__},
3164 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3165 _Py_isspace__doc__},
3166 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3167 _Py_istitle__doc__},
3168 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3169 _Py_isupper__doc__},
3170 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3171 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3172 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3173 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3174 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3175 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3176 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3177 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3178 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3179 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3180 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3181 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3182 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3183 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3184 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3185 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3186 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3187 splitlines__doc__},
3188 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3189 startswith__doc__},
3190 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3191 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3192 _Py_swapcase__doc__},
3193 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3194 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3195 translate__doc__},
3196 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3197 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3198 {NULL}
3199};
3200
3201PyDoc_STRVAR(bytes_doc,
3202"bytearray(iterable_of_ints) -> bytearray.\n\
3203bytearray(string, encoding[, errors]) -> bytearray.\n\
3204bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3205bytearray(memory_view) -> bytearray.\n\
3206\n\
3207Construct an mutable bytearray object from:\n\
3208 - an iterable yielding integers in range(256)\n\
3209 - a text string encoded using the specified encoding\n\
3210 - a bytes or a bytearray object\n\
3211 - any object implementing the buffer API.\n\
3212\n\
3213bytearray(int) -> bytearray.\n\
3214\n\
3215Construct a zero-initialized bytearray of the given length.");
3216
3217
3218static PyObject *bytes_iter(PyObject *seq);
3219
Christian Heimes3497f942008-05-26 12:29:14 +00003220PyTypeObject PyByteArray_Type = {
Christian Heimes1a6387e2008-03-26 12:49:49 +00003221 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3222 "bytearray",
Christian Heimes3497f942008-05-26 12:29:14 +00003223 sizeof(PyByteArrayObject),
Christian Heimes1a6387e2008-03-26 12:49:49 +00003224 0,
3225 (destructor)bytes_dealloc, /* tp_dealloc */
3226 0, /* tp_print */
3227 0, /* tp_getattr */
3228 0, /* tp_setattr */
3229 0, /* tp_compare */
3230 (reprfunc)bytes_repr, /* tp_repr */
3231 0, /* tp_as_number */
3232 &bytes_as_sequence, /* tp_as_sequence */
3233 &bytes_as_mapping, /* tp_as_mapping */
3234 0, /* tp_hash */
3235 0, /* tp_call */
3236 bytes_str, /* tp_str */
3237 PyObject_GenericGetAttr, /* tp_getattro */
3238 0, /* tp_setattro */
3239 &bytes_as_buffer, /* tp_as_buffer */
3240 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3241 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3242 bytes_doc, /* tp_doc */
3243 0, /* tp_traverse */
3244 0, /* tp_clear */
3245 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3246 0, /* tp_weaklistoffset */
3247 bytes_iter, /* tp_iter */
3248 0, /* tp_iternext */
3249 bytes_methods, /* tp_methods */
3250 0, /* tp_members */
3251 0, /* tp_getset */
3252 0, /* tp_base */
3253 0, /* tp_dict */
3254 0, /* tp_descr_get */
3255 0, /* tp_descr_set */
3256 0, /* tp_dictoffset */
3257 (initproc)bytes_init, /* tp_init */
3258 PyType_GenericAlloc, /* tp_alloc */
3259 PyType_GenericNew, /* tp_new */
3260 PyObject_Del, /* tp_free */
3261};
3262
3263/*********************** Bytes Iterator ****************************/
3264
3265typedef struct {
3266 PyObject_HEAD
3267 Py_ssize_t it_index;
Christian Heimes3497f942008-05-26 12:29:14 +00003268 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003269} bytesiterobject;
3270
3271static void
3272bytesiter_dealloc(bytesiterobject *it)
3273{
3274 _PyObject_GC_UNTRACK(it);
3275 Py_XDECREF(it->it_seq);
3276 PyObject_GC_Del(it);
3277}
3278
3279static int
3280bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3281{
3282 Py_VISIT(it->it_seq);
3283 return 0;
3284}
3285
3286static PyObject *
3287bytesiter_next(bytesiterobject *it)
3288{
Christian Heimes3497f942008-05-26 12:29:14 +00003289 PyByteArrayObject *seq;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003290 PyObject *item;
3291
3292 assert(it != NULL);
3293 seq = it->it_seq;
3294 if (seq == NULL)
3295 return NULL;
Christian Heimes3497f942008-05-26 12:29:14 +00003296 assert(PyByteArray_Check(seq));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003297
Christian Heimes3497f942008-05-26 12:29:14 +00003298 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00003299 item = PyInt_FromLong(
3300 (unsigned char)seq->ob_bytes[it->it_index]);
3301 if (item != NULL)
3302 ++it->it_index;
3303 return item;
3304 }
3305
3306 Py_DECREF(seq);
3307 it->it_seq = NULL;
3308 return NULL;
3309}
3310
3311static PyObject *
3312bytesiter_length_hint(bytesiterobject *it)
3313{
3314 Py_ssize_t len = 0;
3315 if (it->it_seq)
Christian Heimes3497f942008-05-26 12:29:14 +00003316 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003317 return PyInt_FromSsize_t(len);
3318}
3319
3320PyDoc_STRVAR(length_hint_doc,
3321 "Private method returning an estimate of len(list(it)).");
3322
3323static PyMethodDef bytesiter_methods[] = {
3324 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3325 length_hint_doc},
3326 {NULL, NULL} /* sentinel */
3327};
3328
Christian Heimes3497f942008-05-26 12:29:14 +00003329PyTypeObject PyByteArrayIter_Type = {
Christian Heimes1a6387e2008-03-26 12:49:49 +00003330 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3331 "bytearray_iterator", /* tp_name */
3332 sizeof(bytesiterobject), /* tp_basicsize */
3333 0, /* tp_itemsize */
3334 /* methods */
3335 (destructor)bytesiter_dealloc, /* tp_dealloc */
3336 0, /* tp_print */
3337 0, /* tp_getattr */
3338 0, /* tp_setattr */
3339 0, /* tp_compare */
3340 0, /* tp_repr */
3341 0, /* tp_as_number */
3342 0, /* tp_as_sequence */
3343 0, /* tp_as_mapping */
3344 0, /* tp_hash */
3345 0, /* tp_call */
3346 0, /* tp_str */
3347 PyObject_GenericGetAttr, /* tp_getattro */
3348 0, /* tp_setattro */
3349 0, /* tp_as_buffer */
3350 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3351 0, /* tp_doc */
3352 (traverseproc)bytesiter_traverse, /* tp_traverse */
3353 0, /* tp_clear */
3354 0, /* tp_richcompare */
3355 0, /* tp_weaklistoffset */
3356 PyObject_SelfIter, /* tp_iter */
3357 (iternextfunc)bytesiter_next, /* tp_iternext */
3358 bytesiter_methods, /* tp_methods */
3359 0,
3360};
3361
3362static PyObject *
3363bytes_iter(PyObject *seq)
3364{
3365 bytesiterobject *it;
3366
Christian Heimes3497f942008-05-26 12:29:14 +00003367 if (!PyByteArray_Check(seq)) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00003368 PyErr_BadInternalCall();
3369 return NULL;
3370 }
Christian Heimes3497f942008-05-26 12:29:14 +00003371 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003372 if (it == NULL)
3373 return NULL;
3374 it->it_index = 0;
3375 Py_INCREF(seq);
Christian Heimes3497f942008-05-26 12:29:14 +00003376 it->it_seq = (PyByteArrayObject *)seq;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003377 _PyObject_GC_TRACK(it);
3378 return (PyObject *)it;
3379}