blob: b1f696227fe7bae133a0ebd06cc6f3e8ceffb4da [file] [log] [blame]
Christian Heimes44720832008-05-26 13:01:01 +00001/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyByteArrayObject *nullbytes = NULL;
9
10void
11PyByteArray_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyByteArray_Init(void)
18{
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
28/* end nullbytes support */
29
30/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
Georg Brandl3e483f62008-07-16 22:57:41 +000037 if (PyBytes_CheckExact(arg)) {
Christian Heimes44720832008-05-26 13:01:01 +000038 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
41 }
Georg Brandl3e483f62008-07-16 22:57:41 +000042 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
44 }
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
Christian Heimes44720832008-05-26 13:01:01 +000047 }
48 else {
Georg Brandl3e483f62008-07-16 22:57:41 +000049 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
54 }
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
57 }
58 if (face_value == -1 && PyErr_Occurred())
59 return 0;
60
61 if (face_value < 0 || face_value >= 256) {
62 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
Christian Heimes44720832008-05-26 13:01:01 +000063 return 0;
64 }
65
66 *value = face_value;
67 return 1;
68}
69
70static Py_ssize_t
71bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72{
73 if ( index != 0 ) {
74 PyErr_SetString(PyExc_SystemError,
75 "accessing non-existent bytes segment");
76 return -1;
77 }
78 *ptr = (void *)self->ob_bytes;
79 return Py_SIZE(self);
80}
81
82static Py_ssize_t
83bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84{
85 if ( index != 0 ) {
86 PyErr_SetString(PyExc_SystemError,
87 "accessing non-existent bytes segment");
88 return -1;
89 }
90 *ptr = (void *)self->ob_bytes;
91 return Py_SIZE(self);
92}
93
94static Py_ssize_t
95bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96{
97 if ( lenp )
98 *lenp = Py_SIZE(self);
99 return 1;
100}
101
102static Py_ssize_t
103bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104{
105 if ( index != 0 ) {
106 PyErr_SetString(PyExc_SystemError,
107 "accessing non-existent bytes segment");
108 return -1;
109 }
110 *ptr = self->ob_bytes;
111 return Py_SIZE(self);
112}
113
114static int
115bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116{
117 int ret;
118 void *ptr;
119 if (view == NULL) {
120 obj->ob_exports++;
121 return 0;
122 }
123 if (obj->ob_bytes == NULL)
124 ptr = "";
125 else
126 ptr = obj->ob_bytes;
127 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
128 if (ret >= 0) {
129 obj->ob_exports++;
130 }
131 return ret;
132}
133
134static void
135bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
136{
137 obj->ob_exports--;
138}
139
140static Py_ssize_t
141_getbuffer(PyObject *obj, Py_buffer *view)
142{
143 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
144
145 if (buffer == NULL || buffer->bf_getbuffer == NULL)
146 {
147 PyErr_Format(PyExc_TypeError,
148 "Type %.100s doesn't support the buffer API",
149 Py_TYPE(obj)->tp_name);
150 return -1;
151 }
152
153 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
154 return -1;
155 return view->len;
156}
157
158/* Direct API functions */
159
160PyObject *
161PyByteArray_FromObject(PyObject *input)
162{
163 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
164 input, NULL);
165}
166
167PyObject *
168PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
169{
170 PyByteArrayObject *new;
171 Py_ssize_t alloc;
172
173 if (size < 0) {
174 PyErr_SetString(PyExc_SystemError,
175 "Negative size passed to PyByteArray_FromStringAndSize");
176 return NULL;
177 }
178
179 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
180 if (new == NULL)
181 return NULL;
182
183 if (size == 0) {
184 new->ob_bytes = NULL;
185 alloc = 0;
186 }
187 else {
188 alloc = size + 1;
189 new->ob_bytes = PyMem_Malloc(alloc);
190 if (new->ob_bytes == NULL) {
191 Py_DECREF(new);
192 return PyErr_NoMemory();
193 }
194 if (bytes != NULL)
195 memcpy(new->ob_bytes, bytes, size);
196 new->ob_bytes[size] = '\0'; /* Trailing null byte */
197 }
198 Py_SIZE(new) = size;
199 new->ob_alloc = alloc;
200 new->ob_exports = 0;
201
202 return (PyObject *)new;
203}
204
205Py_ssize_t
206PyByteArray_Size(PyObject *self)
207{
208 assert(self != NULL);
209 assert(PyByteArray_Check(self));
210
211 return PyByteArray_GET_SIZE(self);
212}
213
214char *
215PyByteArray_AsString(PyObject *self)
216{
217 assert(self != NULL);
218 assert(PyByteArray_Check(self));
219
220 return PyByteArray_AS_STRING(self);
221}
222
223int
224PyByteArray_Resize(PyObject *self, Py_ssize_t size)
225{
226 void *sval;
227 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
228
229 assert(self != NULL);
230 assert(PyByteArray_Check(self));
231 assert(size >= 0);
232
233 if (size < alloc / 2) {
234 /* Major downsize; resize down to exact size */
235 alloc = size + 1;
236 }
237 else if (size < alloc) {
238 /* Within allocated size; quick exit */
239 Py_SIZE(self) = size;
240 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
241 return 0;
242 }
243 else if (size <= alloc * 1.125) {
244 /* Moderate upsize; overallocate similar to list_resize() */
245 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
246 }
247 else {
248 /* Major upsize; resize up to exact size */
249 alloc = size + 1;
250 }
251
252 if (((PyByteArrayObject *)self)->ob_exports > 0) {
253 /*
254 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
255 ((PyByteArrayObject *)self)->ob_bytes);
256 */
257 PyErr_SetString(PyExc_BufferError,
258 "Existing exports of data: object cannot be re-sized");
259 return -1;
260 }
261
262 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
263 if (sval == NULL) {
264 PyErr_NoMemory();
265 return -1;
266 }
267
268 ((PyByteArrayObject *)self)->ob_bytes = sval;
269 Py_SIZE(self) = size;
270 ((PyByteArrayObject *)self)->ob_alloc = alloc;
271 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
272
273 return 0;
274}
275
276PyObject *
277PyByteArray_Concat(PyObject *a, PyObject *b)
278{
279 Py_ssize_t size;
280 Py_buffer va, vb;
281 PyByteArrayObject *result = NULL;
282
283 va.len = -1;
284 vb.len = -1;
285 if (_getbuffer(a, &va) < 0 ||
286 _getbuffer(b, &vb) < 0) {
287 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
288 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
289 goto done;
290 }
291
292 size = va.len + vb.len;
293 if (size < 0) {
294 return PyErr_NoMemory();
295 goto done;
296 }
297
298 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
299 if (result != NULL) {
300 memcpy(result->ob_bytes, va.buf, va.len);
301 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
302 }
303
304 done:
305 if (va.len != -1)
306 PyObject_ReleaseBuffer(a, &va);
307 if (vb.len != -1)
308 PyObject_ReleaseBuffer(b, &vb);
309 return (PyObject *)result;
310}
311
312/* Functions stuffed into the type object */
313
314static Py_ssize_t
315bytes_length(PyByteArrayObject *self)
316{
317 return Py_SIZE(self);
318}
319
320static PyObject *
321bytes_iconcat(PyByteArrayObject *self, PyObject *other)
322{
323 Py_ssize_t mysize;
324 Py_ssize_t size;
325 Py_buffer vo;
326
327 if (_getbuffer(other, &vo) < 0) {
328 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
329 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
330 return NULL;
331 }
332
333 mysize = Py_SIZE(self);
334 size = mysize + vo.len;
335 if (size < 0) {
336 PyObject_ReleaseBuffer(other, &vo);
337 return PyErr_NoMemory();
338 }
339 if (size < self->ob_alloc) {
340 Py_SIZE(self) = size;
341 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
342 }
343 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
344 PyObject_ReleaseBuffer(other, &vo);
345 return NULL;
346 }
347 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
348 PyObject_ReleaseBuffer(other, &vo);
349 Py_INCREF(self);
350 return (PyObject *)self;
351}
352
353static PyObject *
354bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
355{
356 PyByteArrayObject *result;
357 Py_ssize_t mysize;
358 Py_ssize_t size;
359
360 if (count < 0)
361 count = 0;
362 mysize = Py_SIZE(self);
363 size = mysize * count;
364 if (count != 0 && size / count != mysize)
365 return PyErr_NoMemory();
366 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
367 if (result != NULL && size != 0) {
368 if (mysize == 1)
369 memset(result->ob_bytes, self->ob_bytes[0], size);
370 else {
371 Py_ssize_t i;
372 for (i = 0; i < count; i++)
373 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
374 }
375 }
376 return (PyObject *)result;
377}
378
379static PyObject *
380bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
381{
382 Py_ssize_t mysize;
383 Py_ssize_t size;
384
385 if (count < 0)
386 count = 0;
387 mysize = Py_SIZE(self);
388 size = mysize * count;
389 if (count != 0 && size / count != mysize)
390 return PyErr_NoMemory();
391 if (size < self->ob_alloc) {
392 Py_SIZE(self) = size;
393 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
394 }
395 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
396 return NULL;
397
398 if (mysize == 1)
399 memset(self->ob_bytes, self->ob_bytes[0], size);
400 else {
401 Py_ssize_t i;
402 for (i = 1; i < count; i++)
403 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
404 }
405
406 Py_INCREF(self);
407 return (PyObject *)self;
408}
409
410static PyObject *
411bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
412{
413 if (i < 0)
414 i += Py_SIZE(self);
415 if (i < 0 || i >= Py_SIZE(self)) {
416 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
417 return NULL;
418 }
419 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
420}
421
422static PyObject *
Georg Brandl3e483f62008-07-16 22:57:41 +0000423bytes_subscript(PyByteArrayObject *self, PyObject *index)
Christian Heimes44720832008-05-26 13:01:01 +0000424{
Georg Brandl3e483f62008-07-16 22:57:41 +0000425 if (PyIndex_Check(index)) {
426 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000427
428 if (i == -1 && PyErr_Occurred())
429 return NULL;
430
431 if (i < 0)
432 i += PyByteArray_GET_SIZE(self);
433
434 if (i < 0 || i >= Py_SIZE(self)) {
435 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
436 return NULL;
437 }
438 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
439 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000440 else if (PySlice_Check(index)) {
Christian Heimes44720832008-05-26 13:01:01 +0000441 Py_ssize_t start, stop, step, slicelength, cur, i;
Georg Brandl3e483f62008-07-16 22:57:41 +0000442 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000443 PyByteArray_GET_SIZE(self),
444 &start, &stop, &step, &slicelength) < 0) {
445 return NULL;
446 }
447
448 if (slicelength <= 0)
449 return PyByteArray_FromStringAndSize("", 0);
450 else if (step == 1) {
451 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
452 slicelength);
453 }
454 else {
455 char *source_buf = PyByteArray_AS_STRING(self);
456 char *result_buf = (char *)PyMem_Malloc(slicelength);
457 PyObject *result;
458
459 if (result_buf == NULL)
460 return PyErr_NoMemory();
461
462 for (cur = start, i = 0; i < slicelength;
463 cur += step, i++) {
464 result_buf[i] = source_buf[cur];
465 }
466 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
467 PyMem_Free(result_buf);
468 return result;
469 }
470 }
471 else {
472 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
473 return NULL;
474 }
475}
476
477static int
478bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
479 PyObject *values)
480{
481 Py_ssize_t avail, needed;
482 void *bytes;
483 Py_buffer vbytes;
484 int res = 0;
485
486 vbytes.len = -1;
487 if (values == (PyObject *)self) {
488 /* Make a copy and call this function recursively */
489 int err;
490 values = PyByteArray_FromObject(values);
491 if (values == NULL)
492 return -1;
493 err = bytes_setslice(self, lo, hi, values);
494 Py_DECREF(values);
495 return err;
496 }
497 if (values == NULL) {
498 /* del b[lo:hi] */
499 bytes = NULL;
500 needed = 0;
501 }
502 else {
503 if (_getbuffer(values, &vbytes) < 0) {
504 PyErr_Format(PyExc_TypeError,
505 "can't set bytes slice from %.100s",
506 Py_TYPE(values)->tp_name);
507 return -1;
508 }
509 needed = vbytes.len;
510 bytes = vbytes.buf;
511 }
512
513 if (lo < 0)
514 lo = 0;
515 if (hi < lo)
516 hi = lo;
517 if (hi > Py_SIZE(self))
518 hi = Py_SIZE(self);
519
520 avail = hi - lo;
521 if (avail < 0)
522 lo = hi = avail = 0;
523
524 if (avail != needed) {
525 if (avail > needed) {
526 /*
527 0 lo hi old_size
528 | |<----avail----->|<-----tomove------>|
529 | |<-needed->|<-----tomove------>|
530 0 lo new_hi new_size
531 */
532 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
533 Py_SIZE(self) - hi);
534 }
535 /* XXX(nnorwitz): need to verify this can't overflow! */
536 if (PyByteArray_Resize((PyObject *)self,
537 Py_SIZE(self) + needed - avail) < 0) {
538 res = -1;
539 goto finish;
540 }
541 if (avail < needed) {
542 /*
543 0 lo hi old_size
544 | |<-avail->|<-----tomove------>|
545 | |<----needed---->|<-----tomove------>|
546 0 lo new_hi new_size
547 */
548 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
549 Py_SIZE(self) - lo - needed);
550 }
551 }
552
553 if (needed > 0)
554 memcpy(self->ob_bytes + lo, bytes, needed);
555
556
557 finish:
558 if (vbytes.len != -1)
559 PyObject_ReleaseBuffer(values, &vbytes);
560 return res;
561}
562
563static int
564bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
565{
566 int ival;
567
568 if (i < 0)
569 i += Py_SIZE(self);
570
571 if (i < 0 || i >= Py_SIZE(self)) {
572 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
573 return -1;
574 }
575
576 if (value == NULL)
577 return bytes_setslice(self, i, i+1, NULL);
578
579 if (!_getbytevalue(value, &ival))
580 return -1;
581
582 self->ob_bytes[i] = ival;
583 return 0;
584}
585
586static int
Georg Brandl3e483f62008-07-16 22:57:41 +0000587bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
Christian Heimes44720832008-05-26 13:01:01 +0000588{
589 Py_ssize_t start, stop, step, slicelen, needed;
590 char *bytes;
591
Georg Brandl3e483f62008-07-16 22:57:41 +0000592 if (PyIndex_Check(index)) {
593 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000594
595 if (i == -1 && PyErr_Occurred())
596 return -1;
597
598 if (i < 0)
599 i += PyByteArray_GET_SIZE(self);
600
601 if (i < 0 || i >= Py_SIZE(self)) {
602 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
603 return -1;
604 }
605
606 if (values == NULL) {
607 /* Fall through to slice assignment */
608 start = i;
609 stop = i + 1;
610 step = 1;
611 slicelen = 1;
612 }
613 else {
Georg Brandl3e483f62008-07-16 22:57:41 +0000614 int ival;
615 if (!_getbytevalue(values, &ival))
Christian Heimes44720832008-05-26 13:01:01 +0000616 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000617 self->ob_bytes[i] = (char)ival;
618 return 0;
619 }
620 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000621 else if (PySlice_Check(index)) {
622 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000623 PyByteArray_GET_SIZE(self),
624 &start, &stop, &step, &slicelen) < 0) {
625 return -1;
626 }
627 }
628 else {
629 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
630 return -1;
631 }
632
633 if (values == NULL) {
634 bytes = NULL;
635 needed = 0;
636 }
637 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
638 /* Make a copy an call this function recursively */
639 int err;
640 values = PyByteArray_FromObject(values);
641 if (values == NULL)
642 return -1;
Georg Brandl3e483f62008-07-16 22:57:41 +0000643 err = bytes_ass_subscript(self, index, values);
Christian Heimes44720832008-05-26 13:01:01 +0000644 Py_DECREF(values);
645 return err;
646 }
647 else {
648 assert(PyByteArray_Check(values));
649 bytes = ((PyByteArrayObject *)values)->ob_bytes;
650 needed = Py_SIZE(values);
651 }
652 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
653 if ((step < 0 && start < stop) ||
654 (step > 0 && start > stop))
655 stop = start;
656 if (step == 1) {
657 if (slicelen != needed) {
658 if (slicelen > needed) {
659 /*
660 0 start stop old_size
661 | |<---slicelen--->|<-----tomove------>|
662 | |<-needed->|<-----tomove------>|
663 0 lo new_hi new_size
664 */
665 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
666 Py_SIZE(self) - stop);
667 }
668 if (PyByteArray_Resize((PyObject *)self,
669 Py_SIZE(self) + needed - slicelen) < 0)
670 return -1;
671 if (slicelen < needed) {
672 /*
673 0 lo hi old_size
674 | |<-avail->|<-----tomove------>|
675 | |<----needed---->|<-----tomove------>|
676 0 lo new_hi new_size
677 */
678 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
679 Py_SIZE(self) - start - needed);
680 }
681 }
682
683 if (needed > 0)
684 memcpy(self->ob_bytes + start, bytes, needed);
685
686 return 0;
687 }
688 else {
689 if (needed == 0) {
690 /* Delete slice */
691 Py_ssize_t cur, i;
692
693 if (step < 0) {
694 stop = start + 1;
695 start = stop + step * (slicelen - 1) - 1;
696 step = -step;
697 }
698 for (cur = start, i = 0;
699 i < slicelen; cur += step, i++) {
700 Py_ssize_t lim = step - 1;
701
702 if (cur + step >= PyByteArray_GET_SIZE(self))
703 lim = PyByteArray_GET_SIZE(self) - cur - 1;
704
705 memmove(self->ob_bytes + cur - i,
706 self->ob_bytes + cur + 1, lim);
707 }
708 /* Move the tail of the bytes, in one chunk */
709 cur = start + slicelen*step;
710 if (cur < PyByteArray_GET_SIZE(self)) {
711 memmove(self->ob_bytes + cur - slicelen,
712 self->ob_bytes + cur,
713 PyByteArray_GET_SIZE(self) - cur);
714 }
715 if (PyByteArray_Resize((PyObject *)self,
716 PyByteArray_GET_SIZE(self) - slicelen) < 0)
717 return -1;
718
719 return 0;
720 }
721 else {
722 /* Assign slice */
723 Py_ssize_t cur, i;
724
725 if (needed != slicelen) {
726 PyErr_Format(PyExc_ValueError,
727 "attempt to assign bytes of size %zd "
728 "to extended slice of size %zd",
729 needed, slicelen);
730 return -1;
731 }
732 for (cur = start, i = 0; i < slicelen; cur += step, i++)
733 self->ob_bytes[cur] = bytes[i];
734 return 0;
735 }
736 }
737}
738
739static int
740bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
741{
742 static char *kwlist[] = {"source", "encoding", "errors", 0};
743 PyObject *arg = NULL;
744 const char *encoding = NULL;
745 const char *errors = NULL;
746 Py_ssize_t count;
747 PyObject *it;
748 PyObject *(*iternext)(PyObject *);
749
750 if (Py_SIZE(self) != 0) {
751 /* Empty previous contents (yes, do this first of all!) */
752 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
753 return -1;
754 }
755
756 /* Parse arguments */
757 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
758 &arg, &encoding, &errors))
759 return -1;
760
761 /* Make a quick exit if no first argument */
762 if (arg == NULL) {
763 if (encoding != NULL || errors != NULL) {
764 PyErr_SetString(PyExc_TypeError,
765 "encoding or errors without sequence argument");
766 return -1;
767 }
768 return 0;
769 }
770
771 if (PyBytes_Check(arg)) {
772 PyObject *new, *encoded;
773 if (encoding != NULL) {
774 encoded = PyCodec_Encode(arg, encoding, errors);
775 if (encoded == NULL)
776 return -1;
777 assert(PyBytes_Check(encoded));
778 }
779 else {
780 encoded = arg;
781 Py_INCREF(arg);
782 }
783 new = bytes_iconcat(self, arg);
784 Py_DECREF(encoded);
785 if (new == NULL)
786 return -1;
787 Py_DECREF(new);
788 return 0;
789 }
790
791 if (PyUnicode_Check(arg)) {
792 /* Encode via the codec registry */
793 PyObject *encoded, *new;
794 if (encoding == NULL) {
795 PyErr_SetString(PyExc_TypeError,
796 "unicode argument without an encoding");
797 return -1;
798 }
799 encoded = PyCodec_Encode(arg, encoding, errors);
800 if (encoded == NULL)
801 return -1;
802 assert(PyBytes_Check(encoded));
803 new = bytes_iconcat(self, encoded);
804 Py_DECREF(encoded);
805 if (new == NULL)
806 return -1;
807 Py_DECREF(new);
808 return 0;
809 }
810
811 /* If it's not unicode, there can't be encoding or errors */
812 if (encoding != NULL || errors != NULL) {
813 PyErr_SetString(PyExc_TypeError,
814 "encoding or errors without a string argument");
815 return -1;
816 }
817
818 /* Is it an int? */
819 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
820 if (count == -1 && PyErr_Occurred())
821 PyErr_Clear();
822 else {
823 if (count < 0) {
824 PyErr_SetString(PyExc_ValueError, "negative count");
825 return -1;
826 }
827 if (count > 0) {
828 if (PyByteArray_Resize((PyObject *)self, count))
829 return -1;
830 memset(self->ob_bytes, 0, count);
831 }
832 return 0;
833 }
834
835 /* Use the buffer API */
836 if (PyObject_CheckBuffer(arg)) {
837 Py_ssize_t size;
838 Py_buffer view;
839 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
840 return -1;
841 size = view.len;
842 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
843 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
844 goto fail;
845 PyObject_ReleaseBuffer(arg, &view);
846 return 0;
847 fail:
848 PyObject_ReleaseBuffer(arg, &view);
849 return -1;
850 }
851
852 /* XXX Optimize this if the arguments is a list, tuple */
853
854 /* Get the iterator */
855 it = PyObject_GetIter(arg);
856 if (it == NULL)
857 return -1;
858 iternext = *Py_TYPE(it)->tp_iternext;
859
860 /* Run the iterator to exhaustion */
861 for (;;) {
862 PyObject *item;
863 Py_ssize_t value;
864
865 /* Get the next item */
866 item = iternext(it);
867 if (item == NULL) {
868 if (PyErr_Occurred()) {
869 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
870 goto error;
871 PyErr_Clear();
872 }
873 break;
874 }
875
876 /* Interpret it as an int (__index__) */
877 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
878 Py_DECREF(item);
879 if (value == -1 && PyErr_Occurred())
880 goto error;
881
882 /* Range check */
883 if (value < 0 || value >= 256) {
884 PyErr_SetString(PyExc_ValueError,
885 "bytes must be in range(0, 256)");
886 goto error;
887 }
888
889 /* Append the byte */
890 if (Py_SIZE(self) < self->ob_alloc)
891 Py_SIZE(self)++;
892 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
893 goto error;
894 self->ob_bytes[Py_SIZE(self)-1] = value;
895 }
896
897 /* Clean up and return success */
898 Py_DECREF(it);
899 return 0;
900
901 error:
902 /* Error handling when it != NULL */
903 Py_DECREF(it);
904 return -1;
905}
906
907/* Mostly copied from string_repr, but without the
908 "smart quote" functionality. */
909static PyObject *
910bytes_repr(PyByteArrayObject *self)
911{
912 static const char *hexdigits = "0123456789abcdef";
913 const char *quote_prefix = "bytearray(b";
914 const char *quote_postfix = ")";
915 Py_ssize_t length = Py_SIZE(self);
916 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
917 size_t newsize = 14 + 4 * length;
918 PyObject *v;
919 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
920 PyErr_SetString(PyExc_OverflowError,
921 "bytearray object is too large to make repr");
922 return NULL;
923 }
924 v = PyUnicode_FromUnicode(NULL, newsize);
925 if (v == NULL) {
926 return NULL;
927 }
928 else {
929 register Py_ssize_t i;
930 register Py_UNICODE c;
931 register Py_UNICODE *p;
932 int quote;
933
934 /* Figure out which quote to use; single is preferred */
935 quote = '\'';
936 {
937 char *test, *start;
938 start = PyByteArray_AS_STRING(self);
939 for (test = start; test < start+length; ++test) {
940 if (*test == '"') {
941 quote = '\''; /* back to single */
942 goto decided;
943 }
944 else if (*test == '\'')
945 quote = '"';
946 }
947 decided:
948 ;
949 }
950
951 p = PyUnicode_AS_UNICODE(v);
952 while (*quote_prefix)
953 *p++ = *quote_prefix++;
954 *p++ = quote;
955
956 for (i = 0; i < length; i++) {
957 /* There's at least enough room for a hex escape
958 and a closing quote. */
959 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
960 c = self->ob_bytes[i];
961 if (c == '\'' || c == '\\')
962 *p++ = '\\', *p++ = c;
963 else if (c == '\t')
964 *p++ = '\\', *p++ = 't';
965 else if (c == '\n')
966 *p++ = '\\', *p++ = 'n';
967 else if (c == '\r')
968 *p++ = '\\', *p++ = 'r';
969 else if (c == 0)
970 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
971 else if (c < ' ' || c >= 0x7f) {
972 *p++ = '\\';
973 *p++ = 'x';
974 *p++ = hexdigits[(c & 0xf0) >> 4];
975 *p++ = hexdigits[c & 0xf];
976 }
977 else
978 *p++ = c;
979 }
980 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
981 *p++ = quote;
982 while (*quote_postfix) {
983 *p++ = *quote_postfix++;
984 }
985 *p = '\0';
986 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
987 Py_DECREF(v);
988 return NULL;
989 }
990 return v;
991 }
992}
993
994static PyObject *
995bytes_str(PyObject *op)
996{
997#if 0
998 if (Py_BytesWarningFlag) {
999 if (PyErr_WarnEx(PyExc_BytesWarning,
1000 "str() on a bytearray instance", 1))
1001 return NULL;
1002 }
1003 return bytes_repr((PyByteArrayObject*)op);
1004#endif
1005 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1006}
1007
1008static PyObject *
1009bytes_richcompare(PyObject *self, PyObject *other, int op)
1010{
1011 Py_ssize_t self_size, other_size;
1012 Py_buffer self_bytes, other_bytes;
1013 PyObject *res;
1014 Py_ssize_t minsize;
1015 int cmp;
1016
1017 /* Bytes can be compared to anything that supports the (binary)
1018 buffer API. Except that a comparison with Unicode is always an
1019 error, even if the comparison is for equality. */
1020 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1021 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1022 if (Py_BytesWarningFlag && op == Py_EQ) {
1023 if (PyErr_WarnEx(PyExc_BytesWarning,
1024 "Comparsion between bytearray and string", 1))
1025 return NULL;
1026 }
1027
1028 Py_INCREF(Py_NotImplemented);
1029 return Py_NotImplemented;
1030 }
1031
1032 self_size = _getbuffer(self, &self_bytes);
1033 if (self_size < 0) {
1034 PyErr_Clear();
1035 Py_INCREF(Py_NotImplemented);
1036 return Py_NotImplemented;
1037 }
1038
1039 other_size = _getbuffer(other, &other_bytes);
1040 if (other_size < 0) {
1041 PyErr_Clear();
1042 PyObject_ReleaseBuffer(self, &self_bytes);
1043 Py_INCREF(Py_NotImplemented);
1044 return Py_NotImplemented;
1045 }
1046
1047 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1048 /* Shortcut: if the lengths differ, the objects differ */
1049 cmp = (op == Py_NE);
1050 }
1051 else {
1052 minsize = self_size;
1053 if (other_size < minsize)
1054 minsize = other_size;
1055
1056 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1057 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1058
1059 if (cmp == 0) {
1060 if (self_size < other_size)
1061 cmp = -1;
1062 else if (self_size > other_size)
1063 cmp = 1;
1064 }
1065
1066 switch (op) {
1067 case Py_LT: cmp = cmp < 0; break;
1068 case Py_LE: cmp = cmp <= 0; break;
1069 case Py_EQ: cmp = cmp == 0; break;
1070 case Py_NE: cmp = cmp != 0; break;
1071 case Py_GT: cmp = cmp > 0; break;
1072 case Py_GE: cmp = cmp >= 0; break;
1073 }
1074 }
1075
1076 res = cmp ? Py_True : Py_False;
1077 PyObject_ReleaseBuffer(self, &self_bytes);
1078 PyObject_ReleaseBuffer(other, &other_bytes);
1079 Py_INCREF(res);
1080 return res;
1081}
1082
1083static void
1084bytes_dealloc(PyByteArrayObject *self)
1085{
1086 if (self->ob_bytes != 0) {
1087 PyMem_Free(self->ob_bytes);
1088 }
1089 Py_TYPE(self)->tp_free((PyObject *)self);
1090}
1091
1092
1093/* -------------------------------------------------------------------- */
1094/* Methods */
1095
1096#define STRINGLIB_CHAR char
1097#define STRINGLIB_CMP memcmp
1098#define STRINGLIB_LEN PyByteArray_GET_SIZE
1099#define STRINGLIB_STR PyByteArray_AS_STRING
1100#define STRINGLIB_NEW PyByteArray_FromStringAndSize
1101#define STRINGLIB_EMPTY nullbytes
1102#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1103#define STRINGLIB_MUTABLE 1
1104
1105#include "stringlib/fastsearch.h"
1106#include "stringlib/count.h"
1107#include "stringlib/find.h"
1108#include "stringlib/partition.h"
1109#include "stringlib/ctype.h"
1110#include "stringlib/transmogrify.h"
1111
1112
1113/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1114were copied from the old char* style string object. */
1115
1116Py_LOCAL_INLINE(void)
1117_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1118{
1119 if (*end > len)
1120 *end = len;
1121 else if (*end < 0)
1122 *end += len;
1123 if (*end < 0)
1124 *end = 0;
1125 if (*start < 0)
1126 *start += len;
1127 if (*start < 0)
1128 *start = 0;
1129}
1130
1131
1132Py_LOCAL_INLINE(Py_ssize_t)
1133bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1134{
1135 PyObject *subobj;
1136 Py_buffer subbuf;
1137 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1138 Py_ssize_t res;
1139
1140 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1141 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1142 return -2;
1143 if (_getbuffer(subobj, &subbuf) < 0)
1144 return -2;
1145 if (dir > 0)
1146 res = stringlib_find_slice(
1147 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1148 subbuf.buf, subbuf.len, start, end);
1149 else
1150 res = stringlib_rfind_slice(
1151 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1152 subbuf.buf, subbuf.len, start, end);
1153 PyObject_ReleaseBuffer(subobj, &subbuf);
1154 return res;
1155}
1156
1157PyDoc_STRVAR(find__doc__,
1158"B.find(sub [,start [,end]]) -> int\n\
1159\n\
1160Return the lowest index in B where subsection sub is found,\n\
1161such that sub is contained within s[start,end]. Optional\n\
1162arguments start and end are interpreted as in slice notation.\n\
1163\n\
1164Return -1 on failure.");
1165
1166static PyObject *
1167bytes_find(PyByteArrayObject *self, PyObject *args)
1168{
1169 Py_ssize_t result = bytes_find_internal(self, args, +1);
1170 if (result == -2)
1171 return NULL;
1172 return PyInt_FromSsize_t(result);
1173}
1174
1175PyDoc_STRVAR(count__doc__,
1176"B.count(sub [,start [,end]]) -> int\n\
1177\n\
1178Return the number of non-overlapping occurrences of subsection sub in\n\
1179bytes B[start:end]. Optional arguments start and end are interpreted\n\
1180as in slice notation.");
1181
1182static PyObject *
1183bytes_count(PyByteArrayObject *self, PyObject *args)
1184{
1185 PyObject *sub_obj;
1186 const char *str = PyByteArray_AS_STRING(self);
1187 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1188 Py_buffer vsub;
1189 PyObject *count_obj;
1190
1191 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1192 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1193 return NULL;
1194
1195 if (_getbuffer(sub_obj, &vsub) < 0)
1196 return NULL;
1197
1198 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1199
1200 count_obj = PyInt_FromSsize_t(
1201 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1202 );
1203 PyObject_ReleaseBuffer(sub_obj, &vsub);
1204 return count_obj;
1205}
1206
1207
1208PyDoc_STRVAR(index__doc__,
1209"B.index(sub [,start [,end]]) -> int\n\
1210\n\
1211Like B.find() but raise ValueError when the subsection is not found.");
1212
1213static PyObject *
1214bytes_index(PyByteArrayObject *self, PyObject *args)
1215{
1216 Py_ssize_t result = bytes_find_internal(self, args, +1);
1217 if (result == -2)
1218 return NULL;
1219 if (result == -1) {
1220 PyErr_SetString(PyExc_ValueError,
1221 "subsection not found");
1222 return NULL;
1223 }
1224 return PyInt_FromSsize_t(result);
1225}
1226
1227
1228PyDoc_STRVAR(rfind__doc__,
1229"B.rfind(sub [,start [,end]]) -> int\n\
1230\n\
1231Return the highest index in B where subsection sub is found,\n\
1232such that sub is contained within s[start,end]. Optional\n\
1233arguments start and end are interpreted as in slice notation.\n\
1234\n\
1235Return -1 on failure.");
1236
1237static PyObject *
1238bytes_rfind(PyByteArrayObject *self, PyObject *args)
1239{
1240 Py_ssize_t result = bytes_find_internal(self, args, -1);
1241 if (result == -2)
1242 return NULL;
1243 return PyInt_FromSsize_t(result);
1244}
1245
1246
1247PyDoc_STRVAR(rindex__doc__,
1248"B.rindex(sub [,start [,end]]) -> int\n\
1249\n\
1250Like B.rfind() but raise ValueError when the subsection is not found.");
1251
1252static PyObject *
1253bytes_rindex(PyByteArrayObject *self, PyObject *args)
1254{
1255 Py_ssize_t result = bytes_find_internal(self, args, -1);
1256 if (result == -2)
1257 return NULL;
1258 if (result == -1) {
1259 PyErr_SetString(PyExc_ValueError,
1260 "subsection not found");
1261 return NULL;
1262 }
1263 return PyInt_FromSsize_t(result);
1264}
1265
1266
1267static int
1268bytes_contains(PyObject *self, PyObject *arg)
1269{
1270 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1271 if (ival == -1 && PyErr_Occurred()) {
1272 Py_buffer varg;
1273 int pos;
1274 PyErr_Clear();
1275 if (_getbuffer(arg, &varg) < 0)
1276 return -1;
1277 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1278 varg.buf, varg.len, 0);
1279 PyObject_ReleaseBuffer(arg, &varg);
1280 return pos >= 0;
1281 }
1282 if (ival < 0 || ival >= 256) {
1283 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1284 return -1;
1285 }
1286
1287 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1288}
1289
1290
1291/* Matches the end (direction >= 0) or start (direction < 0) of self
1292 * against substr, using the start and end arguments. Returns
1293 * -1 on error, 0 if not found and 1 if found.
1294 */
1295Py_LOCAL(int)
1296_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1297 Py_ssize_t end, int direction)
1298{
1299 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1300 const char* str;
1301 Py_buffer vsubstr;
1302 int rv = 0;
1303
1304 str = PyByteArray_AS_STRING(self);
1305
1306 if (_getbuffer(substr, &vsubstr) < 0)
1307 return -1;
1308
1309 _adjust_indices(&start, &end, len);
1310
1311 if (direction < 0) {
1312 /* startswith */
1313 if (start+vsubstr.len > len) {
1314 goto done;
1315 }
1316 } else {
1317 /* endswith */
1318 if (end-start < vsubstr.len || start > len) {
1319 goto done;
1320 }
1321
1322 if (end-vsubstr.len > start)
1323 start = end - vsubstr.len;
1324 }
1325 if (end-start >= vsubstr.len)
1326 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1327
1328done:
1329 PyObject_ReleaseBuffer(substr, &vsubstr);
1330 return rv;
1331}
1332
1333
1334PyDoc_STRVAR(startswith__doc__,
1335"B.startswith(prefix [,start [,end]]) -> bool\n\
1336\n\
1337Return True if B starts with the specified prefix, False otherwise.\n\
1338With optional start, test B beginning at that position.\n\
1339With optional end, stop comparing B at that position.\n\
1340prefix can also be a tuple of strings to try.");
1341
1342static PyObject *
1343bytes_startswith(PyByteArrayObject *self, PyObject *args)
1344{
1345 Py_ssize_t start = 0;
1346 Py_ssize_t end = PY_SSIZE_T_MAX;
1347 PyObject *subobj;
1348 int result;
1349
1350 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1351 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1352 return NULL;
1353 if (PyTuple_Check(subobj)) {
1354 Py_ssize_t i;
1355 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1356 result = _bytes_tailmatch(self,
1357 PyTuple_GET_ITEM(subobj, i),
1358 start, end, -1);
1359 if (result == -1)
1360 return NULL;
1361 else if (result) {
1362 Py_RETURN_TRUE;
1363 }
1364 }
1365 Py_RETURN_FALSE;
1366 }
1367 result = _bytes_tailmatch(self, subobj, start, end, -1);
1368 if (result == -1)
1369 return NULL;
1370 else
1371 return PyBool_FromLong(result);
1372}
1373
1374PyDoc_STRVAR(endswith__doc__,
1375"B.endswith(suffix [,start [,end]]) -> bool\n\
1376\n\
1377Return True if B ends with the specified suffix, False otherwise.\n\
1378With optional start, test B beginning at that position.\n\
1379With optional end, stop comparing B at that position.\n\
1380suffix can also be a tuple of strings to try.");
1381
1382static PyObject *
1383bytes_endswith(PyByteArrayObject *self, PyObject *args)
1384{
1385 Py_ssize_t start = 0;
1386 Py_ssize_t end = PY_SSIZE_T_MAX;
1387 PyObject *subobj;
1388 int result;
1389
1390 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1391 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1392 return NULL;
1393 if (PyTuple_Check(subobj)) {
1394 Py_ssize_t i;
1395 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1396 result = _bytes_tailmatch(self,
1397 PyTuple_GET_ITEM(subobj, i),
1398 start, end, +1);
1399 if (result == -1)
1400 return NULL;
1401 else if (result) {
1402 Py_RETURN_TRUE;
1403 }
1404 }
1405 Py_RETURN_FALSE;
1406 }
1407 result = _bytes_tailmatch(self, subobj, start, end, +1);
1408 if (result == -1)
1409 return NULL;
1410 else
1411 return PyBool_FromLong(result);
1412}
1413
1414
1415PyDoc_STRVAR(translate__doc__,
1416"B.translate(table[, deletechars]) -> bytearray\n\
1417\n\
1418Return a copy of B, where all characters occurring in the\n\
1419optional argument deletechars are removed, and the remaining\n\
1420characters have been mapped through the given translation\n\
1421table, which must be a bytes object of length 256.");
1422
1423static PyObject *
1424bytes_translate(PyByteArrayObject *self, PyObject *args)
1425{
1426 register char *input, *output;
1427 register const char *table;
1428 register Py_ssize_t i, c, changed = 0;
1429 PyObject *input_obj = (PyObject*)self;
1430 const char *output_start;
1431 Py_ssize_t inlen;
1432 PyObject *result;
1433 int trans_table[256];
1434 PyObject *tableobj, *delobj = NULL;
1435 Py_buffer vtable, vdel;
1436
1437 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1438 &tableobj, &delobj))
1439 return NULL;
1440
1441 if (_getbuffer(tableobj, &vtable) < 0)
1442 return NULL;
1443
1444 if (vtable.len != 256) {
1445 PyErr_SetString(PyExc_ValueError,
1446 "translation table must be 256 characters long");
1447 result = NULL;
1448 goto done;
1449 }
1450
1451 if (delobj != NULL) {
1452 if (_getbuffer(delobj, &vdel) < 0) {
1453 result = NULL;
1454 goto done;
1455 }
1456 }
1457 else {
1458 vdel.buf = NULL;
1459 vdel.len = 0;
1460 }
1461
1462 table = (const char *)vtable.buf;
1463 inlen = PyByteArray_GET_SIZE(input_obj);
1464 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1465 if (result == NULL)
1466 goto done;
1467 output_start = output = PyByteArray_AsString(result);
1468 input = PyByteArray_AS_STRING(input_obj);
1469
1470 if (vdel.len == 0) {
1471 /* If no deletions are required, use faster code */
1472 for (i = inlen; --i >= 0; ) {
1473 c = Py_CHARMASK(*input++);
1474 if (Py_CHARMASK((*output++ = table[c])) != c)
1475 changed = 1;
1476 }
1477 if (changed || !PyByteArray_CheckExact(input_obj))
1478 goto done;
1479 Py_DECREF(result);
1480 Py_INCREF(input_obj);
1481 result = input_obj;
1482 goto done;
1483 }
1484
1485 for (i = 0; i < 256; i++)
1486 trans_table[i] = Py_CHARMASK(table[i]);
1487
1488 for (i = 0; i < vdel.len; i++)
1489 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1490
1491 for (i = inlen; --i >= 0; ) {
1492 c = Py_CHARMASK(*input++);
1493 if (trans_table[c] != -1)
1494 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1495 continue;
1496 changed = 1;
1497 }
1498 if (!changed && PyByteArray_CheckExact(input_obj)) {
1499 Py_DECREF(result);
1500 Py_INCREF(input_obj);
1501 result = input_obj;
1502 goto done;
1503 }
1504 /* Fix the size of the resulting string */
1505 if (inlen > 0)
1506 PyByteArray_Resize(result, output - output_start);
1507
1508done:
1509 PyObject_ReleaseBuffer(tableobj, &vtable);
1510 if (delobj != NULL)
1511 PyObject_ReleaseBuffer(delobj, &vdel);
1512 return result;
1513}
1514
1515
/* Search directions accepted by findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in target[0:target_len],
   or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs in target at offset.
   Don't call if length < 2: the first and last bytes are compared
   directly and memcmp() is given the remaining length-2 bytes.
   Arguments are evaluated more than once, so pass side-effect-free
   expressions. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1529
1530
1531/* Bytes ops must return a string. */
1532/* If the object is subclass of bytes, create a copy */
1533Py_LOCAL(PyByteArrayObject *)
1534return_self(PyByteArrayObject *self)
1535{
1536 if (PyByteArray_CheckExact(self)) {
1537 Py_INCREF(self);
1538 return (PyByteArrayObject *)self;
1539 }
1540 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1541 PyByteArray_AS_STRING(self),
1542 PyByteArray_GET_SIZE(self));
1543}
1544
1545Py_LOCAL_INLINE(Py_ssize_t)
1546countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1547{
1548 Py_ssize_t count=0;
1549 const char *start=target;
1550 const char *end=target+target_len;
1551
1552 while ( (start=findchar(start, end-start, c)) != NULL ) {
1553 count++;
1554 if (count >= maxcount)
1555 break;
1556 start += 1;
1557 }
1558 return count;
1559}
1560
1561Py_LOCAL(Py_ssize_t)
1562findstring(const char *target, Py_ssize_t target_len,
1563 const char *pattern, Py_ssize_t pattern_len,
1564 Py_ssize_t start,
1565 Py_ssize_t end,
1566 int direction)
1567{
1568 if (start < 0) {
1569 start += target_len;
1570 if (start < 0)
1571 start = 0;
1572 }
1573 if (end > target_len) {
1574 end = target_len;
1575 } else if (end < 0) {
1576 end += target_len;
1577 if (end < 0)
1578 end = 0;
1579 }
1580
1581 /* zero-length substrings always match at the first attempt */
1582 if (pattern_len == 0)
1583 return (direction > 0) ? start : end;
1584
1585 end -= pattern_len;
1586
1587 if (direction < 0) {
1588 for (; end >= start; end--)
1589 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1590 return end;
1591 } else {
1592 for (; start <= end; start++)
1593 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1594 return start;
1595 }
1596 return -1;
1597}
1598
1599Py_LOCAL_INLINE(Py_ssize_t)
1600countstring(const char *target, Py_ssize_t target_len,
1601 const char *pattern, Py_ssize_t pattern_len,
1602 Py_ssize_t start,
1603 Py_ssize_t end,
1604 int direction, Py_ssize_t maxcount)
1605{
1606 Py_ssize_t count=0;
1607
1608 if (start < 0) {
1609 start += target_len;
1610 if (start < 0)
1611 start = 0;
1612 }
1613 if (end > target_len) {
1614 end = target_len;
1615 } else if (end < 0) {
1616 end += target_len;
1617 if (end < 0)
1618 end = 0;
1619 }
1620
1621 /* zero-length substrings match everywhere */
1622 if (pattern_len == 0 || maxcount == 0) {
1623 if (target_len+1 < maxcount)
1624 return target_len+1;
1625 return maxcount;
1626 }
1627
1628 end -= pattern_len;
1629 if (direction < 0) {
1630 for (; (end >= start); end--)
1631 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1632 count++;
1633 if (--maxcount <= 0) break;
1634 end -= pattern_len-1;
1635 }
1636 } else {
1637 for (; (start <= end); start++)
1638 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1639 count++;
1640 if (--maxcount <= 0)
1641 break;
1642 start += pattern_len-1;
1643 }
1644 }
1645 return count;
1646}
1647
1648
1649/* Algorithms for different cases of string replacement */
1650
1651/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1652Py_LOCAL(PyByteArrayObject *)
1653replace_interleave(PyByteArrayObject *self,
1654 const char *to_s, Py_ssize_t to_len,
1655 Py_ssize_t maxcount)
1656{
1657 char *self_s, *result_s;
1658 Py_ssize_t self_len, result_len;
1659 Py_ssize_t count, i, product;
1660 PyByteArrayObject *result;
1661
1662 self_len = PyByteArray_GET_SIZE(self);
1663
1664 /* 1 at the end plus 1 after every character */
1665 count = self_len+1;
1666 if (maxcount < count)
1667 count = maxcount;
1668
1669 /* Check for overflow */
1670 /* result_len = count * to_len + self_len; */
1671 product = count * to_len;
1672 if (product / to_len != count) {
1673 PyErr_SetString(PyExc_OverflowError,
1674 "replace string is too long");
1675 return NULL;
1676 }
1677 result_len = product + self_len;
1678 if (result_len < 0) {
1679 PyErr_SetString(PyExc_OverflowError,
1680 "replace string is too long");
1681 return NULL;
1682 }
1683
1684 if (! (result = (PyByteArrayObject *)
1685 PyByteArray_FromStringAndSize(NULL, result_len)) )
1686 return NULL;
1687
1688 self_s = PyByteArray_AS_STRING(self);
1689 result_s = PyByteArray_AS_STRING(result);
1690
1691 /* TODO: special case single character, which doesn't need memcpy */
1692
1693 /* Lay the first one down (guaranteed this will occur) */
1694 Py_MEMCPY(result_s, to_s, to_len);
1695 result_s += to_len;
1696 count -= 1;
1697
1698 for (i=0; i<count; i++) {
1699 *result_s++ = *self_s++;
1700 Py_MEMCPY(result_s, to_s, to_len);
1701 result_s += to_len;
1702 }
1703
1704 /* Copy the rest of the original string */
1705 Py_MEMCPY(result_s, self_s, self_len-i);
1706
1707 return result;
1708}
1709
1710/* Special case for deleting a single character */
1711/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1712Py_LOCAL(PyByteArrayObject *)
1713replace_delete_single_character(PyByteArrayObject *self,
1714 char from_c, Py_ssize_t maxcount)
1715{
1716 char *self_s, *result_s;
1717 char *start, *next, *end;
1718 Py_ssize_t self_len, result_len;
1719 Py_ssize_t count;
1720 PyByteArrayObject *result;
1721
1722 self_len = PyByteArray_GET_SIZE(self);
1723 self_s = PyByteArray_AS_STRING(self);
1724
1725 count = countchar(self_s, self_len, from_c, maxcount);
1726 if (count == 0) {
1727 return return_self(self);
1728 }
1729
1730 result_len = self_len - count; /* from_len == 1 */
1731 assert(result_len>=0);
1732
1733 if ( (result = (PyByteArrayObject *)
1734 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1735 return NULL;
1736 result_s = PyByteArray_AS_STRING(result);
1737
1738 start = self_s;
1739 end = self_s + self_len;
1740 while (count-- > 0) {
1741 next = findchar(start, end-start, from_c);
1742 if (next == NULL)
1743 break;
1744 Py_MEMCPY(result_s, start, next-start);
1745 result_s += (next-start);
1746 start = next+1;
1747 }
1748 Py_MEMCPY(result_s, start, end-start);
1749
1750 return result;
1751}
1752
1753/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1754
1755Py_LOCAL(PyByteArrayObject *)
1756replace_delete_substring(PyByteArrayObject *self,
1757 const char *from_s, Py_ssize_t from_len,
1758 Py_ssize_t maxcount)
1759{
1760 char *self_s, *result_s;
1761 char *start, *next, *end;
1762 Py_ssize_t self_len, result_len;
1763 Py_ssize_t count, offset;
1764 PyByteArrayObject *result;
1765
1766 self_len = PyByteArray_GET_SIZE(self);
1767 self_s = PyByteArray_AS_STRING(self);
1768
1769 count = countstring(self_s, self_len,
1770 from_s, from_len,
1771 0, self_len, 1,
1772 maxcount);
1773
1774 if (count == 0) {
1775 /* no matches */
1776 return return_self(self);
1777 }
1778
1779 result_len = self_len - (count * from_len);
1780 assert (result_len>=0);
1781
1782 if ( (result = (PyByteArrayObject *)
1783 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1784 return NULL;
1785
1786 result_s = PyByteArray_AS_STRING(result);
1787
1788 start = self_s;
1789 end = self_s + self_len;
1790 while (count-- > 0) {
1791 offset = findstring(start, end-start,
1792 from_s, from_len,
1793 0, end-start, FORWARD);
1794 if (offset == -1)
1795 break;
1796 next = start + offset;
1797
1798 Py_MEMCPY(result_s, start, next-start);
1799
1800 result_s += (next-start);
1801 start = next+from_len;
1802 }
1803 Py_MEMCPY(result_s, start, end-start);
1804 return result;
1805}
1806
1807/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1808Py_LOCAL(PyByteArrayObject *)
1809replace_single_character_in_place(PyByteArrayObject *self,
1810 char from_c, char to_c,
1811 Py_ssize_t maxcount)
1812{
1813 char *self_s, *result_s, *start, *end, *next;
1814 Py_ssize_t self_len;
1815 PyByteArrayObject *result;
1816
1817 /* The result string will be the same size */
1818 self_s = PyByteArray_AS_STRING(self);
1819 self_len = PyByteArray_GET_SIZE(self);
1820
1821 next = findchar(self_s, self_len, from_c);
1822
1823 if (next == NULL) {
1824 /* No matches; return the original bytes */
1825 return return_self(self);
1826 }
1827
1828 /* Need to make a new bytes */
1829 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1830 if (result == NULL)
1831 return NULL;
1832 result_s = PyByteArray_AS_STRING(result);
1833 Py_MEMCPY(result_s, self_s, self_len);
1834
1835 /* change everything in-place, starting with this one */
1836 start = result_s + (next-self_s);
1837 *start = to_c;
1838 start++;
1839 end = result_s + self_len;
1840
1841 while (--maxcount > 0) {
1842 next = findchar(start, end-start, from_c);
1843 if (next == NULL)
1844 break;
1845 *next = to_c;
1846 start = next+1;
1847 }
1848
1849 return result;
1850}
1851
1852/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1853Py_LOCAL(PyByteArrayObject *)
1854replace_substring_in_place(PyByteArrayObject *self,
1855 const char *from_s, Py_ssize_t from_len,
1856 const char *to_s, Py_ssize_t to_len,
1857 Py_ssize_t maxcount)
1858{
1859 char *result_s, *start, *end;
1860 char *self_s;
1861 Py_ssize_t self_len, offset;
1862 PyByteArrayObject *result;
1863
1864 /* The result bytes will be the same size */
1865
1866 self_s = PyByteArray_AS_STRING(self);
1867 self_len = PyByteArray_GET_SIZE(self);
1868
1869 offset = findstring(self_s, self_len,
1870 from_s, from_len,
1871 0, self_len, FORWARD);
1872 if (offset == -1) {
1873 /* No matches; return the original bytes */
1874 return return_self(self);
1875 }
1876
1877 /* Need to make a new bytes */
1878 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1879 if (result == NULL)
1880 return NULL;
1881 result_s = PyByteArray_AS_STRING(result);
1882 Py_MEMCPY(result_s, self_s, self_len);
1883
1884 /* change everything in-place, starting with this one */
1885 start = result_s + offset;
1886 Py_MEMCPY(start, to_s, from_len);
1887 start += from_len;
1888 end = result_s + self_len;
1889
1890 while ( --maxcount > 0) {
1891 offset = findstring(start, end-start,
1892 from_s, from_len,
1893 0, end-start, FORWARD);
1894 if (offset==-1)
1895 break;
1896 Py_MEMCPY(start+offset, to_s, from_len);
1897 start += offset+from_len;
1898 }
1899
1900 return result;
1901}
1902
1903/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1904Py_LOCAL(PyByteArrayObject *)
1905replace_single_character(PyByteArrayObject *self,
1906 char from_c,
1907 const char *to_s, Py_ssize_t to_len,
1908 Py_ssize_t maxcount)
1909{
1910 char *self_s, *result_s;
1911 char *start, *next, *end;
1912 Py_ssize_t self_len, result_len;
1913 Py_ssize_t count, product;
1914 PyByteArrayObject *result;
1915
1916 self_s = PyByteArray_AS_STRING(self);
1917 self_len = PyByteArray_GET_SIZE(self);
1918
1919 count = countchar(self_s, self_len, from_c, maxcount);
1920 if (count == 0) {
1921 /* no matches, return unchanged */
1922 return return_self(self);
1923 }
1924
1925 /* use the difference between current and new, hence the "-1" */
1926 /* result_len = self_len + count * (to_len-1) */
1927 product = count * (to_len-1);
1928 if (product / (to_len-1) != count) {
1929 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1930 return NULL;
1931 }
1932 result_len = self_len + product;
1933 if (result_len < 0) {
1934 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1935 return NULL;
1936 }
1937
1938 if ( (result = (PyByteArrayObject *)
1939 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1940 return NULL;
1941 result_s = PyByteArray_AS_STRING(result);
1942
1943 start = self_s;
1944 end = self_s + self_len;
1945 while (count-- > 0) {
1946 next = findchar(start, end-start, from_c);
1947 if (next == NULL)
1948 break;
1949
1950 if (next == start) {
1951 /* replace with the 'to' */
1952 Py_MEMCPY(result_s, to_s, to_len);
1953 result_s += to_len;
1954 start += 1;
1955 } else {
1956 /* copy the unchanged old then the 'to' */
1957 Py_MEMCPY(result_s, start, next-start);
1958 result_s += (next-start);
1959 Py_MEMCPY(result_s, to_s, to_len);
1960 result_s += to_len;
1961 start = next+1;
1962 }
1963 }
1964 /* Copy the remainder of the remaining bytes */
1965 Py_MEMCPY(result_s, start, end-start);
1966
1967 return result;
1968}
1969
1970/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1971Py_LOCAL(PyByteArrayObject *)
1972replace_substring(PyByteArrayObject *self,
1973 const char *from_s, Py_ssize_t from_len,
1974 const char *to_s, Py_ssize_t to_len,
1975 Py_ssize_t maxcount)
1976{
1977 char *self_s, *result_s;
1978 char *start, *next, *end;
1979 Py_ssize_t self_len, result_len;
1980 Py_ssize_t count, offset, product;
1981 PyByteArrayObject *result;
1982
1983 self_s = PyByteArray_AS_STRING(self);
1984 self_len = PyByteArray_GET_SIZE(self);
1985
1986 count = countstring(self_s, self_len,
1987 from_s, from_len,
1988 0, self_len, FORWARD, maxcount);
1989 if (count == 0) {
1990 /* no matches, return unchanged */
1991 return return_self(self);
1992 }
1993
1994 /* Check for overflow */
1995 /* result_len = self_len + count * (to_len-from_len) */
1996 product = count * (to_len-from_len);
1997 if (product / (to_len-from_len) != count) {
1998 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1999 return NULL;
2000 }
2001 result_len = self_len + product;
2002 if (result_len < 0) {
2003 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2004 return NULL;
2005 }
2006
2007 if ( (result = (PyByteArrayObject *)
2008 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2009 return NULL;
2010 result_s = PyByteArray_AS_STRING(result);
2011
2012 start = self_s;
2013 end = self_s + self_len;
2014 while (count-- > 0) {
2015 offset = findstring(start, end-start,
2016 from_s, from_len,
2017 0, end-start, FORWARD);
2018 if (offset == -1)
2019 break;
2020 next = start+offset;
2021 if (next == start) {
2022 /* replace with the 'to' */
2023 Py_MEMCPY(result_s, to_s, to_len);
2024 result_s += to_len;
2025 start += from_len;
2026 } else {
2027 /* copy the unchanged old then the 'to' */
2028 Py_MEMCPY(result_s, start, next-start);
2029 result_s += (next-start);
2030 Py_MEMCPY(result_s, to_s, to_len);
2031 result_s += to_len;
2032 start = next+from_len;
2033 }
2034 }
2035 /* Copy the remainder of the remaining bytes */
2036 Py_MEMCPY(result_s, start, end-start);
2037
2038 return result;
2039}
2040
2041
2042Py_LOCAL(PyByteArrayObject *)
2043replace(PyByteArrayObject *self,
2044 const char *from_s, Py_ssize_t from_len,
2045 const char *to_s, Py_ssize_t to_len,
2046 Py_ssize_t maxcount)
2047{
2048 if (maxcount < 0) {
2049 maxcount = PY_SSIZE_T_MAX;
2050 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2051 /* nothing to do; return the original bytes */
2052 return return_self(self);
2053 }
2054
2055 if (maxcount == 0 ||
2056 (from_len == 0 && to_len == 0)) {
2057 /* nothing to do; return the original bytes */
2058 return return_self(self);
2059 }
2060
2061 /* Handle zero-length special cases */
2062
2063 if (from_len == 0) {
2064 /* insert the 'to' bytes everywhere. */
2065 /* >>> "Python".replace("", ".") */
2066 /* '.P.y.t.h.o.n.' */
2067 return replace_interleave(self, to_s, to_len, maxcount);
2068 }
2069
2070 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2071 /* point for an empty self bytes to generate a non-empty bytes */
2072 /* Special case so the remaining code always gets a non-empty bytes */
2073 if (PyByteArray_GET_SIZE(self) == 0) {
2074 return return_self(self);
2075 }
2076
2077 if (to_len == 0) {
2078 /* delete all occurances of 'from' bytes */
2079 if (from_len == 1) {
2080 return replace_delete_single_character(
2081 self, from_s[0], maxcount);
2082 } else {
2083 return replace_delete_substring(self, from_s, from_len, maxcount);
2084 }
2085 }
2086
2087 /* Handle special case where both bytes have the same length */
2088
2089 if (from_len == to_len) {
2090 if (from_len == 1) {
2091 return replace_single_character_in_place(
2092 self,
2093 from_s[0],
2094 to_s[0],
2095 maxcount);
2096 } else {
2097 return replace_substring_in_place(
2098 self, from_s, from_len, to_s, to_len, maxcount);
2099 }
2100 }
2101
2102 /* Otherwise use the more generic algorithms */
2103 if (from_len == 1) {
2104 return replace_single_character(self, from_s[0],
2105 to_s, to_len, maxcount);
2106 } else {
2107 /* len('from')>=2, len('to')>=1 */
2108 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2109 }
2110}
2111
2112
2113PyDoc_STRVAR(replace__doc__,
2114"B.replace(old, new[, count]) -> bytes\n\
2115\n\
2116Return a copy of B with all occurrences of subsection\n\
2117old replaced by new. If the optional argument count is\n\
2118given, only the first count occurrences are replaced.");
2119
2120static PyObject *
2121bytes_replace(PyByteArrayObject *self, PyObject *args)
2122{
2123 Py_ssize_t count = -1;
2124 PyObject *from, *to, *res;
2125 Py_buffer vfrom, vto;
2126
2127 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2128 return NULL;
2129
2130 if (_getbuffer(from, &vfrom) < 0)
2131 return NULL;
2132 if (_getbuffer(to, &vto) < 0) {
2133 PyObject_ReleaseBuffer(from, &vfrom);
2134 return NULL;
2135 }
2136
2137 res = (PyObject *)replace((PyByteArrayObject *) self,
2138 vfrom.buf, vfrom.len,
2139 vto.buf, vto.len, count);
2140
2141 PyObject_ReleaseBuffer(from, &vfrom);
2142 PyObject_ReleaseBuffer(to, &vto);
2143 return res;
2144}
2145
2146
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* Initial list size for a split limited to maxsplit splits, capped at
   MAX_PREALLOC.  The argument is parenthesized so that any expression
   can be passed; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
2159
/* The SPLIT_* helpers below expand inside the split functions and rely on
   names from the enclosing function: `str` (scratch PyObject *), `list`
   (the result list), `count` (number of items stored so far) and an
   `onError` label that cleans up. */

/* Append data[left:right] as a new bytearray to `list`.  The body is
   wrapped in braces so the macro behaves as a single statement. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }

/* Store data[left:right] as item number `count` of `list`: directly into
   the slot while count < MAX_PREALLOC, by appending afterwards; then
   bump `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2191
2192
2193Py_LOCAL_INLINE(PyObject *)
2194split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2195{
2196 register Py_ssize_t i, j, count = 0;
2197 PyObject *str;
2198 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2199
2200 if (list == NULL)
2201 return NULL;
2202
2203 i = j = 0;
2204 while ((j < len) && (maxcount-- > 0)) {
2205 for(; j < len; j++) {
2206 /* I found that using memchr makes no difference */
2207 if (s[j] == ch) {
2208 SPLIT_ADD(s, i, j);
2209 i = j = j + 1;
2210 break;
2211 }
2212 }
2213 }
2214 if (i <= len) {
2215 SPLIT_ADD(s, i, len);
2216 }
2217 FIX_PREALLOC_SIZE(list);
2218 return list;
2219
2220 onError:
2221 Py_DECREF(list);
2222 return NULL;
2223}
2224
2225
2226Py_LOCAL_INLINE(PyObject *)
2227split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2228{
2229 register Py_ssize_t i, j, count = 0;
2230 PyObject *str;
2231 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2232
2233 if (list == NULL)
2234 return NULL;
2235
2236 for (i = j = 0; i < len; ) {
2237 /* find a token */
2238 while (i < len && ISSPACE(s[i]))
2239 i++;
2240 j = i;
2241 while (i < len && !ISSPACE(s[i]))
2242 i++;
2243 if (j < i) {
2244 if (maxcount-- <= 0)
2245 break;
2246 SPLIT_ADD(s, j, i);
2247 while (i < len && ISSPACE(s[i]))
2248 i++;
2249 j = i;
2250 }
2251 }
2252 if (j < len) {
2253 SPLIT_ADD(s, j, len);
2254 }
2255 FIX_PREALLOC_SIZE(list);
2256 return list;
2257
2258 onError:
2259 Py_DECREF(list);
2260 return NULL;
2261}
2262
2263PyDoc_STRVAR(split__doc__,
2264"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2265\n\
2266Return a list of the sections in B, using sep as the delimiter.\n\
2267If sep is not given, B is split on ASCII whitespace characters\n\
2268(space, tab, return, newline, formfeed, vertical tab).\n\
2269If maxsplit is given, at most maxsplit splits are done.");
2270
2271static PyObject *
2272bytes_split(PyByteArrayObject *self, PyObject *args)
2273{
2274 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2275 Py_ssize_t maxsplit = -1, count = 0;
2276 const char *s = PyByteArray_AS_STRING(self), *sub;
2277 PyObject *list, *str, *subobj = Py_None;
2278 Py_buffer vsub;
2279#ifdef USE_FAST
2280 Py_ssize_t pos;
2281#endif
2282
2283 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2284 return NULL;
2285 if (maxsplit < 0)
2286 maxsplit = PY_SSIZE_T_MAX;
2287
2288 if (subobj == Py_None)
2289 return split_whitespace(s, len, maxsplit);
2290
2291 if (_getbuffer(subobj, &vsub) < 0)
2292 return NULL;
2293 sub = vsub.buf;
2294 n = vsub.len;
2295
2296 if (n == 0) {
2297 PyErr_SetString(PyExc_ValueError, "empty separator");
2298 PyObject_ReleaseBuffer(subobj, &vsub);
2299 return NULL;
2300 }
2301 if (n == 1)
2302 return split_char(s, len, sub[0], maxsplit);
2303
2304 list = PyList_New(PREALLOC_SIZE(maxsplit));
2305 if (list == NULL) {
2306 PyObject_ReleaseBuffer(subobj, &vsub);
2307 return NULL;
2308 }
2309
2310#ifdef USE_FAST
2311 i = j = 0;
2312 while (maxsplit-- > 0) {
2313 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2314 if (pos < 0)
2315 break;
2316 j = i+pos;
2317 SPLIT_ADD(s, i, j);
2318 i = j + n;
2319 }
2320#else
2321 i = j = 0;
2322 while ((j+n <= len) && (maxsplit-- > 0)) {
2323 for (; j+n <= len; j++) {
2324 if (Py_STRING_MATCH(s, j, sub, n)) {
2325 SPLIT_ADD(s, i, j);
2326 i = j = j + n;
2327 break;
2328 }
2329 }
2330 }
2331#endif
2332 SPLIT_ADD(s, i, len);
2333 FIX_PREALLOC_SIZE(list);
2334 PyObject_ReleaseBuffer(subobj, &vsub);
2335 return list;
2336
2337 onError:
2338 Py_DECREF(list);
2339 PyObject_ReleaseBuffer(subobj, &vsub);
2340 return NULL;
2341}
2342
2343/* stringlib's partition shares nullbytes in some cases.
2344 undo this, we don't want the nullbytes to be shared. */
2345static PyObject *
2346make_nullbytes_unique(PyObject *result)
2347{
2348 if (result != NULL) {
2349 int i;
2350 assert(PyTuple_Check(result));
2351 assert(PyTuple_GET_SIZE(result) == 3);
2352 for (i = 0; i < 3; i++) {
2353 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2354 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2355 if (new == NULL) {
2356 Py_DECREF(result);
2357 result = NULL;
2358 break;
2359 }
2360 Py_DECREF(nullbytes);
2361 PyTuple_SET_ITEM(result, i, new);
2362 }
2363 }
2364 }
2365 return result;
2366}
2367
2368PyDoc_STRVAR(partition__doc__,
2369"B.partition(sep) -> (head, sep, tail)\n\
2370\n\
2371Searches for the separator sep in B, and returns the part before it,\n\
2372the separator itself, and the part after it. If the separator is not\n\
2373found, returns B and two empty bytearray objects.");
2374
2375static PyObject *
2376bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2377{
2378 PyObject *bytesep, *result;
2379
2380 bytesep = PyByteArray_FromObject(sep_obj);
2381 if (! bytesep)
2382 return NULL;
2383
2384 result = stringlib_partition(
2385 (PyObject*) self,
2386 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2387 bytesep,
2388 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2389 );
2390
2391 Py_DECREF(bytesep);
2392 return make_nullbytes_unique(result);
2393}
2394
2395PyDoc_STRVAR(rpartition__doc__,
2396"B.rpartition(sep) -> (tail, sep, head)\n\
2397\n\
2398Searches for the separator sep in B, starting at the end of B,\n\
2399and returns the part before it, the separator itself, and the\n\
2400part after it. If the separator is not found, returns two empty\n\
2401bytearray objects and B.");
2402
2403static PyObject *
2404bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2405{
2406 PyObject *bytesep, *result;
2407
2408 bytesep = PyByteArray_FromObject(sep_obj);
2409 if (! bytesep)
2410 return NULL;
2411
2412 result = stringlib_rpartition(
2413 (PyObject*) self,
2414 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2415 bytesep,
2416 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2417 );
2418
2419 Py_DECREF(bytesep);
2420 return make_nullbytes_unique(result);
2421}
2422
/* Split s[0:len] on the single byte `ch', scanning from the right and
   performing at most `maxcount' splits.  Returns a new list with the pieces
   in left-to-right order, or NULL on error.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are macros
   defined outside this section.  The locals `str' and `count' and the
   `onError' label are not used directly here, so they presumably exist for
   those macros -- check the macro bodies before renaming anything. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    /* j is the last index of the piece currently being delimited;
       i scans leftwards looking for the next separator byte. */
    i = j = len - 1;
    while ((i >= 0) && (maxcount-- > 0)) {
        for (; i >= 0; i--) {
            if (s[i] == ch) {
                /* The piece is everything right of the separator up to j. */
                SPLIT_ADD(s, i + 1, j + 1);
                j = i = i - 1;
                break;
            }
        }
    }
    /* Add the remaining leftmost piece s[0:j+1]; with j == -1 (separator at
       index 0, or empty input) that piece is empty. */
    if (j >= -1) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* Pieces were collected right-to-left; restore source order. */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2456
/* Split s[0:len] on runs of bytes for which ISSPACE() is true, scanning
   from the right and performing at most `maxcount' splits.  Returns a new
   list with the pieces in left-to-right order, or NULL on error.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are macros
   defined outside this section.  The locals `str' and `count' and the
   `onError' label are not used directly here, so they presumably exist for
   those macros -- check the macro bodies before renaming anything. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    for (i = j = len - 1; i >= 0; ) {
        /* find a token */
        /* Skip whitespace to the right of the token; j marks its last byte. */
        while (i >= 0 && ISSPACE(s[i]))
            i--;
        j = i;
        /* Walk left over the token itself; it occupies s[i+1 .. j]. */
        while (i >= 0 && !ISSPACE(s[i]))
            i--;
        if (j > i) {
            /* Split budget exhausted: stop with j still at the end of this
               token so the final SPLIT_ADD below emits s[0:j+1] unsplit. */
            if (maxcount-- <= 0)
                break;
            SPLIT_ADD(s, i + 1, j + 1);
            /* Consume the whitespace to the left of the token just added. */
            while (i >= 0 && ISSPACE(s[i]))
                i--;
            j = i;
        }
    }
    /* Anything left (j >= 0) becomes one last piece starting at index 0. */
    if (j >= 0) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* Pieces were collected right-to-left; restore source order. */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2496
2497PyDoc_STRVAR(rsplit__doc__,
2498"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2499\n\
2500Return a list of the sections in B, using sep as the delimiter,\n\
2501starting at the end of B and working to the front.\n\
2502If sep is not given, B is split on ASCII whitespace characters\n\
2503(space, tab, return, newline, formfeed, vertical tab).\n\
2504If maxsplit is given, at most maxsplit splits are done.");
2505
2506static PyObject *
2507bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2508{
2509 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2510 Py_ssize_t maxsplit = -1, count = 0;
2511 const char *s = PyByteArray_AS_STRING(self), *sub;
2512 PyObject *list, *str, *subobj = Py_None;
2513 Py_buffer vsub;
2514
2515 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2516 return NULL;
2517 if (maxsplit < 0)
2518 maxsplit = PY_SSIZE_T_MAX;
2519
2520 if (subobj == Py_None)
2521 return rsplit_whitespace(s, len, maxsplit);
2522
2523 if (_getbuffer(subobj, &vsub) < 0)
2524 return NULL;
2525 sub = vsub.buf;
2526 n = vsub.len;
2527
2528 if (n == 0) {
2529 PyErr_SetString(PyExc_ValueError, "empty separator");
2530 PyObject_ReleaseBuffer(subobj, &vsub);
2531 return NULL;
2532 }
2533 else if (n == 1)
2534 return rsplit_char(s, len, sub[0], maxsplit);
2535
2536 list = PyList_New(PREALLOC_SIZE(maxsplit));
2537 if (list == NULL) {
2538 PyObject_ReleaseBuffer(subobj, &vsub);
2539 return NULL;
2540 }
2541
2542 j = len;
2543 i = j - n;
2544
2545 while ( (i >= 0) && (maxsplit-- > 0) ) {
2546 for (; i>=0; i--) {
2547 if (Py_STRING_MATCH(s, i, sub, n)) {
2548 SPLIT_ADD(s, i + n, j);
2549 j = i;
2550 i -= n;
2551 break;
2552 }
2553 }
2554 }
2555 SPLIT_ADD(s, 0, j);
2556 FIX_PREALLOC_SIZE(list);
2557 if (PyList_Reverse(list) < 0)
2558 goto onError;
2559 PyObject_ReleaseBuffer(subobj, &vsub);
2560 return list;
2561
2562onError:
2563 Py_DECREF(list);
2564 PyObject_ReleaseBuffer(subobj, &vsub);
2565 return NULL;
2566}
2567
2568PyDoc_STRVAR(reverse__doc__,
2569"B.reverse() -> None\n\
2570\n\
2571Reverse the order of the values in B in place.");
2572static PyObject *
2573bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2574{
2575 char swap, *head, *tail;
2576 Py_ssize_t i, j, n = Py_SIZE(self);
2577
2578 j = n / 2;
2579 head = self->ob_bytes;
2580 tail = head + n - 1;
2581 for (i = 0; i < j; i++) {
2582 swap = *head;
2583 *head++ = *tail;
2584 *tail-- = swap;
2585 }
2586
2587 Py_RETURN_NONE;
2588}
2589
2590PyDoc_STRVAR(insert__doc__,
2591"B.insert(index, int) -> None\n\
2592\n\
2593Insert a single item into the bytearray before the given index.");
2594static PyObject *
2595bytes_insert(PyByteArrayObject *self, PyObject *args)
2596{
Georg Brandl3e483f62008-07-16 22:57:41 +00002597 PyObject *value;
2598 int ival;
Christian Heimes44720832008-05-26 13:01:01 +00002599 Py_ssize_t where, n = Py_SIZE(self);
2600
Georg Brandl3e483f62008-07-16 22:57:41 +00002601 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
Christian Heimes44720832008-05-26 13:01:01 +00002602 return NULL;
2603
2604 if (n == PY_SSIZE_T_MAX) {
2605 PyErr_SetString(PyExc_OverflowError,
2606 "cannot add more objects to bytes");
2607 return NULL;
2608 }
Georg Brandl3e483f62008-07-16 22:57:41 +00002609 if (!_getbytevalue(value, &ival))
Christian Heimes44720832008-05-26 13:01:01 +00002610 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002611 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2612 return NULL;
2613
2614 if (where < 0) {
2615 where += n;
2616 if (where < 0)
2617 where = 0;
2618 }
2619 if (where > n)
2620 where = n;
2621 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Georg Brandl3e483f62008-07-16 22:57:41 +00002622 self->ob_bytes[where] = ival;
Christian Heimes44720832008-05-26 13:01:01 +00002623
2624 Py_RETURN_NONE;
2625}
2626
2627PyDoc_STRVAR(append__doc__,
2628"B.append(int) -> None\n\
2629\n\
2630Append a single item to the end of B.");
2631static PyObject *
2632bytes_append(PyByteArrayObject *self, PyObject *arg)
2633{
2634 int value;
2635 Py_ssize_t n = Py_SIZE(self);
2636
2637 if (! _getbytevalue(arg, &value))
2638 return NULL;
2639 if (n == PY_SSIZE_T_MAX) {
2640 PyErr_SetString(PyExc_OverflowError,
2641 "cannot add more objects to bytes");
2642 return NULL;
2643 }
2644 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2645 return NULL;
2646
2647 self->ob_bytes[n] = value;
2648
2649 Py_RETURN_NONE;
2650}
2651
2652PyDoc_STRVAR(extend__doc__,
2653"B.extend(iterable int) -> None\n\
2654\n\
2655Append all the elements from the iterator or sequence to the\n\
2656end of B.");
2657static PyObject *
2658bytes_extend(PyByteArrayObject *self, PyObject *arg)
2659{
2660 PyObject *it, *item, *bytes_obj;
2661 Py_ssize_t buf_size = 0, len = 0;
2662 int value;
2663 char *buf;
2664
2665 /* bytes_setslice code only accepts something supporting PEP 3118. */
2666 if (PyObject_CheckBuffer(arg)) {
2667 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2668 return NULL;
2669
2670 Py_RETURN_NONE;
2671 }
2672
2673 it = PyObject_GetIter(arg);
2674 if (it == NULL)
2675 return NULL;
2676
2677 /* Try to determine the length of the argument. 32 is abitrary. */
2678 buf_size = _PyObject_LengthHint(arg, 32);
2679
2680 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2681 if (bytes_obj == NULL)
2682 return NULL;
2683 buf = PyByteArray_AS_STRING(bytes_obj);
2684
2685 while ((item = PyIter_Next(it)) != NULL) {
2686 if (! _getbytevalue(item, &value)) {
2687 Py_DECREF(item);
2688 Py_DECREF(it);
2689 Py_DECREF(bytes_obj);
2690 return NULL;
2691 }
2692 buf[len++] = value;
2693 Py_DECREF(item);
2694
2695 if (len >= buf_size) {
2696 buf_size = len + (len >> 1) + 1;
2697 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2698 Py_DECREF(it);
2699 Py_DECREF(bytes_obj);
2700 return NULL;
2701 }
2702 /* Recompute the `buf' pointer, since the resizing operation may
2703 have invalidated it. */
2704 buf = PyByteArray_AS_STRING(bytes_obj);
2705 }
2706 }
2707 Py_DECREF(it);
2708
2709 /* Resize down to exact size. */
2710 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2711 Py_DECREF(bytes_obj);
2712 return NULL;
2713 }
2714
2715 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2716 return NULL;
2717 Py_DECREF(bytes_obj);
2718
2719 Py_RETURN_NONE;
2720}
2721
2722PyDoc_STRVAR(pop__doc__,
2723"B.pop([index]) -> int\n\
2724\n\
2725Remove and return a single item from B. If no index\n\
Andrew M. Kuchlingd8972642008-06-21 13:29:12 +00002726argument is given, will pop the last value.");
Christian Heimes44720832008-05-26 13:01:01 +00002727static PyObject *
2728bytes_pop(PyByteArrayObject *self, PyObject *args)
2729{
2730 int value;
2731 Py_ssize_t where = -1, n = Py_SIZE(self);
2732
2733 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2734 return NULL;
2735
2736 if (n == 0) {
2737 PyErr_SetString(PyExc_OverflowError,
2738 "cannot pop an empty bytes");
2739 return NULL;
2740 }
2741 if (where < 0)
2742 where += Py_SIZE(self);
2743 if (where < 0 || where >= Py_SIZE(self)) {
2744 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2745 return NULL;
2746 }
2747
2748 value = self->ob_bytes[where];
2749 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2750 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2751 return NULL;
2752
2753 return PyInt_FromLong(value);
2754}
2755
2756PyDoc_STRVAR(remove__doc__,
2757"B.remove(int) -> None\n\
2758\n\
2759Remove the first occurance of a value in B.");
2760static PyObject *
2761bytes_remove(PyByteArrayObject *self, PyObject *arg)
2762{
2763 int value;
2764 Py_ssize_t where, n = Py_SIZE(self);
2765
2766 if (! _getbytevalue(arg, &value))
2767 return NULL;
2768
2769 for (where = 0; where < n; where++) {
2770 if (self->ob_bytes[where] == value)
2771 break;
2772 }
2773 if (where == n) {
2774 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2775 return NULL;
2776 }
2777
2778 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2779 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2780 return NULL;
2781
2782 Py_RETURN_NONE;
2783}
2784
2785/* XXX These two helpers could be optimized if argsize == 1 */
2786
2787static Py_ssize_t
2788lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2789 void *argptr, Py_ssize_t argsize)
2790{
2791 Py_ssize_t i = 0;
2792 while (i < mysize && memchr(argptr, myptr[i], argsize))
2793 i++;
2794 return i;
2795}
2796
2797static Py_ssize_t
2798rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2799 void *argptr, Py_ssize_t argsize)
2800{
2801 Py_ssize_t i = mysize - 1;
2802 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2803 i--;
2804 return i + 1;
2805}
2806
2807PyDoc_STRVAR(strip__doc__,
2808"B.strip([bytes]) -> bytearray\n\
2809\n\
2810Strip leading and trailing bytes contained in the argument.\n\
2811If the argument is omitted, strip ASCII whitespace.");
2812static PyObject *
2813bytes_strip(PyByteArrayObject *self, PyObject *args)
2814{
2815 Py_ssize_t left, right, mysize, argsize;
2816 void *myptr, *argptr;
2817 PyObject *arg = Py_None;
2818 Py_buffer varg;
2819 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2820 return NULL;
2821 if (arg == Py_None) {
2822 argptr = "\t\n\r\f\v ";
2823 argsize = 6;
2824 }
2825 else {
2826 if (_getbuffer(arg, &varg) < 0)
2827 return NULL;
2828 argptr = varg.buf;
2829 argsize = varg.len;
2830 }
2831 myptr = self->ob_bytes;
2832 mysize = Py_SIZE(self);
2833 left = lstrip_helper(myptr, mysize, argptr, argsize);
2834 if (left == mysize)
2835 right = left;
2836 else
2837 right = rstrip_helper(myptr, mysize, argptr, argsize);
2838 if (arg != Py_None)
2839 PyObject_ReleaseBuffer(arg, &varg);
2840 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2841}
2842
2843PyDoc_STRVAR(lstrip__doc__,
2844"B.lstrip([bytes]) -> bytearray\n\
2845\n\
2846Strip leading bytes contained in the argument.\n\
2847If the argument is omitted, strip leading ASCII whitespace.");
2848static PyObject *
2849bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2850{
2851 Py_ssize_t left, right, mysize, argsize;
2852 void *myptr, *argptr;
2853 PyObject *arg = Py_None;
2854 Py_buffer varg;
2855 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2856 return NULL;
2857 if (arg == Py_None) {
2858 argptr = "\t\n\r\f\v ";
2859 argsize = 6;
2860 }
2861 else {
2862 if (_getbuffer(arg, &varg) < 0)
2863 return NULL;
2864 argptr = varg.buf;
2865 argsize = varg.len;
2866 }
2867 myptr = self->ob_bytes;
2868 mysize = Py_SIZE(self);
2869 left = lstrip_helper(myptr, mysize, argptr, argsize);
2870 right = mysize;
2871 if (arg != Py_None)
2872 PyObject_ReleaseBuffer(arg, &varg);
2873 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2874}
2875
2876PyDoc_STRVAR(rstrip__doc__,
2877"B.rstrip([bytes]) -> bytearray\n\
2878\n\
2879Strip trailing bytes contained in the argument.\n\
2880If the argument is omitted, strip trailing ASCII whitespace.");
2881static PyObject *
2882bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2883{
2884 Py_ssize_t left, right, mysize, argsize;
2885 void *myptr, *argptr;
2886 PyObject *arg = Py_None;
2887 Py_buffer varg;
2888 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2889 return NULL;
2890 if (arg == Py_None) {
2891 argptr = "\t\n\r\f\v ";
2892 argsize = 6;
2893 }
2894 else {
2895 if (_getbuffer(arg, &varg) < 0)
2896 return NULL;
2897 argptr = varg.buf;
2898 argsize = varg.len;
2899 }
2900 myptr = self->ob_bytes;
2901 mysize = Py_SIZE(self);
2902 left = 0;
2903 right = rstrip_helper(myptr, mysize, argptr, argsize);
2904 if (arg != Py_None)
2905 PyObject_ReleaseBuffer(arg, &varg);
2906 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2907}
2908
2909PyDoc_STRVAR(decode_doc,
2910"B.decode([encoding[, errors]]) -> unicode object.\n\
2911\n\
2912Decodes B using the codec registered for encoding. encoding defaults\n\
2913to the default encoding. errors may be given to set a different error\n\
2914handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2915a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2916as well as any other name registered with codecs.register_error that is\n\
2917able to handle UnicodeDecodeErrors.");
2918
2919static PyObject *
2920bytes_decode(PyObject *self, PyObject *args)
2921{
2922 const char *encoding = NULL;
2923 const char *errors = NULL;
2924
2925 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2926 return NULL;
2927 if (encoding == NULL)
2928 encoding = PyUnicode_GetDefaultEncoding();
2929 return PyCodec_Decode(self, encoding, errors);
2930}
2931
2932PyDoc_STRVAR(alloc_doc,
2933"B.__alloc__() -> int\n\
2934\n\
2935Returns the number of bytes actually allocated.");
2936
2937static PyObject *
2938bytes_alloc(PyByteArrayObject *self)
2939{
2940 return PyInt_FromSsize_t(self->ob_alloc);
2941}
2942
2943PyDoc_STRVAR(join_doc,
2944"B.join(iterable_of_bytes) -> bytes\n\
2945\n\
2946Concatenates any number of bytearray objects, with B in between each pair.");
2947
2948static PyObject *
2949bytes_join(PyByteArrayObject *self, PyObject *it)
2950{
2951 PyObject *seq;
2952 Py_ssize_t mysize = Py_SIZE(self);
2953 Py_ssize_t i;
2954 Py_ssize_t n;
2955 PyObject **items;
2956 Py_ssize_t totalsize = 0;
2957 PyObject *result;
2958 char *dest;
2959
2960 seq = PySequence_Fast(it, "can only join an iterable");
2961 if (seq == NULL)
2962 return NULL;
2963 n = PySequence_Fast_GET_SIZE(seq);
2964 items = PySequence_Fast_ITEMS(seq);
2965
2966 /* Compute the total size, and check that they are all bytes */
2967 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2968 for (i = 0; i < n; i++) {
2969 PyObject *obj = items[i];
2970 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2971 PyErr_Format(PyExc_TypeError,
2972 "can only join an iterable of bytes "
2973 "(item %ld has type '%.100s')",
2974 /* XXX %ld isn't right on Win64 */
2975 (long)i, Py_TYPE(obj)->tp_name);
2976 goto error;
2977 }
2978 if (i > 0)
2979 totalsize += mysize;
2980 totalsize += Py_SIZE(obj);
2981 if (totalsize < 0) {
2982 PyErr_NoMemory();
2983 goto error;
2984 }
2985 }
2986
2987 /* Allocate the result, and copy the bytes */
2988 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2989 if (result == NULL)
2990 goto error;
2991 dest = PyByteArray_AS_STRING(result);
2992 for (i = 0; i < n; i++) {
2993 PyObject *obj = items[i];
2994 Py_ssize_t size = Py_SIZE(obj);
2995 char *buf;
2996 if (PyByteArray_Check(obj))
2997 buf = PyByteArray_AS_STRING(obj);
2998 else
2999 buf = PyBytes_AS_STRING(obj);
3000 if (i) {
3001 memcpy(dest, self->ob_bytes, mysize);
3002 dest += mysize;
3003 }
3004 memcpy(dest, buf, size);
3005 dest += size;
3006 }
3007
3008 /* Done */
3009 Py_DECREF(seq);
3010 return result;
3011
3012 /* Error handling */
3013 error:
3014 Py_DECREF(seq);
3015 return NULL;
3016}
3017
3018PyDoc_STRVAR(fromhex_doc,
3019"bytearray.fromhex(string) -> bytearray\n\
3020\n\
3021Create a bytearray object from a string of hexadecimal numbers.\n\
3022Spaces between two numbers are accepted.\n\
3023Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3024
3025static int
3026hex_digit_to_int(Py_UNICODE c)
3027{
3028 if (c >= 128)
3029 return -1;
3030 if (ISDIGIT(c))
3031 return c - '0';
3032 else {
3033 if (ISUPPER(c))
3034 c = TOLOWER(c);
3035 if (c >= 'a' && c <= 'f')
3036 return c - 'a' + 10;
3037 }
3038 return -1;
3039}
3040
3041static PyObject *
3042bytes_fromhex(PyObject *cls, PyObject *args)
3043{
3044 PyObject *newbytes, *hexobj;
3045 char *buf;
3046 Py_UNICODE *hex;
3047 Py_ssize_t hexlen, byteslen, i, j;
3048 int top, bot;
3049
3050 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3051 return NULL;
3052 assert(PyUnicode_Check(hexobj));
3053 hexlen = PyUnicode_GET_SIZE(hexobj);
3054 hex = PyUnicode_AS_UNICODE(hexobj);
3055 byteslen = hexlen/2; /* This overestimates if there are spaces */
3056 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3057 if (!newbytes)
3058 return NULL;
3059 buf = PyByteArray_AS_STRING(newbytes);
3060 for (i = j = 0; i < hexlen; i += 2) {
3061 /* skip over spaces in the input */
3062 while (hex[i] == ' ')
3063 i++;
3064 if (i >= hexlen)
3065 break;
3066 top = hex_digit_to_int(hex[i]);
3067 bot = hex_digit_to_int(hex[i+1]);
3068 if (top == -1 || bot == -1) {
3069 PyErr_Format(PyExc_ValueError,
3070 "non-hexadecimal number found in "
3071 "fromhex() arg at position %zd", i);
3072 goto error;
3073 }
3074 buf[j++] = (top << 4) + bot;
3075 }
3076 if (PyByteArray_Resize(newbytes, j) < 0)
3077 goto error;
3078 return newbytes;
3079
3080 error:
3081 Py_DECREF(newbytes);
3082 return NULL;
3083}
3084
3085PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3086
3087static PyObject *
3088bytes_reduce(PyByteArrayObject *self)
3089{
3090 PyObject *latin1, *dict;
3091 if (self->ob_bytes)
3092 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3093 Py_SIZE(self), NULL);
3094 else
3095 latin1 = PyUnicode_FromString("");
3096
3097 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3098 if (dict == NULL) {
3099 PyErr_Clear();
3100 dict = Py_None;
3101 Py_INCREF(dict);
3102 }
3103
3104 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3105}
3106
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003107PyDoc_STRVAR(sizeof_doc,
3108"B.__sizeof__() -> int\n\
3109 \n\
3110Returns the size of B in memory, in bytes");
3111static PyObject *
3112bytes_sizeof(PyByteArrayObject *self)
3113{
3114 Py_ssize_t res;
3115
3116 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3117 return PyInt_FromSsize_t(res);
3118}
3119
/* Sequence protocol slots for bytearray.  The two slice slots are left
   empty; PyByteArray_Type also installs bytes_as_mapping, whose subscript
   functions are listed there. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,     /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
3132
/* Mapping protocol slots for bytearray (length, subscript read,
   subscript assignment). */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,                  /* mp_length */
    (binaryfunc)bytes_subscript,            /* mp_subscript */
    (objobjargproc)bytes_ass_subscript,     /* mp_ass_subscript */
};
3138
/* Buffer protocol slots for bytearray: the four old-style buffer functions
   followed by the getbuffer/releasebuffer pair (the type sets
   Py_TPFLAGS_HAVE_NEWBUFFER). */
static PyBufferProcs bytes_as_buffer = {
    (readbufferproc)bytes_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)bytes_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)bytes_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)bytes_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)bytes_getbuffer,             /* bf_getbuffer */
    (releasebufferproc)bytes_releasebuffer,     /* bf_releasebuffer */
};
3147
/* Method table for bytearray, installed as tp_methods of PyByteArray_Type.
   Entries are kept in alphabetical order by method name (special methods
   first).  Functions named stringlib_* are not defined in this part of the
   file; the bytes_* ones are this file's own implementations.  The table
   ends with a {NULL} sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* fromhex is the only class method in the table. */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
3212
/* Docstring of the bytearray type itself; installed as tp_doc of
   PyByteArray_Type.  It lists the argument forms the constructor accepts. */
PyDoc_STRVAR(bytes_doc,
"bytearray(iterable_of_ints) -> bytearray.\n\
bytearray(string, encoding[, errors]) -> bytearray.\n\
bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
bytearray(memory_view) -> bytearray.\n\
\n\
Construct an mutable bytearray object from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - a bytes or a bytearray object\n\
 - any object implementing the buffer API.\n\
\n\
bytearray(int) -> bytearray.\n\
\n\
Construct a zero-initialized bytearray of the given length.");
3228
3229
/* Forward declaration: bytes_iter is defined after the iterator type near
   the end of the file, but the type object below needs it for tp_iter. */
static PyObject *bytes_iter(PyObject *seq);

/* The bytearray type object.  It can be subclassed (Py_TPFLAGS_BASETYPE),
   has no tp_hash entry, and supports both the old and the new buffer
   interface through bytes_as_buffer. */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytes_str,                          /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytes_iter,                         /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
3274
3275/*********************** Bytes Iterator ****************************/
3276
/* State of a bytearray iterator: the array being walked and the index of
   the next byte to hand out. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;            /* index of the next item to return */
    PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
3282
/* tp_dealloc for the iterator: untrack it from the garbage collector, drop
   the reference to the underlying bytearray (it_seq is NULL once the
   iterator is exhausted, hence Py_XDECREF), then free the object. */
static void
bytesiter_dealloc(bytesiterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3290
/* tp_traverse for the iterator: its only object reference is it_seq.
   Py_VISIT is a macro that uses the `visit' and `arg' parameters by name. */
static int
bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3297
3298static PyObject *
3299bytesiter_next(bytesiterobject *it)
3300{
3301 PyByteArrayObject *seq;
3302 PyObject *item;
3303
3304 assert(it != NULL);
3305 seq = it->it_seq;
3306 if (seq == NULL)
3307 return NULL;
3308 assert(PyByteArray_Check(seq));
3309
3310 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3311 item = PyInt_FromLong(
3312 (unsigned char)seq->ob_bytes[it->it_index]);
3313 if (item != NULL)
3314 ++it->it_index;
3315 return item;
3316 }
3317
3318 Py_DECREF(seq);
3319 it->it_seq = NULL;
3320 return NULL;
3321}
3322
3323static PyObject *
3324bytesiter_length_hint(bytesiterobject *it)
3325{
3326 Py_ssize_t len = 0;
3327 if (it->it_seq)
3328 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3329 return PyInt_FromSsize_t(len);
3330}
3331
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");

/* Method table of the bytearray iterator; __length_hint__ is its only
   entry. */
static PyMethodDef bytesiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3340
/* Type object of the iterator returned by bytes_iter.  It takes part in
   garbage collection (Py_TPFLAGS_HAVE_GC with bytesiter_traverse) and is
   its own iterator (PyObject_SelfIter). */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytesiter_dealloc,     /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytesiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytesiter_next,      /* tp_iternext */
    bytesiter_methods,                 /* tp_methods */
    0,                                 /* tp_members */
};
3373
3374static PyObject *
3375bytes_iter(PyObject *seq)
3376{
3377 bytesiterobject *it;
3378
3379 if (!PyByteArray_Check(seq)) {
3380 PyErr_BadInternalCall();
3381 return NULL;
3382 }
3383 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3384 if (it == NULL)
3385 return NULL;
3386 it->it_index = 0;
3387 Py_INCREF(seq);
3388 it->it_seq = (PyByteArrayObject *)seq;
3389 _PyObject_GC_TRACK(it);
3390 return (PyObject *)it;
3391}