/* PyByteArray (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyByteArrayObject *nullbytes = NULL;
9
10void
11PyByteArray_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyByteArray_Init(void)
18{
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
28/* end nullbytes support */
29
30/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
37 if (PyLong_Check(arg)) {
38 face_value = PyLong_AsLong(arg);
39 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
42 }
43 } else {
44 PyErr_Format(PyExc_TypeError, "an integer is required");
45 return 0;
46 }
47
48 *value = face_value;
49 return 1;
50}
51
52static int
53bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
54{
55 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
61 if (obj->ob_bytes == NULL)
62 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
73bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
74{
75 obj->ob_exports--;
76}
77
78static Py_ssize_t
79_getbuffer(PyObject *obj, Py_buffer *view)
80{
81 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
82
83 if (buffer == NULL || buffer->bf_getbuffer == NULL)
84 {
85 PyErr_Format(PyExc_TypeError,
86 "Type %.100s doesn't support the buffer API",
87 Py_TYPE(obj)->tp_name);
88 return -1;
89 }
90
91 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
92 return -1;
93 return view->len;
94}
95
96/* Direct API functions */
97
98PyObject *
99PyByteArray_FromObject(PyObject *input)
100{
101 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
102 input, NULL);
103}
104
105PyObject *
106PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
107{
108 PyByteArrayObject *new;
109 Py_ssize_t alloc;
110
111 if (size < 0) {
112 PyErr_SetString(PyExc_SystemError,
113 "Negative size passed to PyByteArray_FromStringAndSize");
114 return NULL;
115 }
116
117 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
118 if (new == NULL)
119 return NULL;
120
121 if (size == 0) {
122 new->ob_bytes = NULL;
123 alloc = 0;
124 }
125 else {
126 alloc = size + 1;
127 new->ob_bytes = PyMem_Malloc(alloc);
128 if (new->ob_bytes == NULL) {
129 Py_DECREF(new);
130 return PyErr_NoMemory();
131 }
132 if (bytes != NULL)
133 memcpy(new->ob_bytes, bytes, size);
134 new->ob_bytes[size] = '\0'; /* Trailing null byte */
135 }
136 Py_SIZE(new) = size;
137 new->ob_alloc = alloc;
138 new->ob_exports = 0;
139
140 return (PyObject *)new;
141}
142
143Py_ssize_t
144PyByteArray_Size(PyObject *self)
145{
146 assert(self != NULL);
147 assert(PyByteArray_Check(self));
148
149 return PyByteArray_GET_SIZE(self);
150}
151
152char *
153PyByteArray_AsString(PyObject *self)
154{
155 assert(self != NULL);
156 assert(PyByteArray_Check(self));
157
158 return PyByteArray_AS_STRING(self);
159}
160
161int
162PyByteArray_Resize(PyObject *self, Py_ssize_t size)
163{
164 void *sval;
165 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
166
167 assert(self != NULL);
168 assert(PyByteArray_Check(self));
169 assert(size >= 0);
170
171 if (size < alloc / 2) {
172 /* Major downsize; resize down to exact size */
173 alloc = size + 1;
174 }
175 else if (size < alloc) {
176 /* Within allocated size; quick exit */
177 Py_SIZE(self) = size;
178 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
179 return 0;
180 }
181 else if (size <= alloc * 1.125) {
182 /* Moderate upsize; overallocate similar to list_resize() */
183 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
184 }
185 else {
186 /* Major upsize; resize up to exact size */
187 alloc = size + 1;
188 }
189
190 if (((PyByteArrayObject *)self)->ob_exports > 0) {
191 /*
192 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
193 ((PyByteArrayObject *)self)->ob_bytes);
194 */
195 PyErr_SetString(PyExc_BufferError,
196 "Existing exports of data: object cannot be re-sized");
197 return -1;
198 }
199
200 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
201 if (sval == NULL) {
202 PyErr_NoMemory();
203 return -1;
204 }
205
206 ((PyByteArrayObject *)self)->ob_bytes = sval;
207 Py_SIZE(self) = size;
208 ((PyByteArrayObject *)self)->ob_alloc = alloc;
209 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
210
211 return 0;
212}
213
214PyObject *
215PyByteArray_Concat(PyObject *a, PyObject *b)
216{
217 Py_ssize_t size;
218 Py_buffer va, vb;
219 PyByteArrayObject *result = NULL;
220
221 va.len = -1;
222 vb.len = -1;
223 if (_getbuffer(a, &va) < 0 ||
224 _getbuffer(b, &vb) < 0) {
225 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
226 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
227 goto done;
228 }
229
230 size = va.len + vb.len;
231 if (size < 0) {
232 return PyErr_NoMemory();
233 goto done;
234 }
235
236 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
237 if (result != NULL) {
238 memcpy(result->ob_bytes, va.buf, va.len);
239 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
240 }
241
242 done:
243 if (va.len != -1)
244 PyObject_ReleaseBuffer(a, &va);
245 if (vb.len != -1)
246 PyObject_ReleaseBuffer(b, &vb);
247 return (PyObject *)result;
248}
249
250/* Functions stuffed into the type object */
251
252static Py_ssize_t
253bytes_length(PyByteArrayObject *self)
254{
255 return Py_SIZE(self);
256}
257
258static PyObject *
259bytes_iconcat(PyByteArrayObject *self, PyObject *other)
260{
261 Py_ssize_t mysize;
262 Py_ssize_t size;
263 Py_buffer vo;
264
265 if (_getbuffer(other, &vo) < 0) {
266 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
267 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
268 return NULL;
269 }
270
271 mysize = Py_SIZE(self);
272 size = mysize + vo.len;
273 if (size < 0) {
274 PyObject_ReleaseBuffer(other, &vo);
275 return PyErr_NoMemory();
276 }
277 if (size < self->ob_alloc) {
278 Py_SIZE(self) = size;
279 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
280 }
281 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
282 PyObject_ReleaseBuffer(other, &vo);
283 return NULL;
284 }
285 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
286 PyObject_ReleaseBuffer(other, &vo);
287 Py_INCREF(self);
288 return (PyObject *)self;
289}
290
291static PyObject *
292bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
293{
294 PyByteArrayObject *result;
295 Py_ssize_t mysize;
296 Py_ssize_t size;
297
298 if (count < 0)
299 count = 0;
300 mysize = Py_SIZE(self);
301 size = mysize * count;
302 if (count != 0 && size / count != mysize)
303 return PyErr_NoMemory();
304 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
305 if (result != NULL && size != 0) {
306 if (mysize == 1)
307 memset(result->ob_bytes, self->ob_bytes[0], size);
308 else {
309 Py_ssize_t i;
310 for (i = 0; i < count; i++)
311 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
312 }
313 }
314 return (PyObject *)result;
315}
316
317static PyObject *
318bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
319{
320 Py_ssize_t mysize;
321 Py_ssize_t size;
322
323 if (count < 0)
324 count = 0;
325 mysize = Py_SIZE(self);
326 size = mysize * count;
327 if (count != 0 && size / count != mysize)
328 return PyErr_NoMemory();
329 if (size < self->ob_alloc) {
330 Py_SIZE(self) = size;
331 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
332 }
333 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
334 return NULL;
335
336 if (mysize == 1)
337 memset(self->ob_bytes, self->ob_bytes[0], size);
338 else {
339 Py_ssize_t i;
340 for (i = 1; i < count; i++)
341 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
342 }
343
344 Py_INCREF(self);
345 return (PyObject *)self;
346}
347
348static PyObject *
349bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
350{
351 if (i < 0)
352 i += Py_SIZE(self);
353 if (i < 0 || i >= Py_SIZE(self)) {
354 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
355 return NULL;
356 }
357 return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
358}
359
360static PyObject *
361bytes_subscript(PyByteArrayObject *self, PyObject *item)
362{
363 if (PyIndex_Check(item)) {
364 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
365
366 if (i == -1 && PyErr_Occurred())
367 return NULL;
368
369 if (i < 0)
370 i += PyByteArray_GET_SIZE(self);
371
372 if (i < 0 || i >= Py_SIZE(self)) {
373 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
374 return NULL;
375 }
376 return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
377 }
378 else if (PySlice_Check(item)) {
379 Py_ssize_t start, stop, step, slicelength, cur, i;
380 if (PySlice_GetIndicesEx((PySliceObject *)item,
381 PyByteArray_GET_SIZE(self),
382 &start, &stop, &step, &slicelength) < 0) {
383 return NULL;
384 }
385
386 if (slicelength <= 0)
387 return PyByteArray_FromStringAndSize("", 0);
388 else if (step == 1) {
389 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
390 slicelength);
391 }
392 else {
393 char *source_buf = PyByteArray_AS_STRING(self);
394 char *result_buf = (char *)PyMem_Malloc(slicelength);
395 PyObject *result;
396
397 if (result_buf == NULL)
398 return PyErr_NoMemory();
399
400 for (cur = start, i = 0; i < slicelength;
401 cur += step, i++) {
402 result_buf[i] = source_buf[cur];
403 }
404 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
405 PyMem_Free(result_buf);
406 return result;
407 }
408 }
409 else {
410 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
411 return NULL;
412 }
413}
414
415static int
416bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
417 PyObject *values)
418{
419 Py_ssize_t avail, needed;
420 void *bytes;
421 Py_buffer vbytes;
422 int res = 0;
423
424 vbytes.len = -1;
425 if (values == (PyObject *)self) {
426 /* Make a copy and call this function recursively */
427 int err;
428 values = PyByteArray_FromObject(values);
429 if (values == NULL)
430 return -1;
431 err = bytes_setslice(self, lo, hi, values);
432 Py_DECREF(values);
433 return err;
434 }
435 if (values == NULL) {
436 /* del b[lo:hi] */
437 bytes = NULL;
438 needed = 0;
439 }
440 else {
441 if (_getbuffer(values, &vbytes) < 0) {
442 PyErr_Format(PyExc_TypeError,
443 "can't set bytes slice from %.100s",
444 Py_TYPE(values)->tp_name);
445 return -1;
446 }
447 needed = vbytes.len;
448 bytes = vbytes.buf;
449 }
450
451 if (lo < 0)
452 lo = 0;
453 if (hi < lo)
454 hi = lo;
455 if (hi > Py_SIZE(self))
456 hi = Py_SIZE(self);
457
458 avail = hi - lo;
459 if (avail < 0)
460 lo = hi = avail = 0;
461
462 if (avail != needed) {
463 if (avail > needed) {
464 /*
465 0 lo hi old_size
466 | |<----avail----->|<-----tomove------>|
467 | |<-needed->|<-----tomove------>|
468 0 lo new_hi new_size
469 */
470 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
471 Py_SIZE(self) - hi);
472 }
473 /* XXX(nnorwitz): need to verify this can't overflow! */
474 if (PyByteArray_Resize((PyObject *)self,
475 Py_SIZE(self) + needed - avail) < 0) {
476 res = -1;
477 goto finish;
478 }
479 if (avail < needed) {
480 /*
481 0 lo hi old_size
482 | |<-avail->|<-----tomove------>|
483 | |<----needed---->|<-----tomove------>|
484 0 lo new_hi new_size
485 */
486 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
487 Py_SIZE(self) - lo - needed);
488 }
489 }
490
491 if (needed > 0)
492 memcpy(self->ob_bytes + lo, bytes, needed);
493
494
495 finish:
496 if (vbytes.len != -1)
497 PyObject_ReleaseBuffer(values, &vbytes);
498 return res;
499}
500
501static int
502bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
503{
504 Py_ssize_t ival;
505
506 if (i < 0)
507 i += Py_SIZE(self);
508
509 if (i < 0 || i >= Py_SIZE(self)) {
510 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
511 return -1;
512 }
513
514 if (value == NULL)
515 return bytes_setslice(self, i, i+1, NULL);
516
517 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
518 if (ival == -1 && PyErr_Occurred())
519 return -1;
520
521 if (ival < 0 || ival >= 256) {
522 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
523 return -1;
524 }
525
526 self->ob_bytes[i] = ival;
527 return 0;
528}
529
530static int
531bytes_ass_subscript(PyByteArrayObject *self, PyObject *item, PyObject *values)
532{
533 Py_ssize_t start, stop, step, slicelen, needed;
534 char *bytes;
535
536 if (PyIndex_Check(item)) {
537 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
538
539 if (i == -1 && PyErr_Occurred())
540 return -1;
541
542 if (i < 0)
543 i += PyByteArray_GET_SIZE(self);
544
545 if (i < 0 || i >= Py_SIZE(self)) {
546 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
547 return -1;
548 }
549
550 if (values == NULL) {
551 /* Fall through to slice assignment */
552 start = i;
553 stop = i + 1;
554 step = 1;
555 slicelen = 1;
556 }
557 else {
558 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
559 if (ival == -1 && PyErr_Occurred())
560 return -1;
561 if (ival < 0 || ival >= 256) {
562 PyErr_SetString(PyExc_ValueError,
563 "byte must be in range(0, 256)");
564 return -1;
565 }
566 self->ob_bytes[i] = (char)ival;
567 return 0;
568 }
569 }
570 else if (PySlice_Check(item)) {
571 if (PySlice_GetIndicesEx((PySliceObject *)item,
572 PyByteArray_GET_SIZE(self),
573 &start, &stop, &step, &slicelen) < 0) {
574 return -1;
575 }
576 }
577 else {
578 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
579 return -1;
580 }
581
582 if (values == NULL) {
583 bytes = NULL;
584 needed = 0;
585 }
586 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
587 /* Make a copy an call this function recursively */
588 int err;
589 values = PyByteArray_FromObject(values);
590 if (values == NULL)
591 return -1;
592 err = bytes_ass_subscript(self, item, values);
593 Py_DECREF(values);
594 return err;
595 }
596 else {
597 assert(PyByteArray_Check(values));
598 bytes = ((PyByteArrayObject *)values)->ob_bytes;
599 needed = Py_SIZE(values);
600 }
601 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
602 if ((step < 0 && start < stop) ||
603 (step > 0 && start > stop))
604 stop = start;
605 if (step == 1) {
606 if (slicelen != needed) {
607 if (slicelen > needed) {
608 /*
609 0 start stop old_size
610 | |<---slicelen--->|<-----tomove------>|
611 | |<-needed->|<-----tomove------>|
612 0 lo new_hi new_size
613 */
614 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
615 Py_SIZE(self) - stop);
616 }
617 if (PyByteArray_Resize((PyObject *)self,
618 Py_SIZE(self) + needed - slicelen) < 0)
619 return -1;
620 if (slicelen < needed) {
621 /*
622 0 lo hi old_size
623 | |<-avail->|<-----tomove------>|
624 | |<----needed---->|<-----tomove------>|
625 0 lo new_hi new_size
626 */
627 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
628 Py_SIZE(self) - start - needed);
629 }
630 }
631
632 if (needed > 0)
633 memcpy(self->ob_bytes + start, bytes, needed);
634
635 return 0;
636 }
637 else {
638 if (needed == 0) {
639 /* Delete slice */
640 Py_ssize_t cur, i;
641
642 if (step < 0) {
643 stop = start + 1;
644 start = stop + step * (slicelen - 1) - 1;
645 step = -step;
646 }
647 for (cur = start, i = 0;
648 i < slicelen; cur += step, i++) {
649 Py_ssize_t lim = step - 1;
650
651 if (cur + step >= PyByteArray_GET_SIZE(self))
652 lim = PyByteArray_GET_SIZE(self) - cur - 1;
653
654 memmove(self->ob_bytes + cur - i,
655 self->ob_bytes + cur + 1, lim);
656 }
657 /* Move the tail of the bytes, in one chunk */
658 cur = start + slicelen*step;
659 if (cur < PyByteArray_GET_SIZE(self)) {
660 memmove(self->ob_bytes + cur - slicelen,
661 self->ob_bytes + cur,
662 PyByteArray_GET_SIZE(self) - cur);
663 }
664 if (PyByteArray_Resize((PyObject *)self,
665 PyByteArray_GET_SIZE(self) - slicelen) < 0)
666 return -1;
667
668 return 0;
669 }
670 else {
671 /* Assign slice */
672 Py_ssize_t cur, i;
673
674 if (needed != slicelen) {
675 PyErr_Format(PyExc_ValueError,
676 "attempt to assign bytes of size %zd "
677 "to extended slice of size %zd",
678 needed, slicelen);
679 return -1;
680 }
681 for (cur = start, i = 0; i < slicelen; cur += step, i++)
682 self->ob_bytes[cur] = bytes[i];
683 return 0;
684 }
685 }
686}
687
688static int
689bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
690{
691 static char *kwlist[] = {"source", "encoding", "errors", 0};
692 PyObject *arg = NULL;
693 const char *encoding = NULL;
694 const char *errors = NULL;
695 Py_ssize_t count;
696 PyObject *it;
697 PyObject *(*iternext)(PyObject *);
698
699 if (Py_SIZE(self) != 0) {
700 /* Empty previous contents (yes, do this first of all!) */
701 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
702 return -1;
703 }
704
705 /* Parse arguments */
706 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
707 &arg, &encoding, &errors))
708 return -1;
709
710 /* Make a quick exit if no first argument */
711 if (arg == NULL) {
712 if (encoding != NULL || errors != NULL) {
713 PyErr_SetString(PyExc_TypeError,
714 "encoding or errors without sequence argument");
715 return -1;
716 }
717 return 0;
718 }
719
720 if (PyUnicode_Check(arg)) {
721 /* Encode via the codec registry */
722 PyObject *encoded, *new;
723 if (encoding == NULL) {
724 PyErr_SetString(PyExc_TypeError,
725 "string argument without an encoding");
726 return -1;
727 }
728 encoded = PyCodec_Encode(arg, encoding, errors);
729 if (encoded == NULL)
730 return -1;
731 assert(PyBytes_Check(encoded));
732 new = bytes_iconcat(self, encoded);
733 Py_DECREF(encoded);
734 if (new == NULL)
735 return -1;
736 Py_DECREF(new);
737 return 0;
738 }
739
740 /* If it's not unicode, there can't be encoding or errors */
741 if (encoding != NULL || errors != NULL) {
742 PyErr_SetString(PyExc_TypeError,
743 "encoding or errors without a string argument");
744 return -1;
745 }
746
747 /* Is it an int? */
748 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
749 if (count == -1 && PyErr_Occurred())
750 PyErr_Clear();
751 else {
752 if (count < 0) {
753 PyErr_SetString(PyExc_ValueError, "negative count");
754 return -1;
755 }
756 if (count > 0) {
757 if (PyByteArray_Resize((PyObject *)self, count))
758 return -1;
759 memset(self->ob_bytes, 0, count);
760 }
761 return 0;
762 }
763
764 /* Use the buffer API */
765 if (PyObject_CheckBuffer(arg)) {
766 Py_ssize_t size;
767 Py_buffer view;
768 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
769 return -1;
770 size = view.len;
771 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
772 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
773 goto fail;
774 PyObject_ReleaseBuffer(arg, &view);
775 return 0;
776 fail:
777 PyObject_ReleaseBuffer(arg, &view);
778 return -1;
779 }
780
781 /* XXX Optimize this if the arguments is a list, tuple */
782
783 /* Get the iterator */
784 it = PyObject_GetIter(arg);
785 if (it == NULL)
786 return -1;
787 iternext = *Py_TYPE(it)->tp_iternext;
788
789 /* Run the iterator to exhaustion */
790 for (;;) {
791 PyObject *item;
792 Py_ssize_t value;
793
794 /* Get the next item */
795 item = iternext(it);
796 if (item == NULL) {
797 if (PyErr_Occurred()) {
798 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
799 goto error;
800 PyErr_Clear();
801 }
802 break;
803 }
804
805 /* Interpret it as an int (__index__) */
806 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
807 Py_DECREF(item);
808 if (value == -1 && PyErr_Occurred())
809 goto error;
810
811 /* Range check */
812 if (value < 0 || value >= 256) {
813 PyErr_SetString(PyExc_ValueError,
814 "bytes must be in range(0, 256)");
815 goto error;
816 }
817
818 /* Append the byte */
819 if (Py_SIZE(self) < self->ob_alloc)
820 Py_SIZE(self)++;
821 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
822 goto error;
823 self->ob_bytes[Py_SIZE(self)-1] = value;
824 }
825
826 /* Clean up and return success */
827 Py_DECREF(it);
828 return 0;
829
830 error:
831 /* Error handling when it != NULL */
832 Py_DECREF(it);
833 return -1;
834}
835
836/* Mostly copied from string_repr, but without the
837 "smart quote" functionality. */
838static PyObject *
839bytes_repr(PyByteArrayObject *self)
840{
841 static const char *hexdigits = "0123456789abcdef";
842 const char *quote_prefix = "bytearray(b";
843 const char *quote_postfix = ")";
844 Py_ssize_t length = Py_SIZE(self);
845 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
846 size_t newsize = 14 + 4 * length;
847 PyObject *v;
848 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
849 PyErr_SetString(PyExc_OverflowError,
850 "bytearray object is too large to make repr");
851 return NULL;
852 }
853 v = PyUnicode_FromUnicode(NULL, newsize);
854 if (v == NULL) {
855 return NULL;
856 }
857 else {
858 register Py_ssize_t i;
859 register Py_UNICODE c;
860 register Py_UNICODE *p;
861 int quote;
862
863 /* Figure out which quote to use; single is preferred */
864 quote = '\'';
865 {
866 char *test, *start;
867 start = PyByteArray_AS_STRING(self);
868 for (test = start; test < start+length; ++test) {
869 if (*test == '"') {
870 quote = '\''; /* back to single */
871 goto decided;
872 }
873 else if (*test == '\'')
874 quote = '"';
875 }
876 decided:
877 ;
878 }
879
880 p = PyUnicode_AS_UNICODE(v);
881 while (*quote_prefix)
882 *p++ = *quote_prefix++;
883 *p++ = quote;
884
885 for (i = 0; i < length; i++) {
886 /* There's at least enough room for a hex escape
887 and a closing quote. */
888 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
889 c = self->ob_bytes[i];
890 if (c == '\'' || c == '\\')
891 *p++ = '\\', *p++ = c;
892 else if (c == '\t')
893 *p++ = '\\', *p++ = 't';
894 else if (c == '\n')
895 *p++ = '\\', *p++ = 'n';
896 else if (c == '\r')
897 *p++ = '\\', *p++ = 'r';
898 else if (c == 0)
899 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
900 else if (c < ' ' || c >= 0x7f) {
901 *p++ = '\\';
902 *p++ = 'x';
903 *p++ = hexdigits[(c & 0xf0) >> 4];
904 *p++ = hexdigits[c & 0xf];
905 }
906 else
907 *p++ = c;
908 }
909 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
910 *p++ = quote;
911 while (*quote_postfix) {
912 *p++ = *quote_postfix++;
913 }
914 *p = '\0';
915 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
916 Py_DECREF(v);
917 return NULL;
918 }
919 return v;
920 }
921}
922
923static PyObject *
924bytes_str(PyObject *op)
925{
926 if (Py_BytesWarningFlag) {
927 if (PyErr_WarnEx(PyExc_BytesWarning,
928 "str() on a bytearray instance", 1))
929 return NULL;
930 }
931 return bytes_repr((PyByteArrayObject*)op);
932}
933
934static PyObject *
935bytes_richcompare(PyObject *self, PyObject *other, int op)
936{
937 Py_ssize_t self_size, other_size;
938 Py_buffer self_bytes, other_bytes;
939 PyObject *res;
940 Py_ssize_t minsize;
941 int cmp;
942
943 /* Bytes can be compared to anything that supports the (binary)
944 buffer API. Except that a comparison with Unicode is always an
945 error, even if the comparison is for equality. */
946 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
947 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
948 if (Py_BytesWarningFlag && op == Py_EQ) {
949 if (PyErr_WarnEx(PyExc_BytesWarning,
950 "Comparsion between bytearray and string", 1))
951 return NULL;
952 }
953
954 Py_INCREF(Py_NotImplemented);
955 return Py_NotImplemented;
956 }
957
958 self_size = _getbuffer(self, &self_bytes);
959 if (self_size < 0) {
960 PyErr_Clear();
961 Py_INCREF(Py_NotImplemented);
962 return Py_NotImplemented;
963 }
964
965 other_size = _getbuffer(other, &other_bytes);
966 if (other_size < 0) {
967 PyErr_Clear();
968 PyObject_ReleaseBuffer(self, &self_bytes);
969 Py_INCREF(Py_NotImplemented);
970 return Py_NotImplemented;
971 }
972
973 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
974 /* Shortcut: if the lengths differ, the objects differ */
975 cmp = (op == Py_NE);
976 }
977 else {
978 minsize = self_size;
979 if (other_size < minsize)
980 minsize = other_size;
981
982 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
983 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
984
985 if (cmp == 0) {
986 if (self_size < other_size)
987 cmp = -1;
988 else if (self_size > other_size)
989 cmp = 1;
990 }
991
992 switch (op) {
993 case Py_LT: cmp = cmp < 0; break;
994 case Py_LE: cmp = cmp <= 0; break;
995 case Py_EQ: cmp = cmp == 0; break;
996 case Py_NE: cmp = cmp != 0; break;
997 case Py_GT: cmp = cmp > 0; break;
998 case Py_GE: cmp = cmp >= 0; break;
999 }
1000 }
1001
1002 res = cmp ? Py_True : Py_False;
1003 PyObject_ReleaseBuffer(self, &self_bytes);
1004 PyObject_ReleaseBuffer(other, &other_bytes);
1005 Py_INCREF(res);
1006 return res;
1007}
1008
1009static void
1010bytes_dealloc(PyByteArrayObject *self)
1011{
1012 if (self->ob_bytes != 0) {
1013 PyMem_Free(self->ob_bytes);
1014 }
1015 Py_TYPE(self)->tp_free((PyObject *)self);
1016}
1017
1018
1019/* -------------------------------------------------------------------- */
1020/* Methods */
1021
/* Parameters for the stringlib headers included below, bound to the
   bytearray type: element type, comparison, length/data accessors,
   constructor, empty-object singleton and exact-type check. */
#define STRINGLIB_CHAR          char
#define STRINGLIB_CMP           memcmp
#define STRINGLIB_LEN           PyByteArray_GET_SIZE
#define STRINGLIB_STR           PyByteArray_AS_STRING
#define STRINGLIB_NEW           PyByteArray_FromStringAndSize
#define STRINGLIB_EMPTY         nullbytes
#define STRINGLIB_CHECK_EXACT   PyByteArray_CheckExact
#define STRINGLIB_MUTABLE       1
1030
1031#include "stringlib/fastsearch.h"
1032#include "stringlib/count.h"
1033#include "stringlib/find.h"
1034#include "stringlib/partition.h"
1035#include "stringlib/ctype.h"
1036#include "stringlib/transmogrify.h"
1037
1038
1039/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1040were copied from the old char* style string object. */
1041
1042Py_LOCAL_INLINE(void)
1043_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1044{
1045 if (*end > len)
1046 *end = len;
1047 else if (*end < 0)
1048 *end += len;
1049 if (*end < 0)
1050 *end = 0;
1051 if (*start < 0)
1052 *start += len;
1053 if (*start < 0)
1054 *start = 0;
1055}
1056
1057
1058Py_LOCAL_INLINE(Py_ssize_t)
1059bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1060{
1061 PyObject *subobj;
1062 Py_buffer subbuf;
1063 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1064 Py_ssize_t res;
1065
1066 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1067 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1068 return -2;
1069 if (_getbuffer(subobj, &subbuf) < 0)
1070 return -2;
1071 if (dir > 0)
1072 res = stringlib_find_slice(
1073 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1074 subbuf.buf, subbuf.len, start, end);
1075 else
1076 res = stringlib_rfind_slice(
1077 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1078 subbuf.buf, subbuf.len, start, end);
1079 PyObject_ReleaseBuffer(subobj, &subbuf);
1080 return res;
1081}
1082
1083PyDoc_STRVAR(find__doc__,
1084"B.find(sub [,start [,end]]) -> int\n\
1085\n\
1086Return the lowest index in B where subsection sub is found,\n\
1087such that sub is contained within s[start,end]. Optional\n\
1088arguments start and end are interpreted as in slice notation.\n\
1089\n\
1090Return -1 on failure.");
1091
1092static PyObject *
1093bytes_find(PyByteArrayObject *self, PyObject *args)
1094{
1095 Py_ssize_t result = bytes_find_internal(self, args, +1);
1096 if (result == -2)
1097 return NULL;
1098 return PyLong_FromSsize_t(result);
1099}
1100
1101PyDoc_STRVAR(count__doc__,
1102"B.count(sub [,start [,end]]) -> int\n\
1103\n\
1104Return the number of non-overlapping occurrences of subsection sub in\n\
1105bytes B[start:end]. Optional arguments start and end are interpreted\n\
1106as in slice notation.");
1107
1108static PyObject *
1109bytes_count(PyByteArrayObject *self, PyObject *args)
1110{
1111 PyObject *sub_obj;
1112 const char *str = PyByteArray_AS_STRING(self);
1113 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1114 Py_buffer vsub;
1115 PyObject *count_obj;
1116
1117 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1118 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1119 return NULL;
1120
1121 if (_getbuffer(sub_obj, &vsub) < 0)
1122 return NULL;
1123
1124 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1125
1126 count_obj = PyLong_FromSsize_t(
1127 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1128 );
1129 PyObject_ReleaseBuffer(sub_obj, &vsub);
1130 return count_obj;
1131}
1132
1133
1134PyDoc_STRVAR(index__doc__,
1135"B.index(sub [,start [,end]]) -> int\n\
1136\n\
1137Like B.find() but raise ValueError when the subsection is not found.");
1138
1139static PyObject *
1140bytes_index(PyByteArrayObject *self, PyObject *args)
1141{
1142 Py_ssize_t result = bytes_find_internal(self, args, +1);
1143 if (result == -2)
1144 return NULL;
1145 if (result == -1) {
1146 PyErr_SetString(PyExc_ValueError,
1147 "subsection not found");
1148 return NULL;
1149 }
1150 return PyLong_FromSsize_t(result);
1151}
1152
1153
1154PyDoc_STRVAR(rfind__doc__,
1155"B.rfind(sub [,start [,end]]) -> int\n\
1156\n\
1157Return the highest index in B where subsection sub is found,\n\
1158such that sub is contained within s[start,end]. Optional\n\
1159arguments start and end are interpreted as in slice notation.\n\
1160\n\
1161Return -1 on failure.");
1162
1163static PyObject *
1164bytes_rfind(PyByteArrayObject *self, PyObject *args)
1165{
1166 Py_ssize_t result = bytes_find_internal(self, args, -1);
1167 if (result == -2)
1168 return NULL;
1169 return PyLong_FromSsize_t(result);
1170}
1171
1172
1173PyDoc_STRVAR(rindex__doc__,
1174"B.rindex(sub [,start [,end]]) -> int\n\
1175\n\
1176Like B.rfind() but raise ValueError when the subsection is not found.");
1177
1178static PyObject *
1179bytes_rindex(PyByteArrayObject *self, PyObject *args)
1180{
1181 Py_ssize_t result = bytes_find_internal(self, args, -1);
1182 if (result == -2)
1183 return NULL;
1184 if (result == -1) {
1185 PyErr_SetString(PyExc_ValueError,
1186 "subsection not found");
1187 return NULL;
1188 }
1189 return PyLong_FromSsize_t(result);
1190}
1191
1192
1193static int
1194bytes_contains(PyObject *self, PyObject *arg)
1195{
1196 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1197 if (ival == -1 && PyErr_Occurred()) {
1198 Py_buffer varg;
1199 int pos;
1200 PyErr_Clear();
1201 if (_getbuffer(arg, &varg) < 0)
1202 return -1;
1203 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1204 varg.buf, varg.len, 0);
1205 PyObject_ReleaseBuffer(arg, &varg);
1206 return pos >= 0;
1207 }
1208 if (ival < 0 || ival >= 256) {
1209 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1210 return -1;
1211 }
1212
1213 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1214}
1215
1216
1217/* Matches the end (direction >= 0) or start (direction < 0) of self
1218 * against substr, using the start and end arguments. Returns
1219 * -1 on error, 0 if not found and 1 if found.
1220 */
1221Py_LOCAL(int)
1222_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1223 Py_ssize_t end, int direction)
1224{
1225 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1226 const char* str;
1227 Py_buffer vsubstr;
1228 int rv = 0;
1229
1230 str = PyByteArray_AS_STRING(self);
1231
1232 if (_getbuffer(substr, &vsubstr) < 0)
1233 return -1;
1234
1235 _adjust_indices(&start, &end, len);
1236
1237 if (direction < 0) {
1238 /* startswith */
1239 if (start+vsubstr.len > len) {
1240 goto done;
1241 }
1242 } else {
1243 /* endswith */
1244 if (end-start < vsubstr.len || start > len) {
1245 goto done;
1246 }
1247
1248 if (end-vsubstr.len > start)
1249 start = end - vsubstr.len;
1250 }
1251 if (end-start >= vsubstr.len)
1252 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1253
1254done:
1255 PyObject_ReleaseBuffer(substr, &vsubstr);
1256 return rv;
1257}
1258
1259
1260PyDoc_STRVAR(startswith__doc__,
1261"B.startswith(prefix [,start [,end]]) -> bool\n\
1262\n\
1263Return True if B starts with the specified prefix, False otherwise.\n\
1264With optional start, test B beginning at that position.\n\
1265With optional end, stop comparing B at that position.\n\
1266prefix can also be a tuple of strings to try.");
1267
1268static PyObject *
1269bytes_startswith(PyByteArrayObject *self, PyObject *args)
1270{
1271 Py_ssize_t start = 0;
1272 Py_ssize_t end = PY_SSIZE_T_MAX;
1273 PyObject *subobj;
1274 int result;
1275
1276 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1277 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1278 return NULL;
1279 if (PyTuple_Check(subobj)) {
1280 Py_ssize_t i;
1281 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1282 result = _bytes_tailmatch(self,
1283 PyTuple_GET_ITEM(subobj, i),
1284 start, end, -1);
1285 if (result == -1)
1286 return NULL;
1287 else if (result) {
1288 Py_RETURN_TRUE;
1289 }
1290 }
1291 Py_RETURN_FALSE;
1292 }
1293 result = _bytes_tailmatch(self, subobj, start, end, -1);
1294 if (result == -1)
1295 return NULL;
1296 else
1297 return PyBool_FromLong(result);
1298}
1299
1300PyDoc_STRVAR(endswith__doc__,
1301"B.endswith(suffix [,start [,end]]) -> bool\n\
1302\n\
1303Return True if B ends with the specified suffix, False otherwise.\n\
1304With optional start, test B beginning at that position.\n\
1305With optional end, stop comparing B at that position.\n\
1306suffix can also be a tuple of strings to try.");
1307
1308static PyObject *
1309bytes_endswith(PyByteArrayObject *self, PyObject *args)
1310{
1311 Py_ssize_t start = 0;
1312 Py_ssize_t end = PY_SSIZE_T_MAX;
1313 PyObject *subobj;
1314 int result;
1315
1316 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1317 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1318 return NULL;
1319 if (PyTuple_Check(subobj)) {
1320 Py_ssize_t i;
1321 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1322 result = _bytes_tailmatch(self,
1323 PyTuple_GET_ITEM(subobj, i),
1324 start, end, +1);
1325 if (result == -1)
1326 return NULL;
1327 else if (result) {
1328 Py_RETURN_TRUE;
1329 }
1330 }
1331 Py_RETURN_FALSE;
1332 }
1333 result = _bytes_tailmatch(self, subobj, start, end, +1);
1334 if (result == -1)
1335 return NULL;
1336 else
1337 return PyBool_FromLong(result);
1338}
1339
1340
1341PyDoc_STRVAR(translate__doc__,
1342"B.translate(table[, deletechars]) -> bytearray\n\
1343\n\
1344Return a copy of B, where all characters occurring in the\n\
1345optional argument deletechars are removed, and the remaining\n\
1346characters have been mapped through the given translation\n\
1347table, which must be a bytes object of length 256.");
1348
1349static PyObject *
1350bytes_translate(PyByteArrayObject *self, PyObject *args)
1351{
1352 register char *input, *output;
1353 register const char *table;
1354 register Py_ssize_t i, c, changed = 0;
1355 PyObject *input_obj = (PyObject*)self;
1356 const char *output_start;
1357 Py_ssize_t inlen;
1358 PyObject *result;
1359 int trans_table[256];
1360 PyObject *tableobj, *delobj = NULL;
1361 Py_buffer vtable, vdel;
1362
1363 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1364 &tableobj, &delobj))
1365 return NULL;
1366
1367 if (_getbuffer(tableobj, &vtable) < 0)
1368 return NULL;
1369
1370 if (vtable.len != 256) {
1371 PyErr_SetString(PyExc_ValueError,
1372 "translation table must be 256 characters long");
1373 result = NULL;
1374 goto done;
1375 }
1376
1377 if (delobj != NULL) {
1378 if (_getbuffer(delobj, &vdel) < 0) {
1379 result = NULL;
1380 goto done;
1381 }
1382 }
1383 else {
1384 vdel.buf = NULL;
1385 vdel.len = 0;
1386 }
1387
1388 table = (const char *)vtable.buf;
1389 inlen = PyByteArray_GET_SIZE(input_obj);
1390 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1391 if (result == NULL)
1392 goto done;
1393 output_start = output = PyByteArray_AsString(result);
1394 input = PyByteArray_AS_STRING(input_obj);
1395
1396 if (vdel.len == 0) {
1397 /* If no deletions are required, use faster code */
1398 for (i = inlen; --i >= 0; ) {
1399 c = Py_CHARMASK(*input++);
1400 if (Py_CHARMASK((*output++ = table[c])) != c)
1401 changed = 1;
1402 }
1403 if (changed || !PyByteArray_CheckExact(input_obj))
1404 goto done;
1405 Py_DECREF(result);
1406 Py_INCREF(input_obj);
1407 result = input_obj;
1408 goto done;
1409 }
1410
1411 for (i = 0; i < 256; i++)
1412 trans_table[i] = Py_CHARMASK(table[i]);
1413
1414 for (i = 0; i < vdel.len; i++)
1415 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1416
1417 for (i = inlen; --i >= 0; ) {
1418 c = Py_CHARMASK(*input++);
1419 if (trans_table[c] != -1)
1420 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1421 continue;
1422 changed = 1;
1423 }
1424 if (!changed && PyByteArray_CheckExact(input_obj)) {
1425 Py_DECREF(result);
1426 Py_INCREF(input_obj);
1427 result = input_obj;
1428 goto done;
1429 }
1430 /* Fix the size of the resulting string */
1431 if (inlen > 0)
1432 PyByteArray_Resize(result, output - output_start);
1433
1434done:
1435 PyObject_ReleaseBuffer(tableobj, &vtable);
1436 if (delobj != NULL)
1437 PyObject_ReleaseBuffer(delobj, &vdel);
1438 return result;
1439}
1440
1441
/* Search directions understood by findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in target[0:target_len],
   or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True if pattern[0:length] occurs in target at the given offset.
   The first and last bytes are compared before falling back to memcmp.
   Don't call if length < 2 (length-2 would become a huge size_t).
   Every argument is parenthesised so that expressions such as `buf + k`
   can be passed safely. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1455
1456
1457/* Bytes ops must return a string. */
1458/* If the object is subclass of bytes, create a copy */
1459Py_LOCAL(PyByteArrayObject *)
1460return_self(PyByteArrayObject *self)
1461{
1462 if (PyByteArray_CheckExact(self)) {
1463 Py_INCREF(self);
1464 return (PyByteArrayObject *)self;
1465 }
1466 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1467 PyByteArray_AS_STRING(self),
1468 PyByteArray_GET_SIZE(self));
1469}
1470
1471Py_LOCAL_INLINE(Py_ssize_t)
1472countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1473{
1474 Py_ssize_t count=0;
1475 const char *start=target;
1476 const char *end=target+target_len;
1477
1478 while ( (start=findchar(start, end-start, c)) != NULL ) {
1479 count++;
1480 if (count >= maxcount)
1481 break;
1482 start += 1;
1483 }
1484 return count;
1485}
1486
1487Py_LOCAL(Py_ssize_t)
1488findstring(const char *target, Py_ssize_t target_len,
1489 const char *pattern, Py_ssize_t pattern_len,
1490 Py_ssize_t start,
1491 Py_ssize_t end,
1492 int direction)
1493{
1494 if (start < 0) {
1495 start += target_len;
1496 if (start < 0)
1497 start = 0;
1498 }
1499 if (end > target_len) {
1500 end = target_len;
1501 } else if (end < 0) {
1502 end += target_len;
1503 if (end < 0)
1504 end = 0;
1505 }
1506
1507 /* zero-length substrings always match at the first attempt */
1508 if (pattern_len == 0)
1509 return (direction > 0) ? start : end;
1510
1511 end -= pattern_len;
1512
1513 if (direction < 0) {
1514 for (; end >= start; end--)
1515 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1516 return end;
1517 } else {
1518 for (; start <= end; start++)
1519 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1520 return start;
1521 }
1522 return -1;
1523}
1524
1525Py_LOCAL_INLINE(Py_ssize_t)
1526countstring(const char *target, Py_ssize_t target_len,
1527 const char *pattern, Py_ssize_t pattern_len,
1528 Py_ssize_t start,
1529 Py_ssize_t end,
1530 int direction, Py_ssize_t maxcount)
1531{
1532 Py_ssize_t count=0;
1533
1534 if (start < 0) {
1535 start += target_len;
1536 if (start < 0)
1537 start = 0;
1538 }
1539 if (end > target_len) {
1540 end = target_len;
1541 } else if (end < 0) {
1542 end += target_len;
1543 if (end < 0)
1544 end = 0;
1545 }
1546
1547 /* zero-length substrings match everywhere */
1548 if (pattern_len == 0 || maxcount == 0) {
1549 if (target_len+1 < maxcount)
1550 return target_len+1;
1551 return maxcount;
1552 }
1553
1554 end -= pattern_len;
1555 if (direction < 0) {
1556 for (; (end >= start); end--)
1557 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1558 count++;
1559 if (--maxcount <= 0) break;
1560 end -= pattern_len-1;
1561 }
1562 } else {
1563 for (; (start <= end); start++)
1564 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1565 count++;
1566 if (--maxcount <= 0)
1567 break;
1568 start += pattern_len-1;
1569 }
1570 }
1571 return count;
1572}
1573
1574
1575/* Algorithms for different cases of string replacement */
1576
1577/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1578Py_LOCAL(PyByteArrayObject *)
1579replace_interleave(PyByteArrayObject *self,
1580 const char *to_s, Py_ssize_t to_len,
1581 Py_ssize_t maxcount)
1582{
1583 char *self_s, *result_s;
1584 Py_ssize_t self_len, result_len;
1585 Py_ssize_t count, i, product;
1586 PyByteArrayObject *result;
1587
1588 self_len = PyByteArray_GET_SIZE(self);
1589
1590 /* 1 at the end plus 1 after every character */
1591 count = self_len+1;
1592 if (maxcount < count)
1593 count = maxcount;
1594
1595 /* Check for overflow */
1596 /* result_len = count * to_len + self_len; */
1597 product = count * to_len;
1598 if (product / to_len != count) {
1599 PyErr_SetString(PyExc_OverflowError,
1600 "replace string is too long");
1601 return NULL;
1602 }
1603 result_len = product + self_len;
1604 if (result_len < 0) {
1605 PyErr_SetString(PyExc_OverflowError,
1606 "replace string is too long");
1607 return NULL;
1608 }
1609
1610 if (! (result = (PyByteArrayObject *)
1611 PyByteArray_FromStringAndSize(NULL, result_len)) )
1612 return NULL;
1613
1614 self_s = PyByteArray_AS_STRING(self);
1615 result_s = PyByteArray_AS_STRING(result);
1616
1617 /* TODO: special case single character, which doesn't need memcpy */
1618
1619 /* Lay the first one down (guaranteed this will occur) */
1620 Py_MEMCPY(result_s, to_s, to_len);
1621 result_s += to_len;
1622 count -= 1;
1623
1624 for (i=0; i<count; i++) {
1625 *result_s++ = *self_s++;
1626 Py_MEMCPY(result_s, to_s, to_len);
1627 result_s += to_len;
1628 }
1629
1630 /* Copy the rest of the original string */
1631 Py_MEMCPY(result_s, self_s, self_len-i);
1632
1633 return result;
1634}
1635
1636/* Special case for deleting a single character */
1637/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1638Py_LOCAL(PyByteArrayObject *)
1639replace_delete_single_character(PyByteArrayObject *self,
1640 char from_c, Py_ssize_t maxcount)
1641{
1642 char *self_s, *result_s;
1643 char *start, *next, *end;
1644 Py_ssize_t self_len, result_len;
1645 Py_ssize_t count;
1646 PyByteArrayObject *result;
1647
1648 self_len = PyByteArray_GET_SIZE(self);
1649 self_s = PyByteArray_AS_STRING(self);
1650
1651 count = countchar(self_s, self_len, from_c, maxcount);
1652 if (count == 0) {
1653 return return_self(self);
1654 }
1655
1656 result_len = self_len - count; /* from_len == 1 */
1657 assert(result_len>=0);
1658
1659 if ( (result = (PyByteArrayObject *)
1660 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1661 return NULL;
1662 result_s = PyByteArray_AS_STRING(result);
1663
1664 start = self_s;
1665 end = self_s + self_len;
1666 while (count-- > 0) {
1667 next = findchar(start, end-start, from_c);
1668 if (next == NULL)
1669 break;
1670 Py_MEMCPY(result_s, start, next-start);
1671 result_s += (next-start);
1672 start = next+1;
1673 }
1674 Py_MEMCPY(result_s, start, end-start);
1675
1676 return result;
1677}
1678
1679/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1680
1681Py_LOCAL(PyByteArrayObject *)
1682replace_delete_substring(PyByteArrayObject *self,
1683 const char *from_s, Py_ssize_t from_len,
1684 Py_ssize_t maxcount)
1685{
1686 char *self_s, *result_s;
1687 char *start, *next, *end;
1688 Py_ssize_t self_len, result_len;
1689 Py_ssize_t count, offset;
1690 PyByteArrayObject *result;
1691
1692 self_len = PyByteArray_GET_SIZE(self);
1693 self_s = PyByteArray_AS_STRING(self);
1694
1695 count = countstring(self_s, self_len,
1696 from_s, from_len,
1697 0, self_len, 1,
1698 maxcount);
1699
1700 if (count == 0) {
1701 /* no matches */
1702 return return_self(self);
1703 }
1704
1705 result_len = self_len - (count * from_len);
1706 assert (result_len>=0);
1707
1708 if ( (result = (PyByteArrayObject *)
1709 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1710 return NULL;
1711
1712 result_s = PyByteArray_AS_STRING(result);
1713
1714 start = self_s;
1715 end = self_s + self_len;
1716 while (count-- > 0) {
1717 offset = findstring(start, end-start,
1718 from_s, from_len,
1719 0, end-start, FORWARD);
1720 if (offset == -1)
1721 break;
1722 next = start + offset;
1723
1724 Py_MEMCPY(result_s, start, next-start);
1725
1726 result_s += (next-start);
1727 start = next+from_len;
1728 }
1729 Py_MEMCPY(result_s, start, end-start);
1730 return result;
1731}
1732
1733/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1734Py_LOCAL(PyByteArrayObject *)
1735replace_single_character_in_place(PyByteArrayObject *self,
1736 char from_c, char to_c,
1737 Py_ssize_t maxcount)
1738{
1739 char *self_s, *result_s, *start, *end, *next;
1740 Py_ssize_t self_len;
1741 PyByteArrayObject *result;
1742
1743 /* The result string will be the same size */
1744 self_s = PyByteArray_AS_STRING(self);
1745 self_len = PyByteArray_GET_SIZE(self);
1746
1747 next = findchar(self_s, self_len, from_c);
1748
1749 if (next == NULL) {
1750 /* No matches; return the original bytes */
1751 return return_self(self);
1752 }
1753
1754 /* Need to make a new bytes */
1755 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1756 if (result == NULL)
1757 return NULL;
1758 result_s = PyByteArray_AS_STRING(result);
1759 Py_MEMCPY(result_s, self_s, self_len);
1760
1761 /* change everything in-place, starting with this one */
1762 start = result_s + (next-self_s);
1763 *start = to_c;
1764 start++;
1765 end = result_s + self_len;
1766
1767 while (--maxcount > 0) {
1768 next = findchar(start, end-start, from_c);
1769 if (next == NULL)
1770 break;
1771 *next = to_c;
1772 start = next+1;
1773 }
1774
1775 return result;
1776}
1777
1778/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1779Py_LOCAL(PyByteArrayObject *)
1780replace_substring_in_place(PyByteArrayObject *self,
1781 const char *from_s, Py_ssize_t from_len,
1782 const char *to_s, Py_ssize_t to_len,
1783 Py_ssize_t maxcount)
1784{
1785 char *result_s, *start, *end;
1786 char *self_s;
1787 Py_ssize_t self_len, offset;
1788 PyByteArrayObject *result;
1789
1790 /* The result bytes will be the same size */
1791
1792 self_s = PyByteArray_AS_STRING(self);
1793 self_len = PyByteArray_GET_SIZE(self);
1794
1795 offset = findstring(self_s, self_len,
1796 from_s, from_len,
1797 0, self_len, FORWARD);
1798 if (offset == -1) {
1799 /* No matches; return the original bytes */
1800 return return_self(self);
1801 }
1802
1803 /* Need to make a new bytes */
1804 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1805 if (result == NULL)
1806 return NULL;
1807 result_s = PyByteArray_AS_STRING(result);
1808 Py_MEMCPY(result_s, self_s, self_len);
1809
1810 /* change everything in-place, starting with this one */
1811 start = result_s + offset;
1812 Py_MEMCPY(start, to_s, from_len);
1813 start += from_len;
1814 end = result_s + self_len;
1815
1816 while ( --maxcount > 0) {
1817 offset = findstring(start, end-start,
1818 from_s, from_len,
1819 0, end-start, FORWARD);
1820 if (offset==-1)
1821 break;
1822 Py_MEMCPY(start+offset, to_s, from_len);
1823 start += offset+from_len;
1824 }
1825
1826 return result;
1827}
1828
1829/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1830Py_LOCAL(PyByteArrayObject *)
1831replace_single_character(PyByteArrayObject *self,
1832 char from_c,
1833 const char *to_s, Py_ssize_t to_len,
1834 Py_ssize_t maxcount)
1835{
1836 char *self_s, *result_s;
1837 char *start, *next, *end;
1838 Py_ssize_t self_len, result_len;
1839 Py_ssize_t count, product;
1840 PyByteArrayObject *result;
1841
1842 self_s = PyByteArray_AS_STRING(self);
1843 self_len = PyByteArray_GET_SIZE(self);
1844
1845 count = countchar(self_s, self_len, from_c, maxcount);
1846 if (count == 0) {
1847 /* no matches, return unchanged */
1848 return return_self(self);
1849 }
1850
1851 /* use the difference between current and new, hence the "-1" */
1852 /* result_len = self_len + count * (to_len-1) */
1853 product = count * (to_len-1);
1854 if (product / (to_len-1) != count) {
1855 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1856 return NULL;
1857 }
1858 result_len = self_len + product;
1859 if (result_len < 0) {
1860 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1861 return NULL;
1862 }
1863
1864 if ( (result = (PyByteArrayObject *)
1865 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1866 return NULL;
1867 result_s = PyByteArray_AS_STRING(result);
1868
1869 start = self_s;
1870 end = self_s + self_len;
1871 while (count-- > 0) {
1872 next = findchar(start, end-start, from_c);
1873 if (next == NULL)
1874 break;
1875
1876 if (next == start) {
1877 /* replace with the 'to' */
1878 Py_MEMCPY(result_s, to_s, to_len);
1879 result_s += to_len;
1880 start += 1;
1881 } else {
1882 /* copy the unchanged old then the 'to' */
1883 Py_MEMCPY(result_s, start, next-start);
1884 result_s += (next-start);
1885 Py_MEMCPY(result_s, to_s, to_len);
1886 result_s += to_len;
1887 start = next+1;
1888 }
1889 }
1890 /* Copy the remainder of the remaining bytes */
1891 Py_MEMCPY(result_s, start, end-start);
1892
1893 return result;
1894}
1895
1896/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1897Py_LOCAL(PyByteArrayObject *)
1898replace_substring(PyByteArrayObject *self,
1899 const char *from_s, Py_ssize_t from_len,
1900 const char *to_s, Py_ssize_t to_len,
1901 Py_ssize_t maxcount)
1902{
1903 char *self_s, *result_s;
1904 char *start, *next, *end;
1905 Py_ssize_t self_len, result_len;
1906 Py_ssize_t count, offset, product;
1907 PyByteArrayObject *result;
1908
1909 self_s = PyByteArray_AS_STRING(self);
1910 self_len = PyByteArray_GET_SIZE(self);
1911
1912 count = countstring(self_s, self_len,
1913 from_s, from_len,
1914 0, self_len, FORWARD, maxcount);
1915 if (count == 0) {
1916 /* no matches, return unchanged */
1917 return return_self(self);
1918 }
1919
1920 /* Check for overflow */
1921 /* result_len = self_len + count * (to_len-from_len) */
1922 product = count * (to_len-from_len);
1923 if (product / (to_len-from_len) != count) {
1924 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1925 return NULL;
1926 }
1927 result_len = self_len + product;
1928 if (result_len < 0) {
1929 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1930 return NULL;
1931 }
1932
1933 if ( (result = (PyByteArrayObject *)
1934 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1935 return NULL;
1936 result_s = PyByteArray_AS_STRING(result);
1937
1938 start = self_s;
1939 end = self_s + self_len;
1940 while (count-- > 0) {
1941 offset = findstring(start, end-start,
1942 from_s, from_len,
1943 0, end-start, FORWARD);
1944 if (offset == -1)
1945 break;
1946 next = start+offset;
1947 if (next == start) {
1948 /* replace with the 'to' */
1949 Py_MEMCPY(result_s, to_s, to_len);
1950 result_s += to_len;
1951 start += from_len;
1952 } else {
1953 /* copy the unchanged old then the 'to' */
1954 Py_MEMCPY(result_s, start, next-start);
1955 result_s += (next-start);
1956 Py_MEMCPY(result_s, to_s, to_len);
1957 result_s += to_len;
1958 start = next+from_len;
1959 }
1960 }
1961 /* Copy the remainder of the remaining bytes */
1962 Py_MEMCPY(result_s, start, end-start);
1963
1964 return result;
1965}
1966
1967
1968Py_LOCAL(PyByteArrayObject *)
1969replace(PyByteArrayObject *self,
1970 const char *from_s, Py_ssize_t from_len,
1971 const char *to_s, Py_ssize_t to_len,
1972 Py_ssize_t maxcount)
1973{
1974 if (maxcount < 0) {
1975 maxcount = PY_SSIZE_T_MAX;
1976 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
1977 /* nothing to do; return the original bytes */
1978 return return_self(self);
1979 }
1980
1981 if (maxcount == 0 ||
1982 (from_len == 0 && to_len == 0)) {
1983 /* nothing to do; return the original bytes */
1984 return return_self(self);
1985 }
1986
1987 /* Handle zero-length special cases */
1988
1989 if (from_len == 0) {
1990 /* insert the 'to' bytes everywhere. */
1991 /* >>> "Python".replace("", ".") */
1992 /* '.P.y.t.h.o.n.' */
1993 return replace_interleave(self, to_s, to_len, maxcount);
1994 }
1995
1996 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1997 /* point for an empty self bytes to generate a non-empty bytes */
1998 /* Special case so the remaining code always gets a non-empty bytes */
1999 if (PyByteArray_GET_SIZE(self) == 0) {
2000 return return_self(self);
2001 }
2002
2003 if (to_len == 0) {
2004 /* delete all occurances of 'from' bytes */
2005 if (from_len == 1) {
2006 return replace_delete_single_character(
2007 self, from_s[0], maxcount);
2008 } else {
2009 return replace_delete_substring(self, from_s, from_len, maxcount);
2010 }
2011 }
2012
2013 /* Handle special case where both bytes have the same length */
2014
2015 if (from_len == to_len) {
2016 if (from_len == 1) {
2017 return replace_single_character_in_place(
2018 self,
2019 from_s[0],
2020 to_s[0],
2021 maxcount);
2022 } else {
2023 return replace_substring_in_place(
2024 self, from_s, from_len, to_s, to_len, maxcount);
2025 }
2026 }
2027
2028 /* Otherwise use the more generic algorithms */
2029 if (from_len == 1) {
2030 return replace_single_character(self, from_s[0],
2031 to_s, to_len, maxcount);
2032 } else {
2033 /* len('from')>=2, len('to')>=1 */
2034 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2035 }
2036}
2037
2038
2039PyDoc_STRVAR(replace__doc__,
2040"B.replace(old, new[, count]) -> bytes\n\
2041\n\
2042Return a copy of B with all occurrences of subsection\n\
2043old replaced by new. If the optional argument count is\n\
2044given, only the first count occurrences are replaced.");
2045
2046static PyObject *
2047bytes_replace(PyByteArrayObject *self, PyObject *args)
2048{
2049 Py_ssize_t count = -1;
2050 PyObject *from, *to, *res;
2051 Py_buffer vfrom, vto;
2052
2053 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2054 return NULL;
2055
2056 if (_getbuffer(from, &vfrom) < 0)
2057 return NULL;
2058 if (_getbuffer(to, &vto) < 0) {
2059 PyObject_ReleaseBuffer(from, &vfrom);
2060 return NULL;
2061 }
2062
2063 res = (PyObject *)replace((PyByteArrayObject *) self,
2064 vfrom.buf, vfrom.len,
2065 vto.buf, vto.len, count);
2066
2067 PyObject_ReleaseBuffer(from, &vfrom);
2068 PyObject_ReleaseBuffer(to, &vto);
2069 return res;
2070}
2071
2072
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesised so that an expression can be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the slice data[left:right] to `list` as a new bytearray.
   Relies on `str` and `list` variables and an `onError` label being
   present in the calling function. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store the slice data[left:right] as element number `count` of `list`:
   directly into a preallocated slot while count < MAX_PREALLOC, by
   appending afterwards.  Relies on `str`, `list` and `count` variables and
   an `onError` label being present in the calling function. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
2117
2118
2119Py_LOCAL_INLINE(PyObject *)
2120split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2121{
2122 register Py_ssize_t i, j, count = 0;
2123 PyObject *str;
2124 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2125
2126 if (list == NULL)
2127 return NULL;
2128
2129 i = j = 0;
2130 while ((j < len) && (maxcount-- > 0)) {
2131 for(; j < len; j++) {
2132 /* I found that using memchr makes no difference */
2133 if (s[j] == ch) {
2134 SPLIT_ADD(s, i, j);
2135 i = j = j + 1;
2136 break;
2137 }
2138 }
2139 }
2140 if (i <= len) {
2141 SPLIT_ADD(s, i, len);
2142 }
2143 FIX_PREALLOC_SIZE(list);
2144 return list;
2145
2146 onError:
2147 Py_DECREF(list);
2148 return NULL;
2149}
2150
2151
2152Py_LOCAL_INLINE(PyObject *)
2153split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2154{
2155 register Py_ssize_t i, j, count = 0;
2156 PyObject *str;
2157 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2158
2159 if (list == NULL)
2160 return NULL;
2161
2162 for (i = j = 0; i < len; ) {
2163 /* find a token */
2164 while (i < len && ISSPACE(s[i]))
2165 i++;
2166 j = i;
2167 while (i < len && !ISSPACE(s[i]))
2168 i++;
2169 if (j < i) {
2170 if (maxcount-- <= 0)
2171 break;
2172 SPLIT_ADD(s, j, i);
2173 while (i < len && ISSPACE(s[i]))
2174 i++;
2175 j = i;
2176 }
2177 }
2178 if (j < len) {
2179 SPLIT_ADD(s, j, len);
2180 }
2181 FIX_PREALLOC_SIZE(list);
2182 return list;
2183
2184 onError:
2185 Py_DECREF(list);
2186 return NULL;
2187}
2188
2189PyDoc_STRVAR(split__doc__,
2190"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2191\n\
2192Return a list of the sections in B, using sep as the delimiter.\n\
2193If sep is not given, B is split on ASCII whitespace characters\n\
2194(space, tab, return, newline, formfeed, vertical tab).\n\
2195If maxsplit is given, at most maxsplit splits are done.");
2196
2197static PyObject *
2198bytes_split(PyByteArrayObject *self, PyObject *args)
2199{
2200 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2201 Py_ssize_t maxsplit = -1, count = 0;
2202 const char *s = PyByteArray_AS_STRING(self), *sub;
2203 PyObject *list, *str, *subobj = Py_None;
2204 Py_buffer vsub;
2205#ifdef USE_FAST
2206 Py_ssize_t pos;
2207#endif
2208
2209 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2210 return NULL;
2211 if (maxsplit < 0)
2212 maxsplit = PY_SSIZE_T_MAX;
2213
2214 if (subobj == Py_None)
2215 return split_whitespace(s, len, maxsplit);
2216
2217 if (_getbuffer(subobj, &vsub) < 0)
2218 return NULL;
2219 sub = vsub.buf;
2220 n = vsub.len;
2221
2222 if (n == 0) {
2223 PyErr_SetString(PyExc_ValueError, "empty separator");
2224 PyObject_ReleaseBuffer(subobj, &vsub);
2225 return NULL;
2226 }
2227 if (n == 1)
2228 return split_char(s, len, sub[0], maxsplit);
2229
2230 list = PyList_New(PREALLOC_SIZE(maxsplit));
2231 if (list == NULL) {
2232 PyObject_ReleaseBuffer(subobj, &vsub);
2233 return NULL;
2234 }
2235
2236#ifdef USE_FAST
2237 i = j = 0;
2238 while (maxsplit-- > 0) {
2239 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2240 if (pos < 0)
2241 break;
2242 j = i+pos;
2243 SPLIT_ADD(s, i, j);
2244 i = j + n;
2245 }
2246#else
2247 i = j = 0;
2248 while ((j+n <= len) && (maxsplit-- > 0)) {
2249 for (; j+n <= len; j++) {
2250 if (Py_STRING_MATCH(s, j, sub, n)) {
2251 SPLIT_ADD(s, i, j);
2252 i = j = j + n;
2253 break;
2254 }
2255 }
2256 }
2257#endif
2258 SPLIT_ADD(s, i, len);
2259 FIX_PREALLOC_SIZE(list);
2260 PyObject_ReleaseBuffer(subobj, &vsub);
2261 return list;
2262
2263 onError:
2264 Py_DECREF(list);
2265 PyObject_ReleaseBuffer(subobj, &vsub);
2266 return NULL;
2267}
2268
2269/* stringlib's partition shares nullbytes in some cases.
2270 undo this, we don't want the nullbytes to be shared. */
2271static PyObject *
2272make_nullbytes_unique(PyObject *result)
2273{
2274 if (result != NULL) {
2275 int i;
2276 assert(PyTuple_Check(result));
2277 assert(PyTuple_GET_SIZE(result) == 3);
2278 for (i = 0; i < 3; i++) {
2279 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2280 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2281 if (new == NULL) {
2282 Py_DECREF(result);
2283 result = NULL;
2284 break;
2285 }
2286 Py_DECREF(nullbytes);
2287 PyTuple_SET_ITEM(result, i, new);
2288 }
2289 }
2290 }
2291 return result;
2292}
2293
2294PyDoc_STRVAR(partition__doc__,
2295"B.partition(sep) -> (head, sep, tail)\n\
2296\n\
2297Searches for the separator sep in B, and returns the part before it,\n\
2298the separator itself, and the part after it. If the separator is not\n\
2299found, returns B and two empty bytearray objects.");
2300
2301static PyObject *
2302bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2303{
2304 PyObject *bytesep, *result;
2305
2306 bytesep = PyByteArray_FromObject(sep_obj);
2307 if (! bytesep)
2308 return NULL;
2309
2310 result = stringlib_partition(
2311 (PyObject*) self,
2312 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2313 bytesep,
2314 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2315 );
2316
2317 Py_DECREF(bytesep);
2318 return make_nullbytes_unique(result);
2319}
2320
2321PyDoc_STRVAR(rpartition__doc__,
2322"B.rpartition(sep) -> (tail, sep, head)\n\
2323\n\
2324Searches for the separator sep in B, starting at the end of B,\n\
2325and returns the part before it, the separator itself, and the\n\
2326part after it. If the separator is not found, returns two empty\n\
2327bytearray objects and B.");
2328
2329static PyObject *
2330bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2331{
2332 PyObject *bytesep, *result;
2333
2334 bytesep = PyByteArray_FromObject(sep_obj);
2335 if (! bytesep)
2336 return NULL;
2337
2338 result = stringlib_rpartition(
2339 (PyObject*) self,
2340 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2341 bytesep,
2342 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2343 );
2344
2345 Py_DECREF(bytesep);
2346 return make_nullbytes_unique(result);
2347}
2348
2349Py_LOCAL_INLINE(PyObject *)
2350rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2351{
2352 register Py_ssize_t i, j, count=0;
2353 PyObject *str;
2354 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2355
2356 if (list == NULL)
2357 return NULL;
2358
2359 i = j = len - 1;
2360 while ((i >= 0) && (maxcount-- > 0)) {
2361 for (; i >= 0; i--) {
2362 if (s[i] == ch) {
2363 SPLIT_ADD(s, i + 1, j + 1);
2364 j = i = i - 1;
2365 break;
2366 }
2367 }
2368 }
2369 if (j >= -1) {
2370 SPLIT_ADD(s, 0, j + 1);
2371 }
2372 FIX_PREALLOC_SIZE(list);
2373 if (PyList_Reverse(list) < 0)
2374 goto onError;
2375
2376 return list;
2377
2378 onError:
2379 Py_DECREF(list);
2380 return NULL;
2381}
2382
2383Py_LOCAL_INLINE(PyObject *)
2384rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2385{
2386 register Py_ssize_t i, j, count = 0;
2387 PyObject *str;
2388 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2389
2390 if (list == NULL)
2391 return NULL;
2392
2393 for (i = j = len - 1; i >= 0; ) {
2394 /* find a token */
2395 while (i >= 0 && ISSPACE(s[i]))
2396 i--;
2397 j = i;
2398 while (i >= 0 && !ISSPACE(s[i]))
2399 i--;
2400 if (j > i) {
2401 if (maxcount-- <= 0)
2402 break;
2403 SPLIT_ADD(s, i + 1, j + 1);
2404 while (i >= 0 && ISSPACE(s[i]))
2405 i--;
2406 j = i;
2407 }
2408 }
2409 if (j >= 0) {
2410 SPLIT_ADD(s, 0, j + 1);
2411 }
2412 FIX_PREALLOC_SIZE(list);
2413 if (PyList_Reverse(list) < 0)
2414 goto onError;
2415
2416 return list;
2417
2418 onError:
2419 Py_DECREF(list);
2420 return NULL;
2421}
2422
2423PyDoc_STRVAR(rsplit__doc__,
2424"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2425\n\
2426Return a list of the sections in B, using sep as the delimiter,\n\
2427starting at the end of B and working to the front.\n\
2428If sep is not given, B is split on ASCII whitespace characters\n\
2429(space, tab, return, newline, formfeed, vertical tab).\n\
2430If maxsplit is given, at most maxsplit splits are done.");
2431
2432static PyObject *
2433bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2434{
2435 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2436 Py_ssize_t maxsplit = -1, count = 0;
2437 const char *s = PyByteArray_AS_STRING(self), *sub;
2438 PyObject *list, *str, *subobj = Py_None;
2439 Py_buffer vsub;
2440
2441 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2442 return NULL;
2443 if (maxsplit < 0)
2444 maxsplit = PY_SSIZE_T_MAX;
2445
2446 if (subobj == Py_None)
2447 return rsplit_whitespace(s, len, maxsplit);
2448
2449 if (_getbuffer(subobj, &vsub) < 0)
2450 return NULL;
2451 sub = vsub.buf;
2452 n = vsub.len;
2453
2454 if (n == 0) {
2455 PyErr_SetString(PyExc_ValueError, "empty separator");
2456 PyObject_ReleaseBuffer(subobj, &vsub);
2457 return NULL;
2458 }
2459 else if (n == 1)
2460 return rsplit_char(s, len, sub[0], maxsplit);
2461
2462 list = PyList_New(PREALLOC_SIZE(maxsplit));
2463 if (list == NULL) {
2464 PyObject_ReleaseBuffer(subobj, &vsub);
2465 return NULL;
2466 }
2467
2468 j = len;
2469 i = j - n;
2470
2471 while ( (i >= 0) && (maxsplit-- > 0) ) {
2472 for (; i>=0; i--) {
2473 if (Py_STRING_MATCH(s, i, sub, n)) {
2474 SPLIT_ADD(s, i + n, j);
2475 j = i;
2476 i -= n;
2477 break;
2478 }
2479 }
2480 }
2481 SPLIT_ADD(s, 0, j);
2482 FIX_PREALLOC_SIZE(list);
2483 if (PyList_Reverse(list) < 0)
2484 goto onError;
2485 PyObject_ReleaseBuffer(subobj, &vsub);
2486 return list;
2487
2488onError:
2489 Py_DECREF(list);
2490 PyObject_ReleaseBuffer(subobj, &vsub);
2491 return NULL;
2492}
2493
2494PyDoc_STRVAR(reverse__doc__,
2495"B.reverse() -> None\n\
2496\n\
2497Reverse the order of the values in B in place.");
2498static PyObject *
2499bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2500{
2501 char swap, *head, *tail;
2502 Py_ssize_t i, j, n = Py_SIZE(self);
2503
2504 j = n / 2;
2505 head = self->ob_bytes;
2506 tail = head + n - 1;
2507 for (i = 0; i < j; i++) {
2508 swap = *head;
2509 *head++ = *tail;
2510 *tail-- = swap;
2511 }
2512
2513 Py_RETURN_NONE;
2514}
2515
2516PyDoc_STRVAR(insert__doc__,
2517"B.insert(index, int) -> None\n\
2518\n\
2519Insert a single item into the bytearray before the given index.");
2520static PyObject *
2521bytes_insert(PyByteArrayObject *self, PyObject *args)
2522{
2523 int value;
2524 Py_ssize_t where, n = Py_SIZE(self);
2525
2526 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2527 return NULL;
2528
2529 if (n == PY_SSIZE_T_MAX) {
2530 PyErr_SetString(PyExc_OverflowError,
2531 "cannot add more objects to bytes");
2532 return NULL;
2533 }
2534 if (value < 0 || value >= 256) {
2535 PyErr_SetString(PyExc_ValueError,
2536 "byte must be in range(0, 256)");
2537 return NULL;
2538 }
2539 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2540 return NULL;
2541
2542 if (where < 0) {
2543 where += n;
2544 if (where < 0)
2545 where = 0;
2546 }
2547 if (where > n)
2548 where = n;
2549 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2550 self->ob_bytes[where] = value;
2551
2552 Py_RETURN_NONE;
2553}
2554
2555PyDoc_STRVAR(append__doc__,
2556"B.append(int) -> None\n\
2557\n\
2558Append a single item to the end of B.");
2559static PyObject *
2560bytes_append(PyByteArrayObject *self, PyObject *arg)
2561{
2562 int value;
2563 Py_ssize_t n = Py_SIZE(self);
2564
2565 if (! _getbytevalue(arg, &value))
2566 return NULL;
2567 if (n == PY_SSIZE_T_MAX) {
2568 PyErr_SetString(PyExc_OverflowError,
2569 "cannot add more objects to bytes");
2570 return NULL;
2571 }
2572 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2573 return NULL;
2574
2575 self->ob_bytes[n] = value;
2576
2577 Py_RETURN_NONE;
2578}
2579
2580PyDoc_STRVAR(extend__doc__,
2581"B.extend(iterable int) -> None\n\
2582\n\
2583Append all the elements from the iterator or sequence to the\n\
2584end of B.");
2585static PyObject *
2586bytes_extend(PyByteArrayObject *self, PyObject *arg)
2587{
2588 PyObject *it, *item, *bytes_obj;
2589 Py_ssize_t buf_size = 0, len = 0;
2590 int value;
2591 char *buf;
2592
2593 /* bytes_setslice code only accepts something supporting PEP 3118. */
2594 if (PyObject_CheckBuffer(arg)) {
2595 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2596 return NULL;
2597
2598 Py_RETURN_NONE;
2599 }
2600
2601 it = PyObject_GetIter(arg);
2602 if (it == NULL)
2603 return NULL;
2604
2605 /* Try to determine the length of the argument. 32 is abitrary. */
2606 buf_size = _PyObject_LengthHint(arg, 32);
2607
2608 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2609 if (bytes_obj == NULL)
2610 return NULL;
2611 buf = PyByteArray_AS_STRING(bytes_obj);
2612
2613 while ((item = PyIter_Next(it)) != NULL) {
2614 if (! _getbytevalue(item, &value)) {
2615 Py_DECREF(item);
2616 Py_DECREF(it);
2617 Py_DECREF(bytes_obj);
2618 return NULL;
2619 }
2620 buf[len++] = value;
2621 Py_DECREF(item);
2622
2623 if (len >= buf_size) {
2624 buf_size = len + (len >> 1) + 1;
2625 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2626 Py_DECREF(it);
2627 Py_DECREF(bytes_obj);
2628 return NULL;
2629 }
2630 /* Recompute the `buf' pointer, since the resizing operation may
2631 have invalidated it. */
2632 buf = PyByteArray_AS_STRING(bytes_obj);
2633 }
2634 }
2635 Py_DECREF(it);
2636
2637 /* Resize down to exact size. */
2638 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2639 Py_DECREF(bytes_obj);
2640 return NULL;
2641 }
2642
2643 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2644 return NULL;
2645 Py_DECREF(bytes_obj);
2646
2647 Py_RETURN_NONE;
2648}
2649
2650PyDoc_STRVAR(pop__doc__,
2651"B.pop([index]) -> int\n\
2652\n\
2653Remove and return a single item from B. If no index\n\
2654argument is give, will pop the last value.");
2655static PyObject *
2656bytes_pop(PyByteArrayObject *self, PyObject *args)
2657{
2658 int value;
2659 Py_ssize_t where = -1, n = Py_SIZE(self);
2660
2661 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2662 return NULL;
2663
2664 if (n == 0) {
2665 PyErr_SetString(PyExc_OverflowError,
2666 "cannot pop an empty bytes");
2667 return NULL;
2668 }
2669 if (where < 0)
2670 where += Py_SIZE(self);
2671 if (where < 0 || where >= Py_SIZE(self)) {
2672 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2673 return NULL;
2674 }
2675
2676 value = self->ob_bytes[where];
2677 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2678 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2679 return NULL;
2680
2681 return PyLong_FromLong(value);
2682}
2683
2684PyDoc_STRVAR(remove__doc__,
2685"B.remove(int) -> None\n\
2686\n\
2687Remove the first occurance of a value in B.");
2688static PyObject *
2689bytes_remove(PyByteArrayObject *self, PyObject *arg)
2690{
2691 int value;
2692 Py_ssize_t where, n = Py_SIZE(self);
2693
2694 if (! _getbytevalue(arg, &value))
2695 return NULL;
2696
2697 for (where = 0; where < n; where++) {
2698 if (self->ob_bytes[where] == value)
2699 break;
2700 }
2701 if (where == n) {
2702 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2703 return NULL;
2704 }
2705
2706 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2707 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2708 return NULL;
2709
2710 Py_RETURN_NONE;
2711}
2712
2713/* XXX These two helpers could be optimized if argsize == 1 */
2714
2715static Py_ssize_t
2716lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2717 void *argptr, Py_ssize_t argsize)
2718{
2719 Py_ssize_t i = 0;
2720 while (i < mysize && memchr(argptr, myptr[i], argsize))
2721 i++;
2722 return i;
2723}
2724
2725static Py_ssize_t
2726rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2727 void *argptr, Py_ssize_t argsize)
2728{
2729 Py_ssize_t i = mysize - 1;
2730 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2731 i--;
2732 return i + 1;
2733}
2734
2735PyDoc_STRVAR(strip__doc__,
2736"B.strip([bytes]) -> bytearray\n\
2737\n\
2738Strip leading and trailing bytes contained in the argument.\n\
2739If the argument is omitted, strip ASCII whitespace.");
2740static PyObject *
2741bytes_strip(PyByteArrayObject *self, PyObject *args)
2742{
2743 Py_ssize_t left, right, mysize, argsize;
2744 void *myptr, *argptr;
2745 PyObject *arg = Py_None;
2746 Py_buffer varg;
2747 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2748 return NULL;
2749 if (arg == Py_None) {
2750 argptr = "\t\n\r\f\v ";
2751 argsize = 6;
2752 }
2753 else {
2754 if (_getbuffer(arg, &varg) < 0)
2755 return NULL;
2756 argptr = varg.buf;
2757 argsize = varg.len;
2758 }
2759 myptr = self->ob_bytes;
2760 mysize = Py_SIZE(self);
2761 left = lstrip_helper(myptr, mysize, argptr, argsize);
2762 if (left == mysize)
2763 right = left;
2764 else
2765 right = rstrip_helper(myptr, mysize, argptr, argsize);
2766 if (arg != Py_None)
2767 PyObject_ReleaseBuffer(arg, &varg);
2768 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2769}
2770
2771PyDoc_STRVAR(lstrip__doc__,
2772"B.lstrip([bytes]) -> bytearray\n\
2773\n\
2774Strip leading bytes contained in the argument.\n\
2775If the argument is omitted, strip leading ASCII whitespace.");
2776static PyObject *
2777bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2778{
2779 Py_ssize_t left, right, mysize, argsize;
2780 void *myptr, *argptr;
2781 PyObject *arg = Py_None;
2782 Py_buffer varg;
2783 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2784 return NULL;
2785 if (arg == Py_None) {
2786 argptr = "\t\n\r\f\v ";
2787 argsize = 6;
2788 }
2789 else {
2790 if (_getbuffer(arg, &varg) < 0)
2791 return NULL;
2792 argptr = varg.buf;
2793 argsize = varg.len;
2794 }
2795 myptr = self->ob_bytes;
2796 mysize = Py_SIZE(self);
2797 left = lstrip_helper(myptr, mysize, argptr, argsize);
2798 right = mysize;
2799 if (arg != Py_None)
2800 PyObject_ReleaseBuffer(arg, &varg);
2801 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2802}
2803
2804PyDoc_STRVAR(rstrip__doc__,
2805"B.rstrip([bytes]) -> bytearray\n\
2806\n\
2807Strip trailing bytes contained in the argument.\n\
2808If the argument is omitted, strip trailing ASCII whitespace.");
2809static PyObject *
2810bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2811{
2812 Py_ssize_t left, right, mysize, argsize;
2813 void *myptr, *argptr;
2814 PyObject *arg = Py_None;
2815 Py_buffer varg;
2816 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2817 return NULL;
2818 if (arg == Py_None) {
2819 argptr = "\t\n\r\f\v ";
2820 argsize = 6;
2821 }
2822 else {
2823 if (_getbuffer(arg, &varg) < 0)
2824 return NULL;
2825 argptr = varg.buf;
2826 argsize = varg.len;
2827 }
2828 myptr = self->ob_bytes;
2829 mysize = Py_SIZE(self);
2830 left = 0;
2831 right = rstrip_helper(myptr, mysize, argptr, argsize);
2832 if (arg != Py_None)
2833 PyObject_ReleaseBuffer(arg, &varg);
2834 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2835}
2836
2837PyDoc_STRVAR(decode_doc,
2838"B.decode([encoding[, errors]]) -> unicode object.\n\
2839\n\
2840Decodes B using the codec registered for encoding. encoding defaults\n\
2841to the default encoding. errors may be given to set a different error\n\
2842handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2843a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2844as well as any other name registered with codecs.register_error that is\n\
2845able to handle UnicodeDecodeErrors.");
2846
2847static PyObject *
2848bytes_decode(PyObject *self, PyObject *args)
2849{
2850 const char *encoding = NULL;
2851 const char *errors = NULL;
2852
2853 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2854 return NULL;
2855 if (encoding == NULL)
2856 encoding = PyUnicode_GetDefaultEncoding();
2857 return PyCodec_Decode(self, encoding, errors);
2858}
2859
2860PyDoc_STRVAR(alloc_doc,
2861"B.__alloc__() -> int\n\
2862\n\
2863Returns the number of bytes actually allocated.");
2864
2865static PyObject *
2866bytes_alloc(PyByteArrayObject *self)
2867{
2868 return PyLong_FromSsize_t(self->ob_alloc);
2869}
2870
2871PyDoc_STRVAR(join_doc,
2872"B.join(iterable_of_bytes) -> bytes\n\
2873\n\
2874Concatenates any number of bytearray objects, with B in between each pair.");
2875
2876static PyObject *
2877bytes_join(PyByteArrayObject *self, PyObject *it)
2878{
2879 PyObject *seq;
2880 Py_ssize_t mysize = Py_SIZE(self);
2881 Py_ssize_t i;
2882 Py_ssize_t n;
2883 PyObject **items;
2884 Py_ssize_t totalsize = 0;
2885 PyObject *result;
2886 char *dest;
2887
2888 seq = PySequence_Fast(it, "can only join an iterable");
2889 if (seq == NULL)
2890 return NULL;
2891 n = PySequence_Fast_GET_SIZE(seq);
2892 items = PySequence_Fast_ITEMS(seq);
2893
2894 /* Compute the total size, and check that they are all bytes */
2895 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2896 for (i = 0; i < n; i++) {
2897 PyObject *obj = items[i];
2898 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2899 PyErr_Format(PyExc_TypeError,
2900 "can only join an iterable of bytes "
2901 "(item %ld has type '%.100s')",
2902 /* XXX %ld isn't right on Win64 */
2903 (long)i, Py_TYPE(obj)->tp_name);
2904 goto error;
2905 }
2906 if (i > 0)
2907 totalsize += mysize;
2908 totalsize += Py_SIZE(obj);
2909 if (totalsize < 0) {
2910 PyErr_NoMemory();
2911 goto error;
2912 }
2913 }
2914
2915 /* Allocate the result, and copy the bytes */
2916 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2917 if (result == NULL)
2918 goto error;
2919 dest = PyByteArray_AS_STRING(result);
2920 for (i = 0; i < n; i++) {
2921 PyObject *obj = items[i];
2922 Py_ssize_t size = Py_SIZE(obj);
2923 char *buf;
2924 if (PyByteArray_Check(obj))
2925 buf = PyByteArray_AS_STRING(obj);
2926 else
2927 buf = PyBytes_AS_STRING(obj);
2928 if (i) {
2929 memcpy(dest, self->ob_bytes, mysize);
2930 dest += mysize;
2931 }
2932 memcpy(dest, buf, size);
2933 dest += size;
2934 }
2935
2936 /* Done */
2937 Py_DECREF(seq);
2938 return result;
2939
2940 /* Error handling */
2941 error:
2942 Py_DECREF(seq);
2943 return NULL;
2944}
2945
2946PyDoc_STRVAR(fromhex_doc,
2947"bytearray.fromhex(string) -> bytearray\n\
2948\n\
2949Create a bytearray object from a string of hexadecimal numbers.\n\
2950Spaces between two numbers are accepted.\n\
2951Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
2952
2953static int
2954hex_digit_to_int(Py_UNICODE c)
2955{
2956 if (c >= 128)
2957 return -1;
2958 if (ISDIGIT(c))
2959 return c - '0';
2960 else {
2961 if (ISUPPER(c))
2962 c = TOLOWER(c);
2963 if (c >= 'a' && c <= 'f')
2964 return c - 'a' + 10;
2965 }
2966 return -1;
2967}
2968
2969static PyObject *
2970bytes_fromhex(PyObject *cls, PyObject *args)
2971{
2972 PyObject *newbytes, *hexobj;
2973 char *buf;
2974 Py_UNICODE *hex;
2975 Py_ssize_t hexlen, byteslen, i, j;
2976 int top, bot;
2977
2978 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2979 return NULL;
2980 assert(PyUnicode_Check(hexobj));
2981 hexlen = PyUnicode_GET_SIZE(hexobj);
2982 hex = PyUnicode_AS_UNICODE(hexobj);
2983 byteslen = hexlen/2; /* This overestimates if there are spaces */
2984 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
2985 if (!newbytes)
2986 return NULL;
2987 buf = PyByteArray_AS_STRING(newbytes);
2988 for (i = j = 0; i < hexlen; i += 2) {
2989 /* skip over spaces in the input */
2990 while (hex[i] == ' ')
2991 i++;
2992 if (i >= hexlen)
2993 break;
2994 top = hex_digit_to_int(hex[i]);
2995 bot = hex_digit_to_int(hex[i+1]);
2996 if (top == -1 || bot == -1) {
2997 PyErr_Format(PyExc_ValueError,
2998 "non-hexadecimal number found in "
2999 "fromhex() arg at position %zd", i);
3000 goto error;
3001 }
3002 buf[j++] = (top << 4) + bot;
3003 }
3004 if (PyByteArray_Resize(newbytes, j) < 0)
3005 goto error;
3006 return newbytes;
3007
3008 error:
3009 Py_DECREF(newbytes);
3010 return NULL;
3011}
3012
3013PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3014
3015static PyObject *
3016bytes_reduce(PyByteArrayObject *self)
3017{
3018 PyObject *latin1, *dict;
3019 if (self->ob_bytes)
3020 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3021 Py_SIZE(self), NULL);
3022 else
3023 latin1 = PyUnicode_FromString("");
3024
3025 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3026 if (dict == NULL) {
3027 PyErr_Clear();
3028 dict = Py_None;
3029 Py_INCREF(dict);
3030 }
3031
3032 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3033}
3034
3035static PySequenceMethods bytes_as_sequence = {
3036 (lenfunc)bytes_length, /* sq_length */
3037 (binaryfunc)PyByteArray_Concat, /* sq_concat */
3038 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3039 (ssizeargfunc)bytes_getitem, /* sq_item */
3040 0, /* sq_slice */
3041 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3042 0, /* sq_ass_slice */
3043 (objobjproc)bytes_contains, /* sq_contains */
3044 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3045 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3046};
3047
3048static PyMappingMethods bytes_as_mapping = {
3049 (lenfunc)bytes_length,
3050 (binaryfunc)bytes_subscript,
3051 (objobjargproc)bytes_ass_subscript,
3052};
3053
3054static PyBufferProcs bytes_as_buffer = {
3055 (getbufferproc)bytes_getbuffer,
3056 (releasebufferproc)bytes_releasebuffer,
3057};
3058
3059static PyMethodDef
3060bytes_methods[] = {
3061 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3062 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3063 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3064 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3065 _Py_capitalize__doc__},
3066 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3067 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3068 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3069 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3070 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3071 expandtabs__doc__},
3072 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3073 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3074 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3075 fromhex_doc},
3076 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3077 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3078 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3079 _Py_isalnum__doc__},
3080 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3081 _Py_isalpha__doc__},
3082 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3083 _Py_isdigit__doc__},
3084 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3085 _Py_islower__doc__},
3086 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3087 _Py_isspace__doc__},
3088 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3089 _Py_istitle__doc__},
3090 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3091 _Py_isupper__doc__},
3092 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3093 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3094 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3095 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3096 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3097 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3098 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3099 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3100 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3101 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3102 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3103 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3104 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3105 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3106 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3107 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3108 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3109 splitlines__doc__},
3110 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3111 startswith__doc__},
3112 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3113 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3114 _Py_swapcase__doc__},
3115 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3116 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3117 translate__doc__},
3118 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3119 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3120 {NULL}
3121};
3122
3123PyDoc_STRVAR(bytes_doc,
3124"bytearray(iterable_of_ints) -> bytearray.\n\
3125bytearray(string, encoding[, errors]) -> bytearray.\n\
3126bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3127bytearray(memory_view) -> bytearray.\n\
3128\n\
3129Construct an mutable bytearray object from:\n\
3130 - an iterable yielding integers in range(256)\n\
3131 - a text string encoded using the specified encoding\n\
3132 - a bytes or a bytearray object\n\
3133 - any object implementing the buffer API.\n\
3134\n\
3135bytearray(int) -> bytearray.\n\
3136\n\
3137Construct a zero-initialized bytearray of the given length.");
3138
3139
3140static PyObject *bytes_iter(PyObject *seq);
3141
3142PyTypeObject PyByteArray_Type = {
3143 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3144 "bytearray",
3145 sizeof(PyByteArrayObject),
3146 0,
3147 (destructor)bytes_dealloc, /* tp_dealloc */
3148 0, /* tp_print */
3149 0, /* tp_getattr */
3150 0, /* tp_setattr */
3151 0, /* tp_compare */
3152 (reprfunc)bytes_repr, /* tp_repr */
3153 0, /* tp_as_number */
3154 &bytes_as_sequence, /* tp_as_sequence */
3155 &bytes_as_mapping, /* tp_as_mapping */
3156 0, /* tp_hash */
3157 0, /* tp_call */
3158 bytes_str, /* tp_str */
3159 PyObject_GenericGetAttr, /* tp_getattro */
3160 0, /* tp_setattro */
3161 &bytes_as_buffer, /* tp_as_buffer */
3162 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3163 bytes_doc, /* tp_doc */
3164 0, /* tp_traverse */
3165 0, /* tp_clear */
3166 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3167 0, /* tp_weaklistoffset */
3168 bytes_iter, /* tp_iter */
3169 0, /* tp_iternext */
3170 bytes_methods, /* tp_methods */
3171 0, /* tp_members */
3172 0, /* tp_getset */
3173 0, /* tp_base */
3174 0, /* tp_dict */
3175 0, /* tp_descr_get */
3176 0, /* tp_descr_set */
3177 0, /* tp_dictoffset */
3178 (initproc)bytes_init, /* tp_init */
3179 PyType_GenericAlloc, /* tp_alloc */
3180 PyType_GenericNew, /* tp_new */
3181 PyObject_Del, /* tp_free */
3182};
3183
3184/*********************** Bytes Iterator ****************************/
3185
3186typedef struct {
3187 PyObject_HEAD
3188 Py_ssize_t it_index;
3189 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3190} bytesiterobject;
3191
3192static void
3193bytesiter_dealloc(bytesiterobject *it)
3194{
3195 _PyObject_GC_UNTRACK(it);
3196 Py_XDECREF(it->it_seq);
3197 PyObject_GC_Del(it);
3198}
3199
3200static int
3201bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3202{
3203 Py_VISIT(it->it_seq);
3204 return 0;
3205}
3206
3207static PyObject *
3208bytesiter_next(bytesiterobject *it)
3209{
3210 PyByteArrayObject *seq;
3211 PyObject *item;
3212
3213 assert(it != NULL);
3214 seq = it->it_seq;
3215 if (seq == NULL)
3216 return NULL;
3217 assert(PyByteArray_Check(seq));
3218
3219 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3220 item = PyLong_FromLong(
3221 (unsigned char)seq->ob_bytes[it->it_index]);
3222 if (item != NULL)
3223 ++it->it_index;
3224 return item;
3225 }
3226
3227 Py_DECREF(seq);
3228 it->it_seq = NULL;
3229 return NULL;
3230}
3231
3232static PyObject *
3233bytesiter_length_hint(bytesiterobject *it)
3234{
3235 Py_ssize_t len = 0;
3236 if (it->it_seq)
3237 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3238 return PyLong_FromSsize_t(len);
3239}
3240
3241PyDoc_STRVAR(length_hint_doc,
3242 "Private method returning an estimate of len(list(it)).");
3243
3244static PyMethodDef bytesiter_methods[] = {
3245 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3246 length_hint_doc},
3247 {NULL, NULL} /* sentinel */
3248};
3249
3250PyTypeObject PyByteArrayIter_Type = {
3251 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3252 "bytearray_iterator", /* tp_name */
3253 sizeof(bytesiterobject), /* tp_basicsize */
3254 0, /* tp_itemsize */
3255 /* methods */
3256 (destructor)bytesiter_dealloc, /* tp_dealloc */
3257 0, /* tp_print */
3258 0, /* tp_getattr */
3259 0, /* tp_setattr */
3260 0, /* tp_compare */
3261 0, /* tp_repr */
3262 0, /* tp_as_number */
3263 0, /* tp_as_sequence */
3264 0, /* tp_as_mapping */
3265 0, /* tp_hash */
3266 0, /* tp_call */
3267 0, /* tp_str */
3268 PyObject_GenericGetAttr, /* tp_getattro */
3269 0, /* tp_setattro */
3270 0, /* tp_as_buffer */
3271 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3272 0, /* tp_doc */
3273 (traverseproc)bytesiter_traverse, /* tp_traverse */
3274 0, /* tp_clear */
3275 0, /* tp_richcompare */
3276 0, /* tp_weaklistoffset */
3277 PyObject_SelfIter, /* tp_iter */
3278 (iternextfunc)bytesiter_next, /* tp_iternext */
3279 bytesiter_methods, /* tp_methods */
3280 0,
3281};
3282
3283static PyObject *
3284bytes_iter(PyObject *seq)
3285{
3286 bytesiterobject *it;
3287
3288 if (!PyByteArray_Check(seq)) {
3289 PyErr_BadInternalCall();
3290 return NULL;
3291 }
3292 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3293 if (it == NULL)
3294 return NULL;
3295 it->it_index = 0;
3296 Py_INCREF(seq);
3297 it->it_seq = (PyByteArrayObject *)seq;
3298 _PyObject_GC_TRACK(it);
3299 return (PyObject *)it;
3300}