blob: af7a1b18d96b9a3bdf8e69033058d7fe4d1b403e [file] [log] [blame]
Christian Heimes1a6387e2008-03-26 12:49:49 +00001/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyBytesObject *nullbytes = NULL;
9
10void
11PyBytes_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyBytes_Init(void)
18{
19 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
28/* end nullbytes support */
29
30/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
37 if (PyInt_Check(arg)) {
38 face_value = PyInt_AsLong(arg);
39 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
42 }
43 }
44 else if (PyString_CheckExact(arg)) {
45 if (Py_SIZE(arg) != 1) {
46 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
47 return 0;
48 }
49 face_value = Py_CHARMASK(((PyStringObject*)arg)->ob_sval[0]);
50 }
51 else {
52 PyErr_Format(PyExc_TypeError, "an integer or string of size 1 is required");
53 return 0;
54 }
55
56 *value = face_value;
57 return 1;
58}
59
60static Py_ssize_t
61bytes_buffer_getreadbuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
62{
63 if ( index != 0 ) {
64 PyErr_SetString(PyExc_SystemError,
65 "accessing non-existent bytes segment");
66 return -1;
67 }
68 *ptr = (void *)self->ob_bytes;
69 return Py_SIZE(self);
70}
71
72static Py_ssize_t
73bytes_buffer_getwritebuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
74{
75 if ( index != 0 ) {
76 PyErr_SetString(PyExc_SystemError,
77 "accessing non-existent bytes segment");
78 return -1;
79 }
80 *ptr = (void *)self->ob_bytes;
81 return Py_SIZE(self);
82}
83
84static Py_ssize_t
85bytes_buffer_getsegcount(PyBytesObject *self, Py_ssize_t *lenp)
86{
87 if ( lenp )
88 *lenp = Py_SIZE(self);
89 return 1;
90}
91
92static Py_ssize_t
93bytes_buffer_getcharbuf(PyBytesObject *self, Py_ssize_t index, const char **ptr)
94{
95 if ( index != 0 ) {
96 PyErr_SetString(PyExc_SystemError,
97 "accessing non-existent bytes segment");
98 return -1;
99 }
100 *ptr = self->ob_bytes;
101 return Py_SIZE(self);
102}
103
104static int
105bytes_getbuffer(PyBytesObject *obj, Py_buffer *view, int flags)
106{
107 int ret;
108 void *ptr;
109 if (view == NULL) {
110 obj->ob_exports++;
111 return 0;
112 }
113 if (obj->ob_bytes == NULL)
114 ptr = "";
115 else
116 ptr = obj->ob_bytes;
117 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
118 if (ret >= 0) {
119 obj->ob_exports++;
120 }
121 return ret;
122}
123
124static void
125bytes_releasebuffer(PyBytesObject *obj, Py_buffer *view)
126{
127 obj->ob_exports--;
128}
129
130static Py_ssize_t
131_getbuffer(PyObject *obj, Py_buffer *view)
132{
133 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
134
135 if (buffer == NULL || buffer->bf_getbuffer == NULL)
136 {
137 PyErr_Format(PyExc_TypeError,
138 "Type %.100s doesn't support the buffer API",
139 Py_TYPE(obj)->tp_name);
140 return -1;
141 }
142
143 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
144 return -1;
145 return view->len;
146}
147
148/* Direct API functions */
149
150PyObject *
151PyBytes_FromObject(PyObject *input)
152{
153 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
154 input, NULL);
155}
156
157PyObject *
158PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
159{
160 PyBytesObject *new;
161 Py_ssize_t alloc;
162
163 assert(size >= 0);
Gregory P. Smithc00eb732008-04-09 23:16:37 +0000164 if (size < 0) {
165 PyErr_SetString(PyExc_SystemError,
166 "Negative size passed to PyBytes_FromStringAndSize");
167 return NULL;
168 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169
170 new = PyObject_New(PyBytesObject, &PyBytes_Type);
171 if (new == NULL)
172 return NULL;
173
174 if (size == 0) {
175 new->ob_bytes = NULL;
176 alloc = 0;
177 }
178 else {
179 alloc = size + 1;
180 new->ob_bytes = PyMem_Malloc(alloc);
181 if (new->ob_bytes == NULL) {
182 Py_DECREF(new);
183 return PyErr_NoMemory();
184 }
185 if (bytes != NULL)
186 memcpy(new->ob_bytes, bytes, size);
187 new->ob_bytes[size] = '\0'; /* Trailing null byte */
188 }
189 Py_SIZE(new) = size;
190 new->ob_alloc = alloc;
191 new->ob_exports = 0;
192
193 return (PyObject *)new;
194}
195
196Py_ssize_t
197PyBytes_Size(PyObject *self)
198{
199 assert(self != NULL);
200 assert(PyBytes_Check(self));
201
202 return PyBytes_GET_SIZE(self);
203}
204
205char *
206PyBytes_AsString(PyObject *self)
207{
208 assert(self != NULL);
209 assert(PyBytes_Check(self));
210
211 return PyBytes_AS_STRING(self);
212}
213
214int
215PyBytes_Resize(PyObject *self, Py_ssize_t size)
216{
217 void *sval;
218 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
219
220 assert(self != NULL);
221 assert(PyBytes_Check(self));
222 assert(size >= 0);
223
224 if (size < alloc / 2) {
225 /* Major downsize; resize down to exact size */
226 alloc = size + 1;
227 }
228 else if (size < alloc) {
229 /* Within allocated size; quick exit */
230 Py_SIZE(self) = size;
231 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
232 return 0;
233 }
234 else if (size <= alloc * 1.125) {
235 /* Moderate upsize; overallocate similar to list_resize() */
236 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
237 }
238 else {
239 /* Major upsize; resize up to exact size */
240 alloc = size + 1;
241 }
242
243 if (((PyBytesObject *)self)->ob_exports > 0) {
244 /*
245 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
246 ((PyBytesObject *)self)->ob_bytes);
247 */
248 PyErr_SetString(PyExc_BufferError,
249 "Existing exports of data: object cannot be re-sized");
250 return -1;
251 }
252
253 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
254 if (sval == NULL) {
255 PyErr_NoMemory();
256 return -1;
257 }
258
259 ((PyBytesObject *)self)->ob_bytes = sval;
260 Py_SIZE(self) = size;
261 ((PyBytesObject *)self)->ob_alloc = alloc;
262 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
263
264 return 0;
265}
266
267PyObject *
268PyBytes_Concat(PyObject *a, PyObject *b)
269{
270 Py_ssize_t size;
271 Py_buffer va, vb;
272 PyBytesObject *result = NULL;
273
274 va.len = -1;
275 vb.len = -1;
276 if (_getbuffer(a, &va) < 0 ||
277 _getbuffer(b, &vb) < 0) {
278 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
279 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
280 goto done;
281 }
282
283 size = va.len + vb.len;
284 if (size < 0) {
285 return PyErr_NoMemory();
286 goto done;
287 }
288
289 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
290 if (result != NULL) {
291 memcpy(result->ob_bytes, va.buf, va.len);
292 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
293 }
294
295 done:
296 if (va.len != -1)
297 PyObject_ReleaseBuffer(a, &va);
298 if (vb.len != -1)
299 PyObject_ReleaseBuffer(b, &vb);
300 return (PyObject *)result;
301}
302
303/* Functions stuffed into the type object */
304
305static Py_ssize_t
306bytes_length(PyBytesObject *self)
307{
308 return Py_SIZE(self);
309}
310
311static PyObject *
312bytes_iconcat(PyBytesObject *self, PyObject *other)
313{
314 Py_ssize_t mysize;
315 Py_ssize_t size;
316 Py_buffer vo;
317
318 if (_getbuffer(other, &vo) < 0) {
319 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
320 Py_TYPE(self)->tp_name);
321 return NULL;
322 }
323
324 mysize = Py_SIZE(self);
325 size = mysize + vo.len;
326 if (size < 0) {
327 PyObject_ReleaseBuffer(other, &vo);
328 return PyErr_NoMemory();
329 }
330 if (size < self->ob_alloc) {
331 Py_SIZE(self) = size;
332 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
333 }
334 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
335 PyObject_ReleaseBuffer(other, &vo);
336 return NULL;
337 }
338 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
339 PyObject_ReleaseBuffer(other, &vo);
340 Py_INCREF(self);
341 return (PyObject *)self;
342}
343
344static PyObject *
345bytes_repeat(PyBytesObject *self, Py_ssize_t count)
346{
347 PyBytesObject *result;
348 Py_ssize_t mysize;
349 Py_ssize_t size;
350
351 if (count < 0)
352 count = 0;
353 mysize = Py_SIZE(self);
354 size = mysize * count;
355 if (count != 0 && size / count != mysize)
356 return PyErr_NoMemory();
357 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
358 if (result != NULL && size != 0) {
359 if (mysize == 1)
360 memset(result->ob_bytes, self->ob_bytes[0], size);
361 else {
362 Py_ssize_t i;
363 for (i = 0; i < count; i++)
364 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
365 }
366 }
367 return (PyObject *)result;
368}
369
370static PyObject *
371bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
372{
373 Py_ssize_t mysize;
374 Py_ssize_t size;
375
376 if (count < 0)
377 count = 0;
378 mysize = Py_SIZE(self);
379 size = mysize * count;
380 if (count != 0 && size / count != mysize)
381 return PyErr_NoMemory();
382 if (size < self->ob_alloc) {
383 Py_SIZE(self) = size;
384 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
385 }
386 else if (PyBytes_Resize((PyObject *)self, size) < 0)
387 return NULL;
388
389 if (mysize == 1)
390 memset(self->ob_bytes, self->ob_bytes[0], size);
391 else {
392 Py_ssize_t i;
393 for (i = 1; i < count; i++)
394 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
395 }
396
397 Py_INCREF(self);
398 return (PyObject *)self;
399}
400
401static PyObject *
402bytes_getitem(PyBytesObject *self, Py_ssize_t i)
403{
404 if (i < 0)
405 i += Py_SIZE(self);
406 if (i < 0 || i >= Py_SIZE(self)) {
407 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
408 return NULL;
409 }
410 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
411}
412
413static PyObject *
414bytes_subscript(PyBytesObject *self, PyObject *item)
415{
416 if (PyIndex_Check(item)) {
417 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
418
419 if (i == -1 && PyErr_Occurred())
420 return NULL;
421
422 if (i < 0)
423 i += PyBytes_GET_SIZE(self);
424
425 if (i < 0 || i >= Py_SIZE(self)) {
426 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
427 return NULL;
428 }
429 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
430 }
431 else if (PySlice_Check(item)) {
432 Py_ssize_t start, stop, step, slicelength, cur, i;
433 if (PySlice_GetIndicesEx((PySliceObject *)item,
434 PyBytes_GET_SIZE(self),
435 &start, &stop, &step, &slicelength) < 0) {
436 return NULL;
437 }
438
439 if (slicelength <= 0)
440 return PyBytes_FromStringAndSize("", 0);
441 else if (step == 1) {
442 return PyBytes_FromStringAndSize(self->ob_bytes + start,
443 slicelength);
444 }
445 else {
446 char *source_buf = PyBytes_AS_STRING(self);
447 char *result_buf = (char *)PyMem_Malloc(slicelength);
448 PyObject *result;
449
450 if (result_buf == NULL)
451 return PyErr_NoMemory();
452
453 for (cur = start, i = 0; i < slicelength;
454 cur += step, i++) {
455 result_buf[i] = source_buf[cur];
456 }
457 result = PyBytes_FromStringAndSize(result_buf, slicelength);
458 PyMem_Free(result_buf);
459 return result;
460 }
461 }
462 else {
463 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
464 return NULL;
465 }
466}
467
468static int
469bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
470 PyObject *values)
471{
472 Py_ssize_t avail, needed;
473 void *bytes;
474 Py_buffer vbytes;
475 int res = 0;
476
477 vbytes.len = -1;
478 if (values == (PyObject *)self) {
479 /* Make a copy and call this function recursively */
480 int err;
481 values = PyBytes_FromObject(values);
482 if (values == NULL)
483 return -1;
484 err = bytes_setslice(self, lo, hi, values);
485 Py_DECREF(values);
486 return err;
487 }
488 if (values == NULL) {
489 /* del b[lo:hi] */
490 bytes = NULL;
491 needed = 0;
492 }
493 else {
494 if (_getbuffer(values, &vbytes) < 0) {
495 PyErr_Format(PyExc_TypeError,
496 "can't set bytes slice from %.100s",
497 Py_TYPE(values)->tp_name);
498 return -1;
499 }
500 needed = vbytes.len;
501 bytes = vbytes.buf;
502 }
503
504 if (lo < 0)
505 lo = 0;
506 if (hi < lo)
507 hi = lo;
508 if (hi > Py_SIZE(self))
509 hi = Py_SIZE(self);
510
511 avail = hi - lo;
512 if (avail < 0)
513 lo = hi = avail = 0;
514
515 if (avail != needed) {
516 if (avail > needed) {
517 /*
518 0 lo hi old_size
519 | |<----avail----->|<-----tomove------>|
520 | |<-needed->|<-----tomove------>|
521 0 lo new_hi new_size
522 */
523 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
524 Py_SIZE(self) - hi);
525 }
526 /* XXX(nnorwitz): need to verify this can't overflow! */
527 if (PyBytes_Resize((PyObject *)self,
528 Py_SIZE(self) + needed - avail) < 0) {
529 res = -1;
530 goto finish;
531 }
532 if (avail < needed) {
533 /*
534 0 lo hi old_size
535 | |<-avail->|<-----tomove------>|
536 | |<----needed---->|<-----tomove------>|
537 0 lo new_hi new_size
538 */
539 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
540 Py_SIZE(self) - lo - needed);
541 }
542 }
543
544 if (needed > 0)
545 memcpy(self->ob_bytes + lo, bytes, needed);
546
547
548 finish:
549 if (vbytes.len != -1)
550 PyObject_ReleaseBuffer(values, &vbytes);
551 return res;
552}
553
554static int
555bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
556{
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000557 int ival;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000558
559 if (i < 0)
560 i += Py_SIZE(self);
561
562 if (i < 0 || i >= Py_SIZE(self)) {
563 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
564 return -1;
565 }
566
567 if (value == NULL)
568 return bytes_setslice(self, i, i+1, NULL);
569
570 if (!_getbytevalue(value, &ival))
571 return -1;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000572
573 self->ob_bytes[i] = ival;
574 return 0;
575}
576
577static int
578bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
579{
580 Py_ssize_t start, stop, step, slicelen, needed;
581 char *bytes;
582
583 if (PyIndex_Check(item)) {
584 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
585
586 if (i == -1 && PyErr_Occurred())
587 return -1;
588
589 if (i < 0)
590 i += PyBytes_GET_SIZE(self);
591
592 if (i < 0 || i >= Py_SIZE(self)) {
593 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
594 return -1;
595 }
596
597 if (values == NULL) {
598 /* Fall through to slice assignment */
599 start = i;
600 stop = i + 1;
601 step = 1;
602 slicelen = 1;
603 }
604 else {
605 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
606 if (ival == -1 && PyErr_Occurred()) {
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000607 int int_value;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000608 /* Also accept str of size 1 in 2.x */
609 PyErr_Clear();
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000610 if (!_getbytevalue(values, &int_value))
Christian Heimes1a6387e2008-03-26 12:49:49 +0000611 return -1;
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000612 ival = (int) int_value;
613 } else if (ival < 0 || ival >= 256) {
Christian Heimes1a6387e2008-03-26 12:49:49 +0000614 PyErr_SetString(PyExc_ValueError,
615 "byte must be in range(0, 256)");
616 return -1;
617 }
618 self->ob_bytes[i] = (char)ival;
619 return 0;
620 }
621 }
622 else if (PySlice_Check(item)) {
623 if (PySlice_GetIndicesEx((PySliceObject *)item,
624 PyBytes_GET_SIZE(self),
625 &start, &stop, &step, &slicelen) < 0) {
626 return -1;
627 }
628 }
629 else {
630 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
631 return -1;
632 }
633
634 if (values == NULL) {
635 bytes = NULL;
636 needed = 0;
637 }
638 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
639 /* Make a copy an call this function recursively */
640 int err;
641 values = PyBytes_FromObject(values);
642 if (values == NULL)
643 return -1;
644 err = bytes_ass_subscript(self, item, values);
645 Py_DECREF(values);
646 return err;
647 }
648 else {
649 assert(PyBytes_Check(values));
650 bytes = ((PyBytesObject *)values)->ob_bytes;
651 needed = Py_SIZE(values);
652 }
653 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
654 if ((step < 0 && start < stop) ||
655 (step > 0 && start > stop))
656 stop = start;
657 if (step == 1) {
658 if (slicelen != needed) {
659 if (slicelen > needed) {
660 /*
661 0 start stop old_size
662 | |<---slicelen--->|<-----tomove------>|
663 | |<-needed->|<-----tomove------>|
664 0 lo new_hi new_size
665 */
666 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
667 Py_SIZE(self) - stop);
668 }
669 if (PyBytes_Resize((PyObject *)self,
670 Py_SIZE(self) + needed - slicelen) < 0)
671 return -1;
672 if (slicelen < needed) {
673 /*
674 0 lo hi old_size
675 | |<-avail->|<-----tomove------>|
676 | |<----needed---->|<-----tomove------>|
677 0 lo new_hi new_size
678 */
679 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
680 Py_SIZE(self) - start - needed);
681 }
682 }
683
684 if (needed > 0)
685 memcpy(self->ob_bytes + start, bytes, needed);
686
687 return 0;
688 }
689 else {
690 if (needed == 0) {
691 /* Delete slice */
692 Py_ssize_t cur, i;
693
694 if (step < 0) {
695 stop = start + 1;
696 start = stop + step * (slicelen - 1) - 1;
697 step = -step;
698 }
699 for (cur = start, i = 0;
700 i < slicelen; cur += step, i++) {
701 Py_ssize_t lim = step - 1;
702
703 if (cur + step >= PyBytes_GET_SIZE(self))
704 lim = PyBytes_GET_SIZE(self) - cur - 1;
705
706 memmove(self->ob_bytes + cur - i,
707 self->ob_bytes + cur + 1, lim);
708 }
709 /* Move the tail of the bytes, in one chunk */
710 cur = start + slicelen*step;
711 if (cur < PyBytes_GET_SIZE(self)) {
712 memmove(self->ob_bytes + cur - slicelen,
713 self->ob_bytes + cur,
714 PyBytes_GET_SIZE(self) - cur);
715 }
716 if (PyBytes_Resize((PyObject *)self,
717 PyBytes_GET_SIZE(self) - slicelen) < 0)
718 return -1;
719
720 return 0;
721 }
722 else {
723 /* Assign slice */
724 Py_ssize_t cur, i;
725
726 if (needed != slicelen) {
727 PyErr_Format(PyExc_ValueError,
728 "attempt to assign bytes of size %zd "
729 "to extended slice of size %zd",
730 needed, slicelen);
731 return -1;
732 }
733 for (cur = start, i = 0; i < slicelen; cur += step, i++)
734 self->ob_bytes[cur] = bytes[i];
735 return 0;
736 }
737 }
738}
739
740static int
741bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
742{
743 static char *kwlist[] = {"source", "encoding", "errors", 0};
744 PyObject *arg = NULL;
745 const char *encoding = NULL;
746 const char *errors = NULL;
747 Py_ssize_t count;
748 PyObject *it;
749 PyObject *(*iternext)(PyObject *);
750
751 if (Py_SIZE(self) != 0) {
752 /* Empty previous contents (yes, do this first of all!) */
753 if (PyBytes_Resize((PyObject *)self, 0) < 0)
754 return -1;
755 }
756
757 /* Parse arguments */
758 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
759 &arg, &encoding, &errors))
760 return -1;
761
762 /* Make a quick exit if no first argument */
763 if (arg == NULL) {
764 if (encoding != NULL || errors != NULL) {
765 PyErr_SetString(PyExc_TypeError,
766 "encoding or errors without sequence argument");
767 return -1;
768 }
769 return 0;
770 }
771
772 if (PyString_Check(arg)) {
773 PyObject *new, *encoded;
774 if (encoding != NULL) {
775 encoded = PyCodec_Encode(arg, encoding, errors);
776 if (encoded == NULL)
777 return -1;
778 assert(PyString_Check(encoded));
779 }
780 else {
781 encoded = arg;
782 Py_INCREF(arg);
783 }
784 new = bytes_iconcat(self, arg);
785 Py_DECREF(encoded);
786 if (new == NULL)
787 return -1;
788 Py_DECREF(new);
789 return 0;
790 }
791
792 if (PyUnicode_Check(arg)) {
793 /* Encode via the codec registry */
794 PyObject *encoded, *new;
795 if (encoding == NULL) {
796 PyErr_SetString(PyExc_TypeError,
797 "unicode argument without an encoding");
798 return -1;
799 }
800 encoded = PyCodec_Encode(arg, encoding, errors);
801 if (encoded == NULL)
802 return -1;
803 assert(PyString_Check(encoded));
804 new = bytes_iconcat(self, encoded);
805 Py_DECREF(encoded);
806 if (new == NULL)
807 return -1;
808 Py_DECREF(new);
809 return 0;
810 }
811
812 /* If it's not unicode, there can't be encoding or errors */
813 if (encoding != NULL || errors != NULL) {
814 PyErr_SetString(PyExc_TypeError,
815 "encoding or errors without a string argument");
816 return -1;
817 }
818
819 /* Is it an int? */
820 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
821 if (count == -1 && PyErr_Occurred())
822 PyErr_Clear();
823 else {
824 if (count < 0) {
825 PyErr_SetString(PyExc_ValueError, "negative count");
826 return -1;
827 }
828 if (count > 0) {
829 if (PyBytes_Resize((PyObject *)self, count))
830 return -1;
831 memset(self->ob_bytes, 0, count);
832 }
833 return 0;
834 }
835
836 /* Use the buffer API */
837 if (PyObject_CheckBuffer(arg)) {
838 Py_ssize_t size;
839 Py_buffer view;
840 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
841 return -1;
842 size = view.len;
843 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
844 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
845 goto fail;
846 PyObject_ReleaseBuffer(arg, &view);
847 return 0;
848 fail:
849 PyObject_ReleaseBuffer(arg, &view);
850 return -1;
851 }
852
853 /* XXX Optimize this if the arguments is a list, tuple */
854
855 /* Get the iterator */
856 it = PyObject_GetIter(arg);
857 if (it == NULL)
858 return -1;
859 iternext = *Py_TYPE(it)->tp_iternext;
860
861 /* Run the iterator to exhaustion */
862 for (;;) {
863 PyObject *item;
864 Py_ssize_t value;
865
866 /* Get the next item */
867 item = iternext(it);
868 if (item == NULL) {
869 if (PyErr_Occurred()) {
870 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
871 goto error;
872 PyErr_Clear();
873 }
874 break;
875 }
876
877 /* Interpret it as an int (__index__) */
878 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
879 Py_DECREF(item);
880 if (value == -1 && PyErr_Occurred())
881 goto error;
882
883 /* Range check */
884 if (value < 0 || value >= 256) {
885 PyErr_SetString(PyExc_ValueError,
886 "bytes must be in range(0, 256)");
887 goto error;
888 }
889
890 /* Append the byte */
891 if (Py_SIZE(self) < self->ob_alloc)
892 Py_SIZE(self)++;
893 else if (PyBytes_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
894 goto error;
895 self->ob_bytes[Py_SIZE(self)-1] = value;
896 }
897
898 /* Clean up and return success */
899 Py_DECREF(it);
900 return 0;
901
902 error:
903 /* Error handling when it != NULL */
904 Py_DECREF(it);
905 return -1;
906}
907
908/* Mostly copied from string_repr, but without the
909 "smart quote" functionality. */
910static PyObject *
911bytes_repr(PyBytesObject *self)
912{
913 static const char *hexdigits = "0123456789abcdef";
914 const char *quote_prefix = "bytearray(b";
915 const char *quote_postfix = ")";
916 Py_ssize_t length = Py_SIZE(self);
917 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
918 size_t newsize = 14 + 4 * length;
919 PyObject *v;
920 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
921 PyErr_SetString(PyExc_OverflowError,
922 "bytearray object is too large to make repr");
923 return NULL;
924 }
925 v = PyUnicode_FromUnicode(NULL, newsize);
926 if (v == NULL) {
927 return NULL;
928 }
929 else {
930 register Py_ssize_t i;
931 register Py_UNICODE c;
932 register Py_UNICODE *p;
933 int quote;
934
935 /* Figure out which quote to use; single is preferred */
936 quote = '\'';
937 {
938 char *test, *start;
939 start = PyBytes_AS_STRING(self);
940 for (test = start; test < start+length; ++test) {
941 if (*test == '"') {
942 quote = '\''; /* back to single */
943 goto decided;
944 }
945 else if (*test == '\'')
946 quote = '"';
947 }
948 decided:
949 ;
950 }
951
952 p = PyUnicode_AS_UNICODE(v);
953 while (*quote_prefix)
954 *p++ = *quote_prefix++;
955 *p++ = quote;
956
957 for (i = 0; i < length; i++) {
958 /* There's at least enough room for a hex escape
959 and a closing quote. */
960 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
961 c = self->ob_bytes[i];
962 if (c == '\'' || c == '\\')
963 *p++ = '\\', *p++ = c;
964 else if (c == '\t')
965 *p++ = '\\', *p++ = 't';
966 else if (c == '\n')
967 *p++ = '\\', *p++ = 'n';
968 else if (c == '\r')
969 *p++ = '\\', *p++ = 'r';
970 else if (c == 0)
971 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
972 else if (c < ' ' || c >= 0x7f) {
973 *p++ = '\\';
974 *p++ = 'x';
975 *p++ = hexdigits[(c & 0xf0) >> 4];
976 *p++ = hexdigits[c & 0xf];
977 }
978 else
979 *p++ = c;
980 }
981 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
982 *p++ = quote;
983 while (*quote_postfix) {
984 *p++ = *quote_postfix++;
985 }
986 *p = '\0';
987 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
988 Py_DECREF(v);
989 return NULL;
990 }
991 return v;
992 }
993}
994
995static PyObject *
996bytes_str(PyObject *op)
997{
998#if 0
999 if (Py_BytesWarningFlag) {
1000 if (PyErr_WarnEx(PyExc_BytesWarning,
1001 "str() on a bytearray instance", 1))
1002 return NULL;
1003 }
1004 return bytes_repr((PyBytesObject*)op);
1005#endif
1006 return PyString_FromStringAndSize(((PyBytesObject*)op)->ob_bytes, Py_SIZE(op));
1007}
1008
1009static PyObject *
1010bytes_richcompare(PyObject *self, PyObject *other, int op)
1011{
1012 Py_ssize_t self_size, other_size;
1013 Py_buffer self_bytes, other_bytes;
1014 PyObject *res;
1015 Py_ssize_t minsize;
1016 int cmp;
1017
1018 /* Bytes can be compared to anything that supports the (binary)
1019 buffer API. Except that a comparison with Unicode is always an
1020 error, even if the comparison is for equality. */
1021 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1022 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1023 if (Py_BytesWarningFlag && op == Py_EQ) {
1024 if (PyErr_WarnEx(PyExc_BytesWarning,
1025 "Comparsion between bytearray and string", 1))
1026 return NULL;
1027 }
1028
1029 Py_INCREF(Py_NotImplemented);
1030 return Py_NotImplemented;
1031 }
1032
1033 self_size = _getbuffer(self, &self_bytes);
1034 if (self_size < 0) {
1035 PyErr_Clear();
1036 Py_INCREF(Py_NotImplemented);
1037 return Py_NotImplemented;
1038 }
1039
1040 other_size = _getbuffer(other, &other_bytes);
1041 if (other_size < 0) {
1042 PyErr_Clear();
1043 PyObject_ReleaseBuffer(self, &self_bytes);
1044 Py_INCREF(Py_NotImplemented);
1045 return Py_NotImplemented;
1046 }
1047
1048 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1049 /* Shortcut: if the lengths differ, the objects differ */
1050 cmp = (op == Py_NE);
1051 }
1052 else {
1053 minsize = self_size;
1054 if (other_size < minsize)
1055 minsize = other_size;
1056
1057 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1058 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1059
1060 if (cmp == 0) {
1061 if (self_size < other_size)
1062 cmp = -1;
1063 else if (self_size > other_size)
1064 cmp = 1;
1065 }
1066
1067 switch (op) {
1068 case Py_LT: cmp = cmp < 0; break;
1069 case Py_LE: cmp = cmp <= 0; break;
1070 case Py_EQ: cmp = cmp == 0; break;
1071 case Py_NE: cmp = cmp != 0; break;
1072 case Py_GT: cmp = cmp > 0; break;
1073 case Py_GE: cmp = cmp >= 0; break;
1074 }
1075 }
1076
1077 res = cmp ? Py_True : Py_False;
1078 PyObject_ReleaseBuffer(self, &self_bytes);
1079 PyObject_ReleaseBuffer(other, &other_bytes);
1080 Py_INCREF(res);
1081 return res;
1082}
1083
1084static void
1085bytes_dealloc(PyBytesObject *self)
1086{
1087 if (self->ob_bytes != 0) {
1088 PyMem_Free(self->ob_bytes);
1089 }
1090 Py_TYPE(self)->tp_free((PyObject *)self);
1091}
1092
1093
1094/* -------------------------------------------------------------------- */
1095/* Methods */
1096
1097#define STRINGLIB_CHAR char
1098#define STRINGLIB_CMP memcmp
1099#define STRINGLIB_LEN PyBytes_GET_SIZE
1100#define STRINGLIB_STR PyBytes_AS_STRING
1101#define STRINGLIB_NEW PyBytes_FromStringAndSize
1102#define STRINGLIB_EMPTY nullbytes
1103#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
1104#define STRINGLIB_MUTABLE 1
1105
1106#include "stringlib/fastsearch.h"
1107#include "stringlib/count.h"
1108#include "stringlib/find.h"
1109#include "stringlib/partition.h"
1110#include "stringlib/ctype.h"
1111#include "stringlib/transmogrify.h"
1112
1113
1114/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1115were copied from the old char* style string object. */
1116
1117Py_LOCAL_INLINE(void)
1118_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1119{
1120 if (*end > len)
1121 *end = len;
1122 else if (*end < 0)
1123 *end += len;
1124 if (*end < 0)
1125 *end = 0;
1126 if (*start < 0)
1127 *start += len;
1128 if (*start < 0)
1129 *start = 0;
1130}
1131
1132
1133Py_LOCAL_INLINE(Py_ssize_t)
1134bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1135{
1136 PyObject *subobj;
1137 Py_buffer subbuf;
1138 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1139 Py_ssize_t res;
1140
1141 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1142 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1143 return -2;
1144 if (_getbuffer(subobj, &subbuf) < 0)
1145 return -2;
1146 if (dir > 0)
1147 res = stringlib_find_slice(
1148 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1149 subbuf.buf, subbuf.len, start, end);
1150 else
1151 res = stringlib_rfind_slice(
1152 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1153 subbuf.buf, subbuf.len, start, end);
1154 PyObject_ReleaseBuffer(subobj, &subbuf);
1155 return res;
1156}
1157
1158PyDoc_STRVAR(find__doc__,
1159"B.find(sub [,start [,end]]) -> int\n\
1160\n\
1161Return the lowest index in B where subsection sub is found,\n\
1162such that sub is contained within s[start,end]. Optional\n\
1163arguments start and end are interpreted as in slice notation.\n\
1164\n\
1165Return -1 on failure.");
1166
1167static PyObject *
1168bytes_find(PyBytesObject *self, PyObject *args)
1169{
1170 Py_ssize_t result = bytes_find_internal(self, args, +1);
1171 if (result == -2)
1172 return NULL;
1173 return PyInt_FromSsize_t(result);
1174}
1175
1176PyDoc_STRVAR(count__doc__,
1177"B.count(sub [,start [,end]]) -> int\n\
1178\n\
1179Return the number of non-overlapping occurrences of subsection sub in\n\
1180bytes B[start:end]. Optional arguments start and end are interpreted\n\
1181as in slice notation.");
1182
1183static PyObject *
1184bytes_count(PyBytesObject *self, PyObject *args)
1185{
1186 PyObject *sub_obj;
1187 const char *str = PyBytes_AS_STRING(self);
1188 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1189 Py_buffer vsub;
1190 PyObject *count_obj;
1191
1192 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1193 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1194 return NULL;
1195
1196 if (_getbuffer(sub_obj, &vsub) < 0)
1197 return NULL;
1198
1199 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1200
1201 count_obj = PyInt_FromSsize_t(
1202 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1203 );
1204 PyObject_ReleaseBuffer(sub_obj, &vsub);
1205 return count_obj;
1206}
1207
1208
1209PyDoc_STRVAR(index__doc__,
1210"B.index(sub [,start [,end]]) -> int\n\
1211\n\
1212Like B.find() but raise ValueError when the subsection is not found.");
1213
1214static PyObject *
1215bytes_index(PyBytesObject *self, PyObject *args)
1216{
1217 Py_ssize_t result = bytes_find_internal(self, args, +1);
1218 if (result == -2)
1219 return NULL;
1220 if (result == -1) {
1221 PyErr_SetString(PyExc_ValueError,
1222 "subsection not found");
1223 return NULL;
1224 }
1225 return PyInt_FromSsize_t(result);
1226}
1227
1228
1229PyDoc_STRVAR(rfind__doc__,
1230"B.rfind(sub [,start [,end]]) -> int\n\
1231\n\
1232Return the highest index in B where subsection sub is found,\n\
1233such that sub is contained within s[start,end]. Optional\n\
1234arguments start and end are interpreted as in slice notation.\n\
1235\n\
1236Return -1 on failure.");
1237
1238static PyObject *
1239bytes_rfind(PyBytesObject *self, PyObject *args)
1240{
1241 Py_ssize_t result = bytes_find_internal(self, args, -1);
1242 if (result == -2)
1243 return NULL;
1244 return PyInt_FromSsize_t(result);
1245}
1246
1247
1248PyDoc_STRVAR(rindex__doc__,
1249"B.rindex(sub [,start [,end]]) -> int\n\
1250\n\
1251Like B.rfind() but raise ValueError when the subsection is not found.");
1252
1253static PyObject *
1254bytes_rindex(PyBytesObject *self, PyObject *args)
1255{
1256 Py_ssize_t result = bytes_find_internal(self, args, -1);
1257 if (result == -2)
1258 return NULL;
1259 if (result == -1) {
1260 PyErr_SetString(PyExc_ValueError,
1261 "subsection not found");
1262 return NULL;
1263 }
1264 return PyInt_FromSsize_t(result);
1265}
1266
1267
1268static int
1269bytes_contains(PyObject *self, PyObject *arg)
1270{
1271 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1272 if (ival == -1 && PyErr_Occurred()) {
1273 Py_buffer varg;
1274 int pos;
1275 PyErr_Clear();
1276 if (_getbuffer(arg, &varg) < 0)
1277 return -1;
1278 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1279 varg.buf, varg.len, 0);
1280 PyObject_ReleaseBuffer(arg, &varg);
1281 return pos >= 0;
1282 }
1283 if (ival < 0 || ival >= 256) {
1284 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1285 return -1;
1286 }
1287
1288 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1289}
1290
1291
1292/* Matches the end (direction >= 0) or start (direction < 0) of self
1293 * against substr, using the start and end arguments. Returns
1294 * -1 on error, 0 if not found and 1 if found.
1295 */
1296Py_LOCAL(int)
1297_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1298 Py_ssize_t end, int direction)
1299{
1300 Py_ssize_t len = PyBytes_GET_SIZE(self);
1301 const char* str;
1302 Py_buffer vsubstr;
1303 int rv = 0;
1304
1305 str = PyBytes_AS_STRING(self);
1306
1307 if (_getbuffer(substr, &vsubstr) < 0)
1308 return -1;
1309
1310 _adjust_indices(&start, &end, len);
1311
1312 if (direction < 0) {
1313 /* startswith */
1314 if (start+vsubstr.len > len) {
1315 goto done;
1316 }
1317 } else {
1318 /* endswith */
1319 if (end-start < vsubstr.len || start > len) {
1320 goto done;
1321 }
1322
1323 if (end-vsubstr.len > start)
1324 start = end - vsubstr.len;
1325 }
1326 if (end-start >= vsubstr.len)
1327 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1328
1329done:
1330 PyObject_ReleaseBuffer(substr, &vsubstr);
1331 return rv;
1332}
1333
1334
1335PyDoc_STRVAR(startswith__doc__,
1336"B.startswith(prefix [,start [,end]]) -> bool\n\
1337\n\
1338Return True if B starts with the specified prefix, False otherwise.\n\
1339With optional start, test B beginning at that position.\n\
1340With optional end, stop comparing B at that position.\n\
1341prefix can also be a tuple of strings to try.");
1342
1343static PyObject *
1344bytes_startswith(PyBytesObject *self, PyObject *args)
1345{
1346 Py_ssize_t start = 0;
1347 Py_ssize_t end = PY_SSIZE_T_MAX;
1348 PyObject *subobj;
1349 int result;
1350
1351 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1352 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1353 return NULL;
1354 if (PyTuple_Check(subobj)) {
1355 Py_ssize_t i;
1356 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1357 result = _bytes_tailmatch(self,
1358 PyTuple_GET_ITEM(subobj, i),
1359 start, end, -1);
1360 if (result == -1)
1361 return NULL;
1362 else if (result) {
1363 Py_RETURN_TRUE;
1364 }
1365 }
1366 Py_RETURN_FALSE;
1367 }
1368 result = _bytes_tailmatch(self, subobj, start, end, -1);
1369 if (result == -1)
1370 return NULL;
1371 else
1372 return PyBool_FromLong(result);
1373}
1374
1375PyDoc_STRVAR(endswith__doc__,
1376"B.endswith(suffix [,start [,end]]) -> bool\n\
1377\n\
1378Return True if B ends with the specified suffix, False otherwise.\n\
1379With optional start, test B beginning at that position.\n\
1380With optional end, stop comparing B at that position.\n\
1381suffix can also be a tuple of strings to try.");
1382
1383static PyObject *
1384bytes_endswith(PyBytesObject *self, PyObject *args)
1385{
1386 Py_ssize_t start = 0;
1387 Py_ssize_t end = PY_SSIZE_T_MAX;
1388 PyObject *subobj;
1389 int result;
1390
1391 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1392 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1393 return NULL;
1394 if (PyTuple_Check(subobj)) {
1395 Py_ssize_t i;
1396 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1397 result = _bytes_tailmatch(self,
1398 PyTuple_GET_ITEM(subobj, i),
1399 start, end, +1);
1400 if (result == -1)
1401 return NULL;
1402 else if (result) {
1403 Py_RETURN_TRUE;
1404 }
1405 }
1406 Py_RETURN_FALSE;
1407 }
1408 result = _bytes_tailmatch(self, subobj, start, end, +1);
1409 if (result == -1)
1410 return NULL;
1411 else
1412 return PyBool_FromLong(result);
1413}
1414
1415
1416PyDoc_STRVAR(translate__doc__,
1417"B.translate(table[, deletechars]) -> bytearray\n\
1418\n\
1419Return a copy of B, where all characters occurring in the\n\
1420optional argument deletechars are removed, and the remaining\n\
1421characters have been mapped through the given translation\n\
1422table, which must be a bytes object of length 256.");
1423
1424static PyObject *
1425bytes_translate(PyBytesObject *self, PyObject *args)
1426{
1427 register char *input, *output;
1428 register const char *table;
1429 register Py_ssize_t i, c, changed = 0;
1430 PyObject *input_obj = (PyObject*)self;
1431 const char *output_start;
1432 Py_ssize_t inlen;
1433 PyObject *result;
1434 int trans_table[256];
1435 PyObject *tableobj, *delobj = NULL;
1436 Py_buffer vtable, vdel;
1437
1438 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1439 &tableobj, &delobj))
1440 return NULL;
1441
1442 if (_getbuffer(tableobj, &vtable) < 0)
1443 return NULL;
1444
1445 if (vtable.len != 256) {
1446 PyErr_SetString(PyExc_ValueError,
1447 "translation table must be 256 characters long");
1448 result = NULL;
1449 goto done;
1450 }
1451
1452 if (delobj != NULL) {
1453 if (_getbuffer(delobj, &vdel) < 0) {
1454 result = NULL;
1455 goto done;
1456 }
1457 }
1458 else {
1459 vdel.buf = NULL;
1460 vdel.len = 0;
1461 }
1462
1463 table = (const char *)vtable.buf;
1464 inlen = PyBytes_GET_SIZE(input_obj);
1465 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1466 if (result == NULL)
1467 goto done;
1468 output_start = output = PyBytes_AsString(result);
1469 input = PyBytes_AS_STRING(input_obj);
1470
1471 if (vdel.len == 0) {
1472 /* If no deletions are required, use faster code */
1473 for (i = inlen; --i >= 0; ) {
1474 c = Py_CHARMASK(*input++);
1475 if (Py_CHARMASK((*output++ = table[c])) != c)
1476 changed = 1;
1477 }
1478 if (changed || !PyBytes_CheckExact(input_obj))
1479 goto done;
1480 Py_DECREF(result);
1481 Py_INCREF(input_obj);
1482 result = input_obj;
1483 goto done;
1484 }
1485
1486 for (i = 0; i < 256; i++)
1487 trans_table[i] = Py_CHARMASK(table[i]);
1488
1489 for (i = 0; i < vdel.len; i++)
1490 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1491
1492 for (i = inlen; --i >= 0; ) {
1493 c = Py_CHARMASK(*input++);
1494 if (trans_table[c] != -1)
1495 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1496 continue;
1497 changed = 1;
1498 }
1499 if (!changed && PyBytes_CheckExact(input_obj)) {
1500 Py_DECREF(result);
1501 Py_INCREF(input_obj);
1502 result = input_obj;
1503 goto done;
1504 }
1505 /* Fix the size of the resulting string */
1506 if (inlen > 0)
1507 PyBytes_Resize(result, output - output_start);
1508
1509done:
1510 PyObject_ReleaseBuffer(tableobj, &vtable);
1511 if (delobj != NULL)
1512 PyObject_ReleaseBuffer(delobj, &vdel);
1513 return result;
1514}
1515
1516
/* Search directions understood by findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate byte c within the first target_len bytes of target; evaluates
   to a char * pointing at the hit, or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs in target at offset.  The first and
   last bytes are compared before falling back to memcmp().
   Don't call if length < 2: the memcmp() size would become negative. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1530
1531
1532/* Bytes ops must return a string. */
1533/* If the object is subclass of bytes, create a copy */
1534Py_LOCAL(PyBytesObject *)
1535return_self(PyBytesObject *self)
1536{
1537 if (PyBytes_CheckExact(self)) {
1538 Py_INCREF(self);
1539 return (PyBytesObject *)self;
1540 }
1541 return (PyBytesObject *)PyBytes_FromStringAndSize(
1542 PyBytes_AS_STRING(self),
1543 PyBytes_GET_SIZE(self));
1544}
1545
1546Py_LOCAL_INLINE(Py_ssize_t)
1547countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1548{
1549 Py_ssize_t count=0;
1550 const char *start=target;
1551 const char *end=target+target_len;
1552
1553 while ( (start=findchar(start, end-start, c)) != NULL ) {
1554 count++;
1555 if (count >= maxcount)
1556 break;
1557 start += 1;
1558 }
1559 return count;
1560}
1561
1562Py_LOCAL(Py_ssize_t)
1563findstring(const char *target, Py_ssize_t target_len,
1564 const char *pattern, Py_ssize_t pattern_len,
1565 Py_ssize_t start,
1566 Py_ssize_t end,
1567 int direction)
1568{
1569 if (start < 0) {
1570 start += target_len;
1571 if (start < 0)
1572 start = 0;
1573 }
1574 if (end > target_len) {
1575 end = target_len;
1576 } else if (end < 0) {
1577 end += target_len;
1578 if (end < 0)
1579 end = 0;
1580 }
1581
1582 /* zero-length substrings always match at the first attempt */
1583 if (pattern_len == 0)
1584 return (direction > 0) ? start : end;
1585
1586 end -= pattern_len;
1587
1588 if (direction < 0) {
1589 for (; end >= start; end--)
1590 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1591 return end;
1592 } else {
1593 for (; start <= end; start++)
1594 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1595 return start;
1596 }
1597 return -1;
1598}
1599
1600Py_LOCAL_INLINE(Py_ssize_t)
1601countstring(const char *target, Py_ssize_t target_len,
1602 const char *pattern, Py_ssize_t pattern_len,
1603 Py_ssize_t start,
1604 Py_ssize_t end,
1605 int direction, Py_ssize_t maxcount)
1606{
1607 Py_ssize_t count=0;
1608
1609 if (start < 0) {
1610 start += target_len;
1611 if (start < 0)
1612 start = 0;
1613 }
1614 if (end > target_len) {
1615 end = target_len;
1616 } else if (end < 0) {
1617 end += target_len;
1618 if (end < 0)
1619 end = 0;
1620 }
1621
1622 /* zero-length substrings match everywhere */
1623 if (pattern_len == 0 || maxcount == 0) {
1624 if (target_len+1 < maxcount)
1625 return target_len+1;
1626 return maxcount;
1627 }
1628
1629 end -= pattern_len;
1630 if (direction < 0) {
1631 for (; (end >= start); end--)
1632 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1633 count++;
1634 if (--maxcount <= 0) break;
1635 end -= pattern_len-1;
1636 }
1637 } else {
1638 for (; (start <= end); start++)
1639 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1640 count++;
1641 if (--maxcount <= 0)
1642 break;
1643 start += pattern_len-1;
1644 }
1645 }
1646 return count;
1647}
1648
1649
1650/* Algorithms for different cases of string replacement */
1651
1652/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1653Py_LOCAL(PyBytesObject *)
1654replace_interleave(PyBytesObject *self,
1655 const char *to_s, Py_ssize_t to_len,
1656 Py_ssize_t maxcount)
1657{
1658 char *self_s, *result_s;
1659 Py_ssize_t self_len, result_len;
1660 Py_ssize_t count, i, product;
1661 PyBytesObject *result;
1662
1663 self_len = PyBytes_GET_SIZE(self);
1664
1665 /* 1 at the end plus 1 after every character */
1666 count = self_len+1;
1667 if (maxcount < count)
1668 count = maxcount;
1669
1670 /* Check for overflow */
1671 /* result_len = count * to_len + self_len; */
1672 product = count * to_len;
1673 if (product / to_len != count) {
1674 PyErr_SetString(PyExc_OverflowError,
1675 "replace string is too long");
1676 return NULL;
1677 }
1678 result_len = product + self_len;
1679 if (result_len < 0) {
1680 PyErr_SetString(PyExc_OverflowError,
1681 "replace string is too long");
1682 return NULL;
1683 }
1684
1685 if (! (result = (PyBytesObject *)
1686 PyBytes_FromStringAndSize(NULL, result_len)) )
1687 return NULL;
1688
1689 self_s = PyBytes_AS_STRING(self);
1690 result_s = PyBytes_AS_STRING(result);
1691
1692 /* TODO: special case single character, which doesn't need memcpy */
1693
1694 /* Lay the first one down (guaranteed this will occur) */
1695 Py_MEMCPY(result_s, to_s, to_len);
1696 result_s += to_len;
1697 count -= 1;
1698
1699 for (i=0; i<count; i++) {
1700 *result_s++ = *self_s++;
1701 Py_MEMCPY(result_s, to_s, to_len);
1702 result_s += to_len;
1703 }
1704
1705 /* Copy the rest of the original string */
1706 Py_MEMCPY(result_s, self_s, self_len-i);
1707
1708 return result;
1709}
1710
1711/* Special case for deleting a single character */
1712/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1713Py_LOCAL(PyBytesObject *)
1714replace_delete_single_character(PyBytesObject *self,
1715 char from_c, Py_ssize_t maxcount)
1716{
1717 char *self_s, *result_s;
1718 char *start, *next, *end;
1719 Py_ssize_t self_len, result_len;
1720 Py_ssize_t count;
1721 PyBytesObject *result;
1722
1723 self_len = PyBytes_GET_SIZE(self);
1724 self_s = PyBytes_AS_STRING(self);
1725
1726 count = countchar(self_s, self_len, from_c, maxcount);
1727 if (count == 0) {
1728 return return_self(self);
1729 }
1730
1731 result_len = self_len - count; /* from_len == 1 */
1732 assert(result_len>=0);
1733
1734 if ( (result = (PyBytesObject *)
1735 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1736 return NULL;
1737 result_s = PyBytes_AS_STRING(result);
1738
1739 start = self_s;
1740 end = self_s + self_len;
1741 while (count-- > 0) {
1742 next = findchar(start, end-start, from_c);
1743 if (next == NULL)
1744 break;
1745 Py_MEMCPY(result_s, start, next-start);
1746 result_s += (next-start);
1747 start = next+1;
1748 }
1749 Py_MEMCPY(result_s, start, end-start);
1750
1751 return result;
1752}
1753
1754/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1755
1756Py_LOCAL(PyBytesObject *)
1757replace_delete_substring(PyBytesObject *self,
1758 const char *from_s, Py_ssize_t from_len,
1759 Py_ssize_t maxcount)
1760{
1761 char *self_s, *result_s;
1762 char *start, *next, *end;
1763 Py_ssize_t self_len, result_len;
1764 Py_ssize_t count, offset;
1765 PyBytesObject *result;
1766
1767 self_len = PyBytes_GET_SIZE(self);
1768 self_s = PyBytes_AS_STRING(self);
1769
1770 count = countstring(self_s, self_len,
1771 from_s, from_len,
1772 0, self_len, 1,
1773 maxcount);
1774
1775 if (count == 0) {
1776 /* no matches */
1777 return return_self(self);
1778 }
1779
1780 result_len = self_len - (count * from_len);
1781 assert (result_len>=0);
1782
1783 if ( (result = (PyBytesObject *)
1784 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1785 return NULL;
1786
1787 result_s = PyBytes_AS_STRING(result);
1788
1789 start = self_s;
1790 end = self_s + self_len;
1791 while (count-- > 0) {
1792 offset = findstring(start, end-start,
1793 from_s, from_len,
1794 0, end-start, FORWARD);
1795 if (offset == -1)
1796 break;
1797 next = start + offset;
1798
1799 Py_MEMCPY(result_s, start, next-start);
1800
1801 result_s += (next-start);
1802 start = next+from_len;
1803 }
1804 Py_MEMCPY(result_s, start, end-start);
1805 return result;
1806}
1807
1808/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1809Py_LOCAL(PyBytesObject *)
1810replace_single_character_in_place(PyBytesObject *self,
1811 char from_c, char to_c,
1812 Py_ssize_t maxcount)
1813{
1814 char *self_s, *result_s, *start, *end, *next;
1815 Py_ssize_t self_len;
1816 PyBytesObject *result;
1817
1818 /* The result string will be the same size */
1819 self_s = PyBytes_AS_STRING(self);
1820 self_len = PyBytes_GET_SIZE(self);
1821
1822 next = findchar(self_s, self_len, from_c);
1823
1824 if (next == NULL) {
1825 /* No matches; return the original bytes */
1826 return return_self(self);
1827 }
1828
1829 /* Need to make a new bytes */
1830 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1831 if (result == NULL)
1832 return NULL;
1833 result_s = PyBytes_AS_STRING(result);
1834 Py_MEMCPY(result_s, self_s, self_len);
1835
1836 /* change everything in-place, starting with this one */
1837 start = result_s + (next-self_s);
1838 *start = to_c;
1839 start++;
1840 end = result_s + self_len;
1841
1842 while (--maxcount > 0) {
1843 next = findchar(start, end-start, from_c);
1844 if (next == NULL)
1845 break;
1846 *next = to_c;
1847 start = next+1;
1848 }
1849
1850 return result;
1851}
1852
1853/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1854Py_LOCAL(PyBytesObject *)
1855replace_substring_in_place(PyBytesObject *self,
1856 const char *from_s, Py_ssize_t from_len,
1857 const char *to_s, Py_ssize_t to_len,
1858 Py_ssize_t maxcount)
1859{
1860 char *result_s, *start, *end;
1861 char *self_s;
1862 Py_ssize_t self_len, offset;
1863 PyBytesObject *result;
1864
1865 /* The result bytes will be the same size */
1866
1867 self_s = PyBytes_AS_STRING(self);
1868 self_len = PyBytes_GET_SIZE(self);
1869
1870 offset = findstring(self_s, self_len,
1871 from_s, from_len,
1872 0, self_len, FORWARD);
1873 if (offset == -1) {
1874 /* No matches; return the original bytes */
1875 return return_self(self);
1876 }
1877
1878 /* Need to make a new bytes */
1879 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1880 if (result == NULL)
1881 return NULL;
1882 result_s = PyBytes_AS_STRING(result);
1883 Py_MEMCPY(result_s, self_s, self_len);
1884
1885 /* change everything in-place, starting with this one */
1886 start = result_s + offset;
1887 Py_MEMCPY(start, to_s, from_len);
1888 start += from_len;
1889 end = result_s + self_len;
1890
1891 while ( --maxcount > 0) {
1892 offset = findstring(start, end-start,
1893 from_s, from_len,
1894 0, end-start, FORWARD);
1895 if (offset==-1)
1896 break;
1897 Py_MEMCPY(start+offset, to_s, from_len);
1898 start += offset+from_len;
1899 }
1900
1901 return result;
1902}
1903
1904/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1905Py_LOCAL(PyBytesObject *)
1906replace_single_character(PyBytesObject *self,
1907 char from_c,
1908 const char *to_s, Py_ssize_t to_len,
1909 Py_ssize_t maxcount)
1910{
1911 char *self_s, *result_s;
1912 char *start, *next, *end;
1913 Py_ssize_t self_len, result_len;
1914 Py_ssize_t count, product;
1915 PyBytesObject *result;
1916
1917 self_s = PyBytes_AS_STRING(self);
1918 self_len = PyBytes_GET_SIZE(self);
1919
1920 count = countchar(self_s, self_len, from_c, maxcount);
1921 if (count == 0) {
1922 /* no matches, return unchanged */
1923 return return_self(self);
1924 }
1925
1926 /* use the difference between current and new, hence the "-1" */
1927 /* result_len = self_len + count * (to_len-1) */
1928 product = count * (to_len-1);
1929 if (product / (to_len-1) != count) {
1930 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1931 return NULL;
1932 }
1933 result_len = self_len + product;
1934 if (result_len < 0) {
1935 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1936 return NULL;
1937 }
1938
1939 if ( (result = (PyBytesObject *)
1940 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1941 return NULL;
1942 result_s = PyBytes_AS_STRING(result);
1943
1944 start = self_s;
1945 end = self_s + self_len;
1946 while (count-- > 0) {
1947 next = findchar(start, end-start, from_c);
1948 if (next == NULL)
1949 break;
1950
1951 if (next == start) {
1952 /* replace with the 'to' */
1953 Py_MEMCPY(result_s, to_s, to_len);
1954 result_s += to_len;
1955 start += 1;
1956 } else {
1957 /* copy the unchanged old then the 'to' */
1958 Py_MEMCPY(result_s, start, next-start);
1959 result_s += (next-start);
1960 Py_MEMCPY(result_s, to_s, to_len);
1961 result_s += to_len;
1962 start = next+1;
1963 }
1964 }
1965 /* Copy the remainder of the remaining bytes */
1966 Py_MEMCPY(result_s, start, end-start);
1967
1968 return result;
1969}
1970
1971/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1972Py_LOCAL(PyBytesObject *)
1973replace_substring(PyBytesObject *self,
1974 const char *from_s, Py_ssize_t from_len,
1975 const char *to_s, Py_ssize_t to_len,
1976 Py_ssize_t maxcount)
1977{
1978 char *self_s, *result_s;
1979 char *start, *next, *end;
1980 Py_ssize_t self_len, result_len;
1981 Py_ssize_t count, offset, product;
1982 PyBytesObject *result;
1983
1984 self_s = PyBytes_AS_STRING(self);
1985 self_len = PyBytes_GET_SIZE(self);
1986
1987 count = countstring(self_s, self_len,
1988 from_s, from_len,
1989 0, self_len, FORWARD, maxcount);
1990 if (count == 0) {
1991 /* no matches, return unchanged */
1992 return return_self(self);
1993 }
1994
1995 /* Check for overflow */
1996 /* result_len = self_len + count * (to_len-from_len) */
1997 product = count * (to_len-from_len);
1998 if (product / (to_len-from_len) != count) {
1999 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2000 return NULL;
2001 }
2002 result_len = self_len + product;
2003 if (result_len < 0) {
2004 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2005 return NULL;
2006 }
2007
2008 if ( (result = (PyBytesObject *)
2009 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2010 return NULL;
2011 result_s = PyBytes_AS_STRING(result);
2012
2013 start = self_s;
2014 end = self_s + self_len;
2015 while (count-- > 0) {
2016 offset = findstring(start, end-start,
2017 from_s, from_len,
2018 0, end-start, FORWARD);
2019 if (offset == -1)
2020 break;
2021 next = start+offset;
2022 if (next == start) {
2023 /* replace with the 'to' */
2024 Py_MEMCPY(result_s, to_s, to_len);
2025 result_s += to_len;
2026 start += from_len;
2027 } else {
2028 /* copy the unchanged old then the 'to' */
2029 Py_MEMCPY(result_s, start, next-start);
2030 result_s += (next-start);
2031 Py_MEMCPY(result_s, to_s, to_len);
2032 result_s += to_len;
2033 start = next+from_len;
2034 }
2035 }
2036 /* Copy the remainder of the remaining bytes */
2037 Py_MEMCPY(result_s, start, end-start);
2038
2039 return result;
2040}
2041
2042
2043Py_LOCAL(PyBytesObject *)
2044replace(PyBytesObject *self,
2045 const char *from_s, Py_ssize_t from_len,
2046 const char *to_s, Py_ssize_t to_len,
2047 Py_ssize_t maxcount)
2048{
2049 if (maxcount < 0) {
2050 maxcount = PY_SSIZE_T_MAX;
2051 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2052 /* nothing to do; return the original bytes */
2053 return return_self(self);
2054 }
2055
2056 if (maxcount == 0 ||
2057 (from_len == 0 && to_len == 0)) {
2058 /* nothing to do; return the original bytes */
2059 return return_self(self);
2060 }
2061
2062 /* Handle zero-length special cases */
2063
2064 if (from_len == 0) {
2065 /* insert the 'to' bytes everywhere. */
2066 /* >>> "Python".replace("", ".") */
2067 /* '.P.y.t.h.o.n.' */
2068 return replace_interleave(self, to_s, to_len, maxcount);
2069 }
2070
2071 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2072 /* point for an empty self bytes to generate a non-empty bytes */
2073 /* Special case so the remaining code always gets a non-empty bytes */
2074 if (PyBytes_GET_SIZE(self) == 0) {
2075 return return_self(self);
2076 }
2077
2078 if (to_len == 0) {
2079 /* delete all occurances of 'from' bytes */
2080 if (from_len == 1) {
2081 return replace_delete_single_character(
2082 self, from_s[0], maxcount);
2083 } else {
2084 return replace_delete_substring(self, from_s, from_len, maxcount);
2085 }
2086 }
2087
2088 /* Handle special case where both bytes have the same length */
2089
2090 if (from_len == to_len) {
2091 if (from_len == 1) {
2092 return replace_single_character_in_place(
2093 self,
2094 from_s[0],
2095 to_s[0],
2096 maxcount);
2097 } else {
2098 return replace_substring_in_place(
2099 self, from_s, from_len, to_s, to_len, maxcount);
2100 }
2101 }
2102
2103 /* Otherwise use the more generic algorithms */
2104 if (from_len == 1) {
2105 return replace_single_character(self, from_s[0],
2106 to_s, to_len, maxcount);
2107 } else {
2108 /* len('from')>=2, len('to')>=1 */
2109 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2110 }
2111}
2112
2113
2114PyDoc_STRVAR(replace__doc__,
2115"B.replace(old, new[, count]) -> bytes\n\
2116\n\
2117Return a copy of B with all occurrences of subsection\n\
2118old replaced by new. If the optional argument count is\n\
2119given, only the first count occurrences are replaced.");
2120
2121static PyObject *
2122bytes_replace(PyBytesObject *self, PyObject *args)
2123{
2124 Py_ssize_t count = -1;
2125 PyObject *from, *to, *res;
2126 Py_buffer vfrom, vto;
2127
2128 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2129 return NULL;
2130
2131 if (_getbuffer(from, &vfrom) < 0)
2132 return NULL;
2133 if (_getbuffer(to, &vto) < 0) {
2134 PyObject_ReleaseBuffer(from, &vfrom);
2135 return NULL;
2136 }
2137
2138 res = (PyObject *)replace((PyBytesObject *) self,
2139 vfrom.buf, vfrom.len,
2140 vto.buf, vto.len, count);
2141
2142 PyObject_ReleaseBuffer(from, &vfrom);
2143 PyObject_ReleaseBuffer(to, &vto);
2144 return res;
2145}
2146
2147
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a given split limit:
   maxsplit+1, capped at MAX_PREALLOC.  5 splits gives 6 elements.
   The argument is parenthesized so that expression arguments expand
   with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to `list` as a new object.
   Relies on these names at the expansion site: `str` (scratch PyObject *),
   `list`, and an `onError` label that is jumped to on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but the first MAX_PREALLOC items are stored straight
   into the preallocated slots; later ones are appended.  Additionally
   relies on a `count` variable, which is incremented per item. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2192
2193
2194Py_LOCAL_INLINE(PyObject *)
2195split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2196{
2197 register Py_ssize_t i, j, count = 0;
2198 PyObject *str;
2199 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2200
2201 if (list == NULL)
2202 return NULL;
2203
2204 i = j = 0;
2205 while ((j < len) && (maxcount-- > 0)) {
2206 for(; j < len; j++) {
2207 /* I found that using memchr makes no difference */
2208 if (s[j] == ch) {
2209 SPLIT_ADD(s, i, j);
2210 i = j = j + 1;
2211 break;
2212 }
2213 }
2214 }
2215 if (i <= len) {
2216 SPLIT_ADD(s, i, len);
2217 }
2218 FIX_PREALLOC_SIZE(list);
2219 return list;
2220
2221 onError:
2222 Py_DECREF(list);
2223 return NULL;
2224}
2225
2226
2227Py_LOCAL_INLINE(PyObject *)
2228split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2229{
2230 register Py_ssize_t i, j, count = 0;
2231 PyObject *str;
2232 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2233
2234 if (list == NULL)
2235 return NULL;
2236
2237 for (i = j = 0; i < len; ) {
2238 /* find a token */
2239 while (i < len && ISSPACE(s[i]))
2240 i++;
2241 j = i;
2242 while (i < len && !ISSPACE(s[i]))
2243 i++;
2244 if (j < i) {
2245 if (maxcount-- <= 0)
2246 break;
2247 SPLIT_ADD(s, j, i);
2248 while (i < len && ISSPACE(s[i]))
2249 i++;
2250 j = i;
2251 }
2252 }
2253 if (j < len) {
2254 SPLIT_ADD(s, j, len);
2255 }
2256 FIX_PREALLOC_SIZE(list);
2257 return list;
2258
2259 onError:
2260 Py_DECREF(list);
2261 return NULL;
2262}
2263
2264PyDoc_STRVAR(split__doc__,
2265"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2266\n\
2267Return a list of the sections in B, using sep as the delimiter.\n\
2268If sep is not given, B is split on ASCII whitespace characters\n\
2269(space, tab, return, newline, formfeed, vertical tab).\n\
2270If maxsplit is given, at most maxsplit splits are done.");
2271
2272static PyObject *
2273bytes_split(PyBytesObject *self, PyObject *args)
2274{
2275 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2276 Py_ssize_t maxsplit = -1, count = 0;
2277 const char *s = PyBytes_AS_STRING(self), *sub;
2278 PyObject *list, *str, *subobj = Py_None;
2279 Py_buffer vsub;
2280#ifdef USE_FAST
2281 Py_ssize_t pos;
2282#endif
2283
2284 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2285 return NULL;
2286 if (maxsplit < 0)
2287 maxsplit = PY_SSIZE_T_MAX;
2288
2289 if (subobj == Py_None)
2290 return split_whitespace(s, len, maxsplit);
2291
2292 if (_getbuffer(subobj, &vsub) < 0)
2293 return NULL;
2294 sub = vsub.buf;
2295 n = vsub.len;
2296
2297 if (n == 0) {
2298 PyErr_SetString(PyExc_ValueError, "empty separator");
2299 PyObject_ReleaseBuffer(subobj, &vsub);
2300 return NULL;
2301 }
2302 if (n == 1)
2303 return split_char(s, len, sub[0], maxsplit);
2304
2305 list = PyList_New(PREALLOC_SIZE(maxsplit));
2306 if (list == NULL) {
2307 PyObject_ReleaseBuffer(subobj, &vsub);
2308 return NULL;
2309 }
2310
2311#ifdef USE_FAST
2312 i = j = 0;
2313 while (maxsplit-- > 0) {
2314 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2315 if (pos < 0)
2316 break;
2317 j = i+pos;
2318 SPLIT_ADD(s, i, j);
2319 i = j + n;
2320 }
2321#else
2322 i = j = 0;
2323 while ((j+n <= len) && (maxsplit-- > 0)) {
2324 for (; j+n <= len; j++) {
2325 if (Py_STRING_MATCH(s, j, sub, n)) {
2326 SPLIT_ADD(s, i, j);
2327 i = j = j + n;
2328 break;
2329 }
2330 }
2331 }
2332#endif
2333 SPLIT_ADD(s, i, len);
2334 FIX_PREALLOC_SIZE(list);
2335 PyObject_ReleaseBuffer(subobj, &vsub);
2336 return list;
2337
2338 onError:
2339 Py_DECREF(list);
2340 PyObject_ReleaseBuffer(subobj, &vsub);
2341 return NULL;
2342}
2343
2344/* stringlib's partition shares nullbytes in some cases.
2345 undo this, we don't want the nullbytes to be shared. */
2346static PyObject *
2347make_nullbytes_unique(PyObject *result)
2348{
2349 if (result != NULL) {
2350 int i;
2351 assert(PyTuple_Check(result));
2352 assert(PyTuple_GET_SIZE(result) == 3);
2353 for (i = 0; i < 3; i++) {
2354 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2355 PyObject *new = PyBytes_FromStringAndSize(NULL, 0);
2356 if (new == NULL) {
2357 Py_DECREF(result);
2358 result = NULL;
2359 break;
2360 }
2361 Py_DECREF(nullbytes);
2362 PyTuple_SET_ITEM(result, i, new);
2363 }
2364 }
2365 }
2366 return result;
2367}
2368
2369PyDoc_STRVAR(partition__doc__,
2370"B.partition(sep) -> (head, sep, tail)\n\
2371\n\
2372Searches for the separator sep in B, and returns the part before it,\n\
2373the separator itself, and the part after it. If the separator is not\n\
2374found, returns B and two empty bytearray objects.");
2375
2376static PyObject *
2377bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2378{
2379 PyObject *bytesep, *result;
2380
2381 bytesep = PyBytes_FromObject(sep_obj);
2382 if (! bytesep)
2383 return NULL;
2384
2385 result = stringlib_partition(
2386 (PyObject*) self,
2387 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2388 bytesep,
2389 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2390 );
2391
2392 Py_DECREF(bytesep);
2393 return make_nullbytes_unique(result);
2394}
2395
2396PyDoc_STRVAR(rpartition__doc__,
2397"B.rpartition(sep) -> (tail, sep, head)\n\
2398\n\
2399Searches for the separator sep in B, starting at the end of B,\n\
2400and returns the part before it, the separator itself, and the\n\
2401part after it. If the separator is not found, returns two empty\n\
2402bytearray objects and B.");
2403
2404static PyObject *
2405bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2406{
2407 PyObject *bytesep, *result;
2408
2409 bytesep = PyBytes_FromObject(sep_obj);
2410 if (! bytesep)
2411 return NULL;
2412
2413 result = stringlib_rpartition(
2414 (PyObject*) self,
2415 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2416 bytesep,
2417 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2418 );
2419
2420 Py_DECREF(bytesep);
2421 return make_nullbytes_unique(result);
2422}
2423
/* Split the buffer s[0:len] on the single byte ch, scanning from the right
   and consuming at most maxcount separators.  Pieces are collected
   right-to-left and the list is reversed before it is returned.
   Returns a new list, or NULL on failure.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are defined
   outside this section.  They appear to use the locals `str`, `list` and
   `count` and the `onError` label declared here -- confirm before renaming
   any of them. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    /* i scans leftwards; j is the index of the last byte of the piece that
       has not been emitted yet. */
    i = j = len - 1;
    while ((i >= 0) && (maxcount-- > 0)) {
        for (; i >= 0; i--) {
            if (s[i] == ch) {
                SPLIT_ADD(s, i + 1, j + 1);
                /* continue to the left of the separator just consumed */
                j = i = i - 1;
                break;
            }
        }
    }
    if (j >= -1) {
        /* everything to the left of the last separator consumed */
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* restore left-to-right order */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2457
/* Split the buffer s[0:len] on runs of bytes for which ISSPACE is true,
   scanning from the right and emitting at most maxcount tokens that way.
   If the budget runs out, the remaining prefix s[0:j+1] is added as one
   final piece.  Pieces are collected right-to-left and the list is
   reversed before it is returned.
   Returns a new list, or NULL on failure.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are defined
   outside this section.  They appear to use the locals `str`, `list` and
   `count` and the `onError` label declared here -- confirm before renaming
   any of them. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    for (i = j = len - 1; i >= 0; ) {
        /* find a token: skip trailing whitespace, remember where the token
           ends (j), then walk to just before where it starts (i) */
        while (i >= 0 && ISSPACE(s[i]))
            i--;
        j = i;
        while (i >= 0 && !ISSPACE(s[i]))
            i--;
        if (j > i) {
            /* out of split budget: leave j at the end of this token so the
               whole remaining prefix is emitted below */
            if (maxcount-- <= 0)
                break;
            SPLIT_ADD(s, i + 1, j + 1);
            while (i >= 0 && ISSPACE(s[i]))
                i--;
            j = i;
        }
    }
    if (j >= 0) {
        /* unsplit remainder at the front of the buffer */
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* restore left-to-right order */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2497
2498PyDoc_STRVAR(rsplit__doc__,
2499"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2500\n\
2501Return a list of the sections in B, using sep as the delimiter,\n\
2502starting at the end of B and working to the front.\n\
2503If sep is not given, B is split on ASCII whitespace characters\n\
2504(space, tab, return, newline, formfeed, vertical tab).\n\
2505If maxsplit is given, at most maxsplit splits are done.");
2506
2507static PyObject *
2508bytes_rsplit(PyBytesObject *self, PyObject *args)
2509{
2510 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2511 Py_ssize_t maxsplit = -1, count = 0;
2512 const char *s = PyBytes_AS_STRING(self), *sub;
2513 PyObject *list, *str, *subobj = Py_None;
2514 Py_buffer vsub;
2515
2516 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2517 return NULL;
2518 if (maxsplit < 0)
2519 maxsplit = PY_SSIZE_T_MAX;
2520
2521 if (subobj == Py_None)
2522 return rsplit_whitespace(s, len, maxsplit);
2523
2524 if (_getbuffer(subobj, &vsub) < 0)
2525 return NULL;
2526 sub = vsub.buf;
2527 n = vsub.len;
2528
2529 if (n == 0) {
2530 PyErr_SetString(PyExc_ValueError, "empty separator");
2531 PyObject_ReleaseBuffer(subobj, &vsub);
2532 return NULL;
2533 }
2534 else if (n == 1)
2535 return rsplit_char(s, len, sub[0], maxsplit);
2536
2537 list = PyList_New(PREALLOC_SIZE(maxsplit));
2538 if (list == NULL) {
2539 PyObject_ReleaseBuffer(subobj, &vsub);
2540 return NULL;
2541 }
2542
2543 j = len;
2544 i = j - n;
2545
2546 while ( (i >= 0) && (maxsplit-- > 0) ) {
2547 for (; i>=0; i--) {
2548 if (Py_STRING_MATCH(s, i, sub, n)) {
2549 SPLIT_ADD(s, i + n, j);
2550 j = i;
2551 i -= n;
2552 break;
2553 }
2554 }
2555 }
2556 SPLIT_ADD(s, 0, j);
2557 FIX_PREALLOC_SIZE(list);
2558 if (PyList_Reverse(list) < 0)
2559 goto onError;
2560 PyObject_ReleaseBuffer(subobj, &vsub);
2561 return list;
2562
2563onError:
2564 Py_DECREF(list);
2565 PyObject_ReleaseBuffer(subobj, &vsub);
2566 return NULL;
2567}
2568
2569PyDoc_STRVAR(reverse__doc__,
2570"B.reverse() -> None\n\
2571\n\
2572Reverse the order of the values in B in place.");
2573static PyObject *
2574bytes_reverse(PyBytesObject *self, PyObject *unused)
2575{
2576 char swap, *head, *tail;
2577 Py_ssize_t i, j, n = Py_SIZE(self);
2578
2579 j = n / 2;
2580 head = self->ob_bytes;
2581 tail = head + n - 1;
2582 for (i = 0; i < j; i++) {
2583 swap = *head;
2584 *head++ = *tail;
2585 *tail-- = swap;
2586 }
2587
2588 Py_RETURN_NONE;
2589}
2590
2591PyDoc_STRVAR(insert__doc__,
2592"B.insert(index, int) -> None\n\
2593\n\
2594Insert a single item into the bytearray before the given index.");
2595static PyObject *
2596bytes_insert(PyBytesObject *self, PyObject *args)
2597{
2598 int value;
2599 Py_ssize_t where, n = Py_SIZE(self);
2600
2601 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2602 return NULL;
2603
2604 if (n == PY_SSIZE_T_MAX) {
2605 PyErr_SetString(PyExc_OverflowError,
2606 "cannot add more objects to bytes");
2607 return NULL;
2608 }
2609 if (value < 0 || value >= 256) {
2610 PyErr_SetString(PyExc_ValueError,
2611 "byte must be in range(0, 256)");
2612 return NULL;
2613 }
2614 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2615 return NULL;
2616
2617 if (where < 0) {
2618 where += n;
2619 if (where < 0)
2620 where = 0;
2621 }
2622 if (where > n)
2623 where = n;
2624 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2625 self->ob_bytes[where] = value;
2626
2627 Py_RETURN_NONE;
2628}
2629
2630PyDoc_STRVAR(append__doc__,
2631"B.append(int) -> None\n\
2632\n\
2633Append a single item to the end of B.");
2634static PyObject *
2635bytes_append(PyBytesObject *self, PyObject *arg)
2636{
2637 int value;
2638 Py_ssize_t n = Py_SIZE(self);
2639
2640 if (! _getbytevalue(arg, &value))
2641 return NULL;
2642 if (n == PY_SSIZE_T_MAX) {
2643 PyErr_SetString(PyExc_OverflowError,
2644 "cannot add more objects to bytes");
2645 return NULL;
2646 }
2647 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2648 return NULL;
2649
2650 self->ob_bytes[n] = value;
2651
2652 Py_RETURN_NONE;
2653}
2654
2655PyDoc_STRVAR(extend__doc__,
2656"B.extend(iterable int) -> None\n\
2657\n\
2658Append all the elements from the iterator or sequence to the\n\
2659end of B.");
2660static PyObject *
2661bytes_extend(PyBytesObject *self, PyObject *arg)
2662{
2663 PyObject *it, *item, *tmp, *res;
2664 Py_ssize_t buf_size = 0, len = 0;
2665 int value;
2666 char *buf;
2667
2668 /* bytes_setslice code only accepts something supporting PEP 3118. */
2669 if (PyObject_CheckBuffer(arg)) {
2670 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2671 return NULL;
2672
2673 Py_RETURN_NONE;
2674 }
2675
2676 it = PyObject_GetIter(arg);
2677 if (it == NULL)
2678 return NULL;
2679
2680 /* Try to determine the length of the argument. 32 is abitrary. */
2681 buf_size = _PyObject_LengthHint(arg, 32);
2682
2683 buf = (char *)PyMem_Malloc(buf_size * sizeof(char));
2684 if (buf == NULL)
2685 return PyErr_NoMemory();
2686
2687 while ((item = PyIter_Next(it)) != NULL) {
2688 if (! _getbytevalue(item, &value)) {
2689 Py_DECREF(item);
2690 Py_DECREF(it);
Neal Norwitz4ebd46a2008-03-27 04:40:07 +00002691 PyMem_Free(buf);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002692 return NULL;
2693 }
2694 buf[len++] = value;
2695 Py_DECREF(item);
2696 if (len >= buf_size) {
Neal Norwitz4ebd46a2008-03-27 04:40:07 +00002697 char *new_buf;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002698 buf_size = len + (len >> 1) + 1;
Neal Norwitz4ebd46a2008-03-27 04:40:07 +00002699 new_buf = (char *)PyMem_Realloc(buf, buf_size * sizeof(char));
2700 if (new_buf == NULL) {
Christian Heimes1a6387e2008-03-26 12:49:49 +00002701 Py_DECREF(it);
Neal Norwitz4ebd46a2008-03-27 04:40:07 +00002702 PyMem_Free(buf);
Christian Heimes1a6387e2008-03-26 12:49:49 +00002703 return PyErr_NoMemory();
2704 }
Neal Norwitz4ebd46a2008-03-27 04:40:07 +00002705 buf = new_buf;
Christian Heimes1a6387e2008-03-26 12:49:49 +00002706 }
2707 }
2708 Py_DECREF(it);
2709
2710 /* XXX: Is possible to avoid a full copy of the buffer? */
2711 tmp = PyBytes_FromStringAndSize(buf, len);
2712 res = bytes_extend(self, tmp);
2713 Py_DECREF(tmp);
2714 PyMem_Free(buf);
2715
2716 return res;
2717}
2718
2719PyDoc_STRVAR(pop__doc__,
2720"B.pop([index]) -> int\n\
2721\n\
2722Remove and return a single item from B. If no index\n\
2723argument is give, will pop the last value.");
2724static PyObject *
2725bytes_pop(PyBytesObject *self, PyObject *args)
2726{
2727 int value;
2728 Py_ssize_t where = -1, n = Py_SIZE(self);
2729
2730 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2731 return NULL;
2732
2733 if (n == 0) {
2734 PyErr_SetString(PyExc_OverflowError,
2735 "cannot pop an empty bytes");
2736 return NULL;
2737 }
2738 if (where < 0)
2739 where += Py_SIZE(self);
2740 if (where < 0 || where >= Py_SIZE(self)) {
2741 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2742 return NULL;
2743 }
2744
2745 value = self->ob_bytes[where];
2746 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2747 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2748 return NULL;
2749
2750 return PyInt_FromLong(value);
2751}
2752
2753PyDoc_STRVAR(remove__doc__,
2754"B.remove(int) -> None\n\
2755\n\
2756Remove the first occurance of a value in B.");
2757static PyObject *
2758bytes_remove(PyBytesObject *self, PyObject *arg)
2759{
2760 int value;
2761 Py_ssize_t where, n = Py_SIZE(self);
2762
2763 if (! _getbytevalue(arg, &value))
2764 return NULL;
2765
2766 for (where = 0; where < n; where++) {
2767 if (self->ob_bytes[where] == value)
2768 break;
2769 }
2770 if (where == n) {
2771 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2772 return NULL;
2773 }
2774
2775 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2776 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2777 return NULL;
2778
2779 Py_RETURN_NONE;
2780}
2781
2782/* XXX These two helpers could be optimized if argsize == 1 */
2783
2784static Py_ssize_t
2785lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2786 void *argptr, Py_ssize_t argsize)
2787{
2788 Py_ssize_t i = 0;
2789 while (i < mysize && memchr(argptr, myptr[i], argsize))
2790 i++;
2791 return i;
2792}
2793
2794static Py_ssize_t
2795rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2796 void *argptr, Py_ssize_t argsize)
2797{
2798 Py_ssize_t i = mysize - 1;
2799 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2800 i--;
2801 return i + 1;
2802}
2803
2804PyDoc_STRVAR(strip__doc__,
2805"B.strip([bytes]) -> bytearray\n\
2806\n\
2807Strip leading and trailing bytes contained in the argument.\n\
2808If the argument is omitted, strip ASCII whitespace.");
2809static PyObject *
2810bytes_strip(PyBytesObject *self, PyObject *args)
2811{
2812 Py_ssize_t left, right, mysize, argsize;
2813 void *myptr, *argptr;
2814 PyObject *arg = Py_None;
2815 Py_buffer varg;
2816 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2817 return NULL;
2818 if (arg == Py_None) {
2819 argptr = "\t\n\r\f\v ";
2820 argsize = 6;
2821 }
2822 else {
2823 if (_getbuffer(arg, &varg) < 0)
2824 return NULL;
2825 argptr = varg.buf;
2826 argsize = varg.len;
2827 }
2828 myptr = self->ob_bytes;
2829 mysize = Py_SIZE(self);
2830 left = lstrip_helper(myptr, mysize, argptr, argsize);
2831 if (left == mysize)
2832 right = left;
2833 else
2834 right = rstrip_helper(myptr, mysize, argptr, argsize);
2835 if (arg != Py_None)
2836 PyObject_ReleaseBuffer(arg, &varg);
2837 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2838}
2839
2840PyDoc_STRVAR(lstrip__doc__,
2841"B.lstrip([bytes]) -> bytearray\n\
2842\n\
2843Strip leading bytes contained in the argument.\n\
2844If the argument is omitted, strip leading ASCII whitespace.");
2845static PyObject *
2846bytes_lstrip(PyBytesObject *self, PyObject *args)
2847{
2848 Py_ssize_t left, right, mysize, argsize;
2849 void *myptr, *argptr;
2850 PyObject *arg = Py_None;
2851 Py_buffer varg;
2852 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2853 return NULL;
2854 if (arg == Py_None) {
2855 argptr = "\t\n\r\f\v ";
2856 argsize = 6;
2857 }
2858 else {
2859 if (_getbuffer(arg, &varg) < 0)
2860 return NULL;
2861 argptr = varg.buf;
2862 argsize = varg.len;
2863 }
2864 myptr = self->ob_bytes;
2865 mysize = Py_SIZE(self);
2866 left = lstrip_helper(myptr, mysize, argptr, argsize);
2867 right = mysize;
2868 if (arg != Py_None)
2869 PyObject_ReleaseBuffer(arg, &varg);
2870 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2871}
2872
2873PyDoc_STRVAR(rstrip__doc__,
2874"B.rstrip([bytes]) -> bytearray\n\
2875\n\
2876Strip trailing bytes contained in the argument.\n\
2877If the argument is omitted, strip trailing ASCII whitespace.");
2878static PyObject *
2879bytes_rstrip(PyBytesObject *self, PyObject *args)
2880{
2881 Py_ssize_t left, right, mysize, argsize;
2882 void *myptr, *argptr;
2883 PyObject *arg = Py_None;
2884 Py_buffer varg;
2885 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2886 return NULL;
2887 if (arg == Py_None) {
2888 argptr = "\t\n\r\f\v ";
2889 argsize = 6;
2890 }
2891 else {
2892 if (_getbuffer(arg, &varg) < 0)
2893 return NULL;
2894 argptr = varg.buf;
2895 argsize = varg.len;
2896 }
2897 myptr = self->ob_bytes;
2898 mysize = Py_SIZE(self);
2899 left = 0;
2900 right = rstrip_helper(myptr, mysize, argptr, argsize);
2901 if (arg != Py_None)
2902 PyObject_ReleaseBuffer(arg, &varg);
2903 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2904}
2905
2906PyDoc_STRVAR(decode_doc,
2907"B.decode([encoding[, errors]]) -> unicode object.\n\
2908\n\
2909Decodes B using the codec registered for encoding. encoding defaults\n\
2910to the default encoding. errors may be given to set a different error\n\
2911handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2912a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2913as well as any other name registered with codecs.register_error that is\n\
2914able to handle UnicodeDecodeErrors.");
2915
2916static PyObject *
2917bytes_decode(PyObject *self, PyObject *args)
2918{
2919 const char *encoding = NULL;
2920 const char *errors = NULL;
2921
2922 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2923 return NULL;
2924 if (encoding == NULL)
2925 encoding = PyUnicode_GetDefaultEncoding();
2926 return PyCodec_Decode(self, encoding, errors);
2927}
2928
2929PyDoc_STRVAR(alloc_doc,
2930"B.__alloc__() -> int\n\
2931\n\
2932Returns the number of bytes actually allocated.");
2933
2934static PyObject *
2935bytes_alloc(PyBytesObject *self)
2936{
2937 return PyInt_FromSsize_t(self->ob_alloc);
2938}
2939
2940PyDoc_STRVAR(join_doc,
2941"B.join(iterable_of_bytes) -> bytes\n\
2942\n\
2943Concatenates any number of bytearray objects, with B in between each pair.");
2944
2945static PyObject *
2946bytes_join(PyBytesObject *self, PyObject *it)
2947{
2948 PyObject *seq;
2949 Py_ssize_t mysize = Py_SIZE(self);
2950 Py_ssize_t i;
2951 Py_ssize_t n;
2952 PyObject **items;
2953 Py_ssize_t totalsize = 0;
2954 PyObject *result;
2955 char *dest;
2956
2957 seq = PySequence_Fast(it, "can only join an iterable");
2958 if (seq == NULL)
2959 return NULL;
2960 n = PySequence_Fast_GET_SIZE(seq);
2961 items = PySequence_Fast_ITEMS(seq);
2962
2963 /* Compute the total size, and check that they are all bytes */
2964 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2965 for (i = 0; i < n; i++) {
2966 PyObject *obj = items[i];
2967 if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
2968 PyErr_Format(PyExc_TypeError,
2969 "can only join an iterable of bytes "
2970 "(item %ld has type '%.100s')",
2971 /* XXX %ld isn't right on Win64 */
2972 (long)i, Py_TYPE(obj)->tp_name);
2973 goto error;
2974 }
2975 if (i > 0)
2976 totalsize += mysize;
2977 totalsize += Py_SIZE(obj);
2978 if (totalsize < 0) {
2979 PyErr_NoMemory();
2980 goto error;
2981 }
2982 }
2983
2984 /* Allocate the result, and copy the bytes */
2985 result = PyBytes_FromStringAndSize(NULL, totalsize);
2986 if (result == NULL)
2987 goto error;
2988 dest = PyBytes_AS_STRING(result);
2989 for (i = 0; i < n; i++) {
2990 PyObject *obj = items[i];
2991 Py_ssize_t size = Py_SIZE(obj);
2992 char *buf;
2993 if (PyBytes_Check(obj))
2994 buf = PyBytes_AS_STRING(obj);
2995 else
2996 buf = PyString_AS_STRING(obj);
2997 if (i) {
2998 memcpy(dest, self->ob_bytes, mysize);
2999 dest += mysize;
3000 }
3001 memcpy(dest, buf, size);
3002 dest += size;
3003 }
3004
3005 /* Done */
3006 Py_DECREF(seq);
3007 return result;
3008
3009 /* Error handling */
3010 error:
3011 Py_DECREF(seq);
3012 return NULL;
3013}
3014
/* Docstring for the "fromhex" class method; it is referenced from the
   bytes_methods table. */
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\
\n\
Create a bytearray object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3021
3022static int
3023hex_digit_to_int(Py_UNICODE c)
3024{
3025 if (c >= 128)
3026 return -1;
3027 if (ISDIGIT(c))
3028 return c - '0';
3029 else {
3030 if (ISUPPER(c))
3031 c = TOLOWER(c);
3032 if (c >= 'a' && c <= 'f')
3033 return c - 'a' + 10;
3034 }
3035 return -1;
3036}
3037
3038static PyObject *
3039bytes_fromhex(PyObject *cls, PyObject *args)
3040{
3041 PyObject *newbytes, *hexobj;
3042 char *buf;
3043 Py_UNICODE *hex;
3044 Py_ssize_t hexlen, byteslen, i, j;
3045 int top, bot;
3046
3047 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3048 return NULL;
3049 assert(PyUnicode_Check(hexobj));
3050 hexlen = PyUnicode_GET_SIZE(hexobj);
3051 hex = PyUnicode_AS_UNICODE(hexobj);
3052 byteslen = hexlen/2; /* This overestimates if there are spaces */
3053 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
3054 if (!newbytes)
3055 return NULL;
3056 buf = PyBytes_AS_STRING(newbytes);
3057 for (i = j = 0; i < hexlen; i += 2) {
3058 /* skip over spaces in the input */
3059 while (hex[i] == ' ')
3060 i++;
3061 if (i >= hexlen)
3062 break;
3063 top = hex_digit_to_int(hex[i]);
3064 bot = hex_digit_to_int(hex[i+1]);
3065 if (top == -1 || bot == -1) {
3066 PyErr_Format(PyExc_ValueError,
3067 "non-hexadecimal number found in "
3068 "fromhex() arg at position %zd", i);
3069 goto error;
3070 }
3071 buf[j++] = (top << 4) + bot;
3072 }
3073 if (PyBytes_Resize(newbytes, j) < 0)
3074 goto error;
3075 return newbytes;
3076
3077 error:
3078 Py_DECREF(newbytes);
3079 return NULL;
3080}
3081
3082PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3083
3084static PyObject *
3085bytes_reduce(PyBytesObject *self)
3086{
3087 PyObject *latin1, *dict;
3088 if (self->ob_bytes)
3089 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3090 Py_SIZE(self), NULL);
3091 else
3092 latin1 = PyUnicode_FromString("");
3093
3094 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3095 if (dict == NULL) {
3096 PyErr_Clear();
3097 dict = Py_None;
3098 Py_INCREF(dict);
3099 }
3100
3101 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3102}
3103
/* Sequence protocol slots for the bytearray type (installed through
   tp_as_sequence in PyBytes_Type).  Slicing goes through the mapping
   protocol instead, so the two slice slots are left empty. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)PyBytes_Concat,         /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
3116
/* Mapping protocol slots for the bytearray type (installed through
   tp_as_mapping in PyBytes_Type). */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,              /* mp_length */
    (binaryfunc)bytes_subscript,        /* mp_subscript */
    (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
};
3122
/* Buffer protocol slots for the bytearray type (installed through
   tp_as_buffer in PyBytes_Type): the four old-style segment accessors
   followed by the new-style get/release pair. */
static PyBufferProcs bytes_as_buffer = {
    (readbufferproc)bytes_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)bytes_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)bytes_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)bytes_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)bytes_getbuffer,             /* bf_getbuffer */
    (releasebufferproc)bytes_releasebuffer,     /* bf_releasebuffer */
};
3131
/* Method table for the bytearray type (installed through tp_methods in
   PyBytes_Type).  Each entry is {name, C function, calling convention,
   docstring}.  The two double-underscore methods come first and the rest
   are kept in alphabetical order; the table ends with a {NULL} sentinel.
   Entries whose function is named stringlib_* are not defined in this
   section of the file. */
static PyMethodDef
bytes_methods[] = {
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* the only class method in the table */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
3195
/* Docstring of the bytearray type itself (installed through tp_doc in
   PyBytes_Type); it lists the accepted constructor call forms. */
PyDoc_STRVAR(bytes_doc,
"bytearray(iterable_of_ints) -> bytearray.\n\
bytearray(string, encoding[, errors]) -> bytearray.\n\
bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
bytearray(memory_view) -> bytearray.\n\
\n\
Construct an mutable bytearray object from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - a bytes or a bytearray object\n\
 - any object implementing the buffer API.\n\
\n\
bytearray(int) -> bytearray.\n\
\n\
Construct a zero-initialized bytearray of the given length.");
3211
3212
/* Defined after the iterator type further down; declared here because the
   type object below refers to it in its tp_iter slot. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for "bytearray".  It can be subclassed (Py_TPFLAGS_BASETYPE)
   and advertises the new-style buffer interface
   (Py_TPFLAGS_HAVE_NEWBUFFER).  tp_hash is left empty. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyBytesObject),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytes_str,                          /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytes_iter,                         /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
3257
/*********************** Bytes Iterator ****************************/

/* State of an iterator over a bytearray. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to hand out */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
3265
/* Destructor for bytearray iterators: stop GC tracking, drop the reference
   to the underlying bytearray (it_seq may already be NULL, hence
   Py_XDECREF), then free the iterator through the GC allocator. */
static void
bytesiter_dealloc(bytesiterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3273
/* GC traversal for bytearray iterators: the only object reference held is
   it_seq.  Returns 0 unless Py_VISIT returns early with the visitor's
   result.
   NOTE(review): Py_VISIT presumably relies on the parameters being named
   `visit` and `arg` -- confirm before renaming them. */
static int
bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3280
3281static PyObject *
3282bytesiter_next(bytesiterobject *it)
3283{
3284 PyBytesObject *seq;
3285 PyObject *item;
3286
3287 assert(it != NULL);
3288 seq = it->it_seq;
3289 if (seq == NULL)
3290 return NULL;
3291 assert(PyBytes_Check(seq));
3292
3293 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3294 item = PyInt_FromLong(
3295 (unsigned char)seq->ob_bytes[it->it_index]);
3296 if (item != NULL)
3297 ++it->it_index;
3298 return item;
3299 }
3300
3301 Py_DECREF(seq);
3302 it->it_seq = NULL;
3303 return NULL;
3304}
3305
3306static PyObject *
3307bytesiter_length_hint(bytesiterobject *it)
3308{
3309 Py_ssize_t len = 0;
3310 if (it->it_seq)
3311 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3312 return PyInt_FromSsize_t(len);
3313}
3314
/* Docstring for the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");

/* Method table for bytearray iterators (installed through tp_methods in
   PyBytesIter_Type). */
static PyMethodDef bytesiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3323
/* Type object for "bytearray_iterator".  Instances take part in cyclic GC
   (Py_TPFLAGS_HAVE_GC) because they hold a reference to the bytearray
   being iterated. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",               /* tp_name */
    sizeof(bytesiterobject),            /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)bytesiter_dealloc,      /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)bytesiter_traverse,   /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)bytesiter_next,       /* tp_iternext */
    bytesiter_methods,                  /* tp_methods */
    0,                                  /* tp_members */
};
3356
3357static PyObject *
3358bytes_iter(PyObject *seq)
3359{
3360 bytesiterobject *it;
3361
3362 if (!PyBytes_Check(seq)) {
3363 PyErr_BadInternalCall();
3364 return NULL;
3365 }
3366 it = PyObject_GC_New(bytesiterobject, &PyBytesIter_Type);
3367 if (it == NULL)
3368 return NULL;
3369 it->it_index = 0;
3370 Py_INCREF(seq);
3371 it->it_seq = (PyBytesObject *)seq;
3372 _PyObject_GC_TRACK(it);
3373 return (PyObject *)it;
3374}