blob: 86d58e13a359d934fcee668caac20bcae70c9792 [file] [log] [blame]
/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyBytesObject *nullbytes = NULL;
9
10void
11PyBytes_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyBytes_Init(void)
18{
19 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
28/* end nullbytes support */
29
30/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
37 if (PyInt_Check(arg)) {
38 face_value = PyInt_AsLong(arg);
39 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
42 }
43 }
44 else if (PyString_CheckExact(arg)) {
45 if (Py_SIZE(arg) != 1) {
46 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
47 return 0;
48 }
49 face_value = Py_CHARMASK(((PyStringObject*)arg)->ob_sval[0]);
50 }
51 else {
52 PyErr_Format(PyExc_TypeError, "an integer or string of size 1 is required");
53 return 0;
54 }
55
56 *value = face_value;
57 return 1;
58}
59
60static Py_ssize_t
61bytes_buffer_getreadbuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
62{
63 if ( index != 0 ) {
64 PyErr_SetString(PyExc_SystemError,
65 "accessing non-existent bytes segment");
66 return -1;
67 }
68 *ptr = (void *)self->ob_bytes;
69 return Py_SIZE(self);
70}
71
72static Py_ssize_t
73bytes_buffer_getwritebuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
74{
75 if ( index != 0 ) {
76 PyErr_SetString(PyExc_SystemError,
77 "accessing non-existent bytes segment");
78 return -1;
79 }
80 *ptr = (void *)self->ob_bytes;
81 return Py_SIZE(self);
82}
83
84static Py_ssize_t
85bytes_buffer_getsegcount(PyBytesObject *self, Py_ssize_t *lenp)
86{
87 if ( lenp )
88 *lenp = Py_SIZE(self);
89 return 1;
90}
91
92static Py_ssize_t
93bytes_buffer_getcharbuf(PyBytesObject *self, Py_ssize_t index, const char **ptr)
94{
95 if ( index != 0 ) {
96 PyErr_SetString(PyExc_SystemError,
97 "accessing non-existent bytes segment");
98 return -1;
99 }
100 *ptr = self->ob_bytes;
101 return Py_SIZE(self);
102}
103
104static int
105bytes_getbuffer(PyBytesObject *obj, Py_buffer *view, int flags)
106{
107 int ret;
108 void *ptr;
109 if (view == NULL) {
110 obj->ob_exports++;
111 return 0;
112 }
113 if (obj->ob_bytes == NULL)
114 ptr = "";
115 else
116 ptr = obj->ob_bytes;
117 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
118 if (ret >= 0) {
119 obj->ob_exports++;
120 }
121 return ret;
122}
123
124static void
125bytes_releasebuffer(PyBytesObject *obj, Py_buffer *view)
126{
127 obj->ob_exports--;
128}
129
130static Py_ssize_t
131_getbuffer(PyObject *obj, Py_buffer *view)
132{
133 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
134
135 if (buffer == NULL || buffer->bf_getbuffer == NULL)
136 {
137 PyErr_Format(PyExc_TypeError,
138 "Type %.100s doesn't support the buffer API",
139 Py_TYPE(obj)->tp_name);
140 return -1;
141 }
142
143 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
144 return -1;
145 return view->len;
146}
147
148/* Direct API functions */
149
150PyObject *
151PyBytes_FromObject(PyObject *input)
152{
153 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
154 input, NULL);
155}
156
157PyObject *
158PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
159{
160 PyBytesObject *new;
161 Py_ssize_t alloc;
162
163 assert(size >= 0);
164
165 new = PyObject_New(PyBytesObject, &PyBytes_Type);
166 if (new == NULL)
167 return NULL;
168
169 if (size == 0) {
170 new->ob_bytes = NULL;
171 alloc = 0;
172 }
173 else {
174 alloc = size + 1;
175 new->ob_bytes = PyMem_Malloc(alloc);
176 if (new->ob_bytes == NULL) {
177 Py_DECREF(new);
178 return PyErr_NoMemory();
179 }
180 if (bytes != NULL)
181 memcpy(new->ob_bytes, bytes, size);
182 new->ob_bytes[size] = '\0'; /* Trailing null byte */
183 }
184 Py_SIZE(new) = size;
185 new->ob_alloc = alloc;
186 new->ob_exports = 0;
187
188 return (PyObject *)new;
189}
190
191Py_ssize_t
192PyBytes_Size(PyObject *self)
193{
194 assert(self != NULL);
195 assert(PyBytes_Check(self));
196
197 return PyBytes_GET_SIZE(self);
198}
199
200char *
201PyBytes_AsString(PyObject *self)
202{
203 assert(self != NULL);
204 assert(PyBytes_Check(self));
205
206 return PyBytes_AS_STRING(self);
207}
208
209int
210PyBytes_Resize(PyObject *self, Py_ssize_t size)
211{
212 void *sval;
213 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
214
215 assert(self != NULL);
216 assert(PyBytes_Check(self));
217 assert(size >= 0);
218
219 if (size < alloc / 2) {
220 /* Major downsize; resize down to exact size */
221 alloc = size + 1;
222 }
223 else if (size < alloc) {
224 /* Within allocated size; quick exit */
225 Py_SIZE(self) = size;
226 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
227 return 0;
228 }
229 else if (size <= alloc * 1.125) {
230 /* Moderate upsize; overallocate similar to list_resize() */
231 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
232 }
233 else {
234 /* Major upsize; resize up to exact size */
235 alloc = size + 1;
236 }
237
238 if (((PyBytesObject *)self)->ob_exports > 0) {
239 /*
240 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
241 ((PyBytesObject *)self)->ob_bytes);
242 */
243 PyErr_SetString(PyExc_BufferError,
244 "Existing exports of data: object cannot be re-sized");
245 return -1;
246 }
247
248 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
249 if (sval == NULL) {
250 PyErr_NoMemory();
251 return -1;
252 }
253
254 ((PyBytesObject *)self)->ob_bytes = sval;
255 Py_SIZE(self) = size;
256 ((PyBytesObject *)self)->ob_alloc = alloc;
257 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
258
259 return 0;
260}
261
262PyObject *
263PyBytes_Concat(PyObject *a, PyObject *b)
264{
265 Py_ssize_t size;
266 Py_buffer va, vb;
267 PyBytesObject *result = NULL;
268
269 va.len = -1;
270 vb.len = -1;
271 if (_getbuffer(a, &va) < 0 ||
272 _getbuffer(b, &vb) < 0) {
273 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
274 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
275 goto done;
276 }
277
278 size = va.len + vb.len;
279 if (size < 0) {
280 return PyErr_NoMemory();
281 goto done;
282 }
283
284 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
285 if (result != NULL) {
286 memcpy(result->ob_bytes, va.buf, va.len);
287 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
288 }
289
290 done:
291 if (va.len != -1)
292 PyObject_ReleaseBuffer(a, &va);
293 if (vb.len != -1)
294 PyObject_ReleaseBuffer(b, &vb);
295 return (PyObject *)result;
296}
297
298/* Functions stuffed into the type object */
299
300static Py_ssize_t
301bytes_length(PyBytesObject *self)
302{
303 return Py_SIZE(self);
304}
305
306static PyObject *
307bytes_iconcat(PyBytesObject *self, PyObject *other)
308{
309 Py_ssize_t mysize;
310 Py_ssize_t size;
311 Py_buffer vo;
312
313 if (_getbuffer(other, &vo) < 0) {
314 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
315 Py_TYPE(self)->tp_name);
316 return NULL;
317 }
318
319 mysize = Py_SIZE(self);
320 size = mysize + vo.len;
321 if (size < 0) {
322 PyObject_ReleaseBuffer(other, &vo);
323 return PyErr_NoMemory();
324 }
325 if (size < self->ob_alloc) {
326 Py_SIZE(self) = size;
327 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
328 }
329 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
330 PyObject_ReleaseBuffer(other, &vo);
331 return NULL;
332 }
333 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
334 PyObject_ReleaseBuffer(other, &vo);
335 Py_INCREF(self);
336 return (PyObject *)self;
337}
338
339static PyObject *
340bytes_repeat(PyBytesObject *self, Py_ssize_t count)
341{
342 PyBytesObject *result;
343 Py_ssize_t mysize;
344 Py_ssize_t size;
345
346 if (count < 0)
347 count = 0;
348 mysize = Py_SIZE(self);
349 size = mysize * count;
350 if (count != 0 && size / count != mysize)
351 return PyErr_NoMemory();
352 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
353 if (result != NULL && size != 0) {
354 if (mysize == 1)
355 memset(result->ob_bytes, self->ob_bytes[0], size);
356 else {
357 Py_ssize_t i;
358 for (i = 0; i < count; i++)
359 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
360 }
361 }
362 return (PyObject *)result;
363}
364
365static PyObject *
366bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
367{
368 Py_ssize_t mysize;
369 Py_ssize_t size;
370
371 if (count < 0)
372 count = 0;
373 mysize = Py_SIZE(self);
374 size = mysize * count;
375 if (count != 0 && size / count != mysize)
376 return PyErr_NoMemory();
377 if (size < self->ob_alloc) {
378 Py_SIZE(self) = size;
379 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
380 }
381 else if (PyBytes_Resize((PyObject *)self, size) < 0)
382 return NULL;
383
384 if (mysize == 1)
385 memset(self->ob_bytes, self->ob_bytes[0], size);
386 else {
387 Py_ssize_t i;
388 for (i = 1; i < count; i++)
389 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
390 }
391
392 Py_INCREF(self);
393 return (PyObject *)self;
394}
395
396static PyObject *
397bytes_getitem(PyBytesObject *self, Py_ssize_t i)
398{
399 if (i < 0)
400 i += Py_SIZE(self);
401 if (i < 0 || i >= Py_SIZE(self)) {
402 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
403 return NULL;
404 }
405 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
406}
407
408static PyObject *
409bytes_subscript(PyBytesObject *self, PyObject *item)
410{
411 if (PyIndex_Check(item)) {
412 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
413
414 if (i == -1 && PyErr_Occurred())
415 return NULL;
416
417 if (i < 0)
418 i += PyBytes_GET_SIZE(self);
419
420 if (i < 0 || i >= Py_SIZE(self)) {
421 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
422 return NULL;
423 }
424 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
425 }
426 else if (PySlice_Check(item)) {
427 Py_ssize_t start, stop, step, slicelength, cur, i;
428 if (PySlice_GetIndicesEx((PySliceObject *)item,
429 PyBytes_GET_SIZE(self),
430 &start, &stop, &step, &slicelength) < 0) {
431 return NULL;
432 }
433
434 if (slicelength <= 0)
435 return PyBytes_FromStringAndSize("", 0);
436 else if (step == 1) {
437 return PyBytes_FromStringAndSize(self->ob_bytes + start,
438 slicelength);
439 }
440 else {
441 char *source_buf = PyBytes_AS_STRING(self);
442 char *result_buf = (char *)PyMem_Malloc(slicelength);
443 PyObject *result;
444
445 if (result_buf == NULL)
446 return PyErr_NoMemory();
447
448 for (cur = start, i = 0; i < slicelength;
449 cur += step, i++) {
450 result_buf[i] = source_buf[cur];
451 }
452 result = PyBytes_FromStringAndSize(result_buf, slicelength);
453 PyMem_Free(result_buf);
454 return result;
455 }
456 }
457 else {
458 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
459 return NULL;
460 }
461}
462
463static int
464bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
465 PyObject *values)
466{
467 Py_ssize_t avail, needed;
468 void *bytes;
469 Py_buffer vbytes;
470 int res = 0;
471
472 vbytes.len = -1;
473 if (values == (PyObject *)self) {
474 /* Make a copy and call this function recursively */
475 int err;
476 values = PyBytes_FromObject(values);
477 if (values == NULL)
478 return -1;
479 err = bytes_setslice(self, lo, hi, values);
480 Py_DECREF(values);
481 return err;
482 }
483 if (values == NULL) {
484 /* del b[lo:hi] */
485 bytes = NULL;
486 needed = 0;
487 }
488 else {
489 if (_getbuffer(values, &vbytes) < 0) {
490 PyErr_Format(PyExc_TypeError,
491 "can't set bytes slice from %.100s",
492 Py_TYPE(values)->tp_name);
493 return -1;
494 }
495 needed = vbytes.len;
496 bytes = vbytes.buf;
497 }
498
499 if (lo < 0)
500 lo = 0;
501 if (hi < lo)
502 hi = lo;
503 if (hi > Py_SIZE(self))
504 hi = Py_SIZE(self);
505
506 avail = hi - lo;
507 if (avail < 0)
508 lo = hi = avail = 0;
509
510 if (avail != needed) {
511 if (avail > needed) {
512 /*
513 0 lo hi old_size
514 | |<----avail----->|<-----tomove------>|
515 | |<-needed->|<-----tomove------>|
516 0 lo new_hi new_size
517 */
518 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
519 Py_SIZE(self) - hi);
520 }
521 /* XXX(nnorwitz): need to verify this can't overflow! */
522 if (PyBytes_Resize((PyObject *)self,
523 Py_SIZE(self) + needed - avail) < 0) {
524 res = -1;
525 goto finish;
526 }
527 if (avail < needed) {
528 /*
529 0 lo hi old_size
530 | |<-avail->|<-----tomove------>|
531 | |<----needed---->|<-----tomove------>|
532 0 lo new_hi new_size
533 */
534 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
535 Py_SIZE(self) - lo - needed);
536 }
537 }
538
539 if (needed > 0)
540 memcpy(self->ob_bytes + lo, bytes, needed);
541
542
543 finish:
544 if (vbytes.len != -1)
545 PyObject_ReleaseBuffer(values, &vbytes);
546 return res;
547}
548
549static int
550bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
551{
552 Py_ssize_t ival;
553
554 if (i < 0)
555 i += Py_SIZE(self);
556
557 if (i < 0 || i >= Py_SIZE(self)) {
558 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
559 return -1;
560 }
561
562 if (value == NULL)
563 return bytes_setslice(self, i, i+1, NULL);
564
565 if (!_getbytevalue(value, &ival))
566 return -1;
567#if 0
568 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
569 if (ival == -1 && PyErr_Occurred())
570 return -1;
571
572 if (ival < 0 || ival >= 256) {
573 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
574 return -1;
575 }
576#endif
577
578 self->ob_bytes[i] = ival;
579 return 0;
580}
581
582static int
583bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
584{
585 Py_ssize_t start, stop, step, slicelen, needed;
586 char *bytes;
587
588 if (PyIndex_Check(item)) {
589 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
590
591 if (i == -1 && PyErr_Occurred())
592 return -1;
593
594 if (i < 0)
595 i += PyBytes_GET_SIZE(self);
596
597 if (i < 0 || i >= Py_SIZE(self)) {
598 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
599 return -1;
600 }
601
602 if (values == NULL) {
603 /* Fall through to slice assignment */
604 start = i;
605 stop = i + 1;
606 step = 1;
607 slicelen = 1;
608 }
609 else {
610 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
611 if (ival == -1 && PyErr_Occurred()) {
612 /* Also accept str of size 1 in 2.x */
613 PyErr_Clear();
614 if (!_getbytevalue(values, &ival))
615 return -1;
616 }
617 if (ival < 0 || ival >= 256) {
618 PyErr_SetString(PyExc_ValueError,
619 "byte must be in range(0, 256)");
620 return -1;
621 }
622 self->ob_bytes[i] = (char)ival;
623 return 0;
624 }
625 }
626 else if (PySlice_Check(item)) {
627 if (PySlice_GetIndicesEx((PySliceObject *)item,
628 PyBytes_GET_SIZE(self),
629 &start, &stop, &step, &slicelen) < 0) {
630 return -1;
631 }
632 }
633 else {
634 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
635 return -1;
636 }
637
638 if (values == NULL) {
639 bytes = NULL;
640 needed = 0;
641 }
642 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
643 /* Make a copy an call this function recursively */
644 int err;
645 values = PyBytes_FromObject(values);
646 if (values == NULL)
647 return -1;
648 err = bytes_ass_subscript(self, item, values);
649 Py_DECREF(values);
650 return err;
651 }
652 else {
653 assert(PyBytes_Check(values));
654 bytes = ((PyBytesObject *)values)->ob_bytes;
655 needed = Py_SIZE(values);
656 }
657 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
658 if ((step < 0 && start < stop) ||
659 (step > 0 && start > stop))
660 stop = start;
661 if (step == 1) {
662 if (slicelen != needed) {
663 if (slicelen > needed) {
664 /*
665 0 start stop old_size
666 | |<---slicelen--->|<-----tomove------>|
667 | |<-needed->|<-----tomove------>|
668 0 lo new_hi new_size
669 */
670 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
671 Py_SIZE(self) - stop);
672 }
673 if (PyBytes_Resize((PyObject *)self,
674 Py_SIZE(self) + needed - slicelen) < 0)
675 return -1;
676 if (slicelen < needed) {
677 /*
678 0 lo hi old_size
679 | |<-avail->|<-----tomove------>|
680 | |<----needed---->|<-----tomove------>|
681 0 lo new_hi new_size
682 */
683 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
684 Py_SIZE(self) - start - needed);
685 }
686 }
687
688 if (needed > 0)
689 memcpy(self->ob_bytes + start, bytes, needed);
690
691 return 0;
692 }
693 else {
694 if (needed == 0) {
695 /* Delete slice */
696 Py_ssize_t cur, i;
697
698 if (step < 0) {
699 stop = start + 1;
700 start = stop + step * (slicelen - 1) - 1;
701 step = -step;
702 }
703 for (cur = start, i = 0;
704 i < slicelen; cur += step, i++) {
705 Py_ssize_t lim = step - 1;
706
707 if (cur + step >= PyBytes_GET_SIZE(self))
708 lim = PyBytes_GET_SIZE(self) - cur - 1;
709
710 memmove(self->ob_bytes + cur - i,
711 self->ob_bytes + cur + 1, lim);
712 }
713 /* Move the tail of the bytes, in one chunk */
714 cur = start + slicelen*step;
715 if (cur < PyBytes_GET_SIZE(self)) {
716 memmove(self->ob_bytes + cur - slicelen,
717 self->ob_bytes + cur,
718 PyBytes_GET_SIZE(self) - cur);
719 }
720 if (PyBytes_Resize((PyObject *)self,
721 PyBytes_GET_SIZE(self) - slicelen) < 0)
722 return -1;
723
724 return 0;
725 }
726 else {
727 /* Assign slice */
728 Py_ssize_t cur, i;
729
730 if (needed != slicelen) {
731 PyErr_Format(PyExc_ValueError,
732 "attempt to assign bytes of size %zd "
733 "to extended slice of size %zd",
734 needed, slicelen);
735 return -1;
736 }
737 for (cur = start, i = 0; i < slicelen; cur += step, i++)
738 self->ob_bytes[cur] = bytes[i];
739 return 0;
740 }
741 }
742}
743
744static int
745bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
746{
747 static char *kwlist[] = {"source", "encoding", "errors", 0};
748 PyObject *arg = NULL;
749 const char *encoding = NULL;
750 const char *errors = NULL;
751 Py_ssize_t count;
752 PyObject *it;
753 PyObject *(*iternext)(PyObject *);
754
755 if (Py_SIZE(self) != 0) {
756 /* Empty previous contents (yes, do this first of all!) */
757 if (PyBytes_Resize((PyObject *)self, 0) < 0)
758 return -1;
759 }
760
761 /* Parse arguments */
762 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
763 &arg, &encoding, &errors))
764 return -1;
765
766 /* Make a quick exit if no first argument */
767 if (arg == NULL) {
768 if (encoding != NULL || errors != NULL) {
769 PyErr_SetString(PyExc_TypeError,
770 "encoding or errors without sequence argument");
771 return -1;
772 }
773 return 0;
774 }
775
776 if (PyString_Check(arg)) {
777 PyObject *new, *encoded;
778 if (encoding != NULL) {
779 encoded = PyCodec_Encode(arg, encoding, errors);
780 if (encoded == NULL)
781 return -1;
782 assert(PyString_Check(encoded));
783 }
784 else {
785 encoded = arg;
786 Py_INCREF(arg);
787 }
788 new = bytes_iconcat(self, arg);
789 Py_DECREF(encoded);
790 if (new == NULL)
791 return -1;
792 Py_DECREF(new);
793 return 0;
794 }
795
796 if (PyUnicode_Check(arg)) {
797 /* Encode via the codec registry */
798 PyObject *encoded, *new;
799 if (encoding == NULL) {
800 PyErr_SetString(PyExc_TypeError,
801 "unicode argument without an encoding");
802 return -1;
803 }
804 encoded = PyCodec_Encode(arg, encoding, errors);
805 if (encoded == NULL)
806 return -1;
807 assert(PyString_Check(encoded));
808 new = bytes_iconcat(self, encoded);
809 Py_DECREF(encoded);
810 if (new == NULL)
811 return -1;
812 Py_DECREF(new);
813 return 0;
814 }
815
816 /* If it's not unicode, there can't be encoding or errors */
817 if (encoding != NULL || errors != NULL) {
818 PyErr_SetString(PyExc_TypeError,
819 "encoding or errors without a string argument");
820 return -1;
821 }
822
823 /* Is it an int? */
824 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
825 if (count == -1 && PyErr_Occurred())
826 PyErr_Clear();
827 else {
828 if (count < 0) {
829 PyErr_SetString(PyExc_ValueError, "negative count");
830 return -1;
831 }
832 if (count > 0) {
833 if (PyBytes_Resize((PyObject *)self, count))
834 return -1;
835 memset(self->ob_bytes, 0, count);
836 }
837 return 0;
838 }
839
840 /* Use the buffer API */
841 if (PyObject_CheckBuffer(arg)) {
842 Py_ssize_t size;
843 Py_buffer view;
844 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
845 return -1;
846 size = view.len;
847 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
848 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
849 goto fail;
850 PyObject_ReleaseBuffer(arg, &view);
851 return 0;
852 fail:
853 PyObject_ReleaseBuffer(arg, &view);
854 return -1;
855 }
856
857 /* XXX Optimize this if the arguments is a list, tuple */
858
859 /* Get the iterator */
860 it = PyObject_GetIter(arg);
861 if (it == NULL)
862 return -1;
863 iternext = *Py_TYPE(it)->tp_iternext;
864
865 /* Run the iterator to exhaustion */
866 for (;;) {
867 PyObject *item;
868 Py_ssize_t value;
869
870 /* Get the next item */
871 item = iternext(it);
872 if (item == NULL) {
873 if (PyErr_Occurred()) {
874 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
875 goto error;
876 PyErr_Clear();
877 }
878 break;
879 }
880
881 /* Interpret it as an int (__index__) */
882 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
883 Py_DECREF(item);
884 if (value == -1 && PyErr_Occurred())
885 goto error;
886
887 /* Range check */
888 if (value < 0 || value >= 256) {
889 PyErr_SetString(PyExc_ValueError,
890 "bytes must be in range(0, 256)");
891 goto error;
892 }
893
894 /* Append the byte */
895 if (Py_SIZE(self) < self->ob_alloc)
896 Py_SIZE(self)++;
897 else if (PyBytes_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
898 goto error;
899 self->ob_bytes[Py_SIZE(self)-1] = value;
900 }
901
902 /* Clean up and return success */
903 Py_DECREF(it);
904 return 0;
905
906 error:
907 /* Error handling when it != NULL */
908 Py_DECREF(it);
909 return -1;
910}
911
912/* Mostly copied from string_repr, but without the
913 "smart quote" functionality. */
914static PyObject *
915bytes_repr(PyBytesObject *self)
916{
917 static const char *hexdigits = "0123456789abcdef";
918 const char *quote_prefix = "bytearray(b";
919 const char *quote_postfix = ")";
920 Py_ssize_t length = Py_SIZE(self);
921 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
922 size_t newsize = 14 + 4 * length;
923 PyObject *v;
924 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
925 PyErr_SetString(PyExc_OverflowError,
926 "bytearray object is too large to make repr");
927 return NULL;
928 }
929 v = PyUnicode_FromUnicode(NULL, newsize);
930 if (v == NULL) {
931 return NULL;
932 }
933 else {
934 register Py_ssize_t i;
935 register Py_UNICODE c;
936 register Py_UNICODE *p;
937 int quote;
938
939 /* Figure out which quote to use; single is preferred */
940 quote = '\'';
941 {
942 char *test, *start;
943 start = PyBytes_AS_STRING(self);
944 for (test = start; test < start+length; ++test) {
945 if (*test == '"') {
946 quote = '\''; /* back to single */
947 goto decided;
948 }
949 else if (*test == '\'')
950 quote = '"';
951 }
952 decided:
953 ;
954 }
955
956 p = PyUnicode_AS_UNICODE(v);
957 while (*quote_prefix)
958 *p++ = *quote_prefix++;
959 *p++ = quote;
960
961 for (i = 0; i < length; i++) {
962 /* There's at least enough room for a hex escape
963 and a closing quote. */
964 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
965 c = self->ob_bytes[i];
966 if (c == '\'' || c == '\\')
967 *p++ = '\\', *p++ = c;
968 else if (c == '\t')
969 *p++ = '\\', *p++ = 't';
970 else if (c == '\n')
971 *p++ = '\\', *p++ = 'n';
972 else if (c == '\r')
973 *p++ = '\\', *p++ = 'r';
974 else if (c == 0)
975 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
976 else if (c < ' ' || c >= 0x7f) {
977 *p++ = '\\';
978 *p++ = 'x';
979 *p++ = hexdigits[(c & 0xf0) >> 4];
980 *p++ = hexdigits[c & 0xf];
981 }
982 else
983 *p++ = c;
984 }
985 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
986 *p++ = quote;
987 while (*quote_postfix) {
988 *p++ = *quote_postfix++;
989 }
990 *p = '\0';
991 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
992 Py_DECREF(v);
993 return NULL;
994 }
995 return v;
996 }
997}
998
999static PyObject *
1000bytes_str(PyObject *op)
1001{
1002#if 0
1003 if (Py_BytesWarningFlag) {
1004 if (PyErr_WarnEx(PyExc_BytesWarning,
1005 "str() on a bytearray instance", 1))
1006 return NULL;
1007 }
1008 return bytes_repr((PyBytesObject*)op);
1009#endif
1010 return PyString_FromStringAndSize(((PyBytesObject*)op)->ob_bytes, Py_SIZE(op));
1011}
1012
1013static PyObject *
1014bytes_richcompare(PyObject *self, PyObject *other, int op)
1015{
1016 Py_ssize_t self_size, other_size;
1017 Py_buffer self_bytes, other_bytes;
1018 PyObject *res;
1019 Py_ssize_t minsize;
1020 int cmp;
1021
1022 /* Bytes can be compared to anything that supports the (binary)
1023 buffer API. Except that a comparison with Unicode is always an
1024 error, even if the comparison is for equality. */
1025 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1026 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1027 if (Py_BytesWarningFlag && op == Py_EQ) {
1028 if (PyErr_WarnEx(PyExc_BytesWarning,
1029 "Comparsion between bytearray and string", 1))
1030 return NULL;
1031 }
1032
1033 Py_INCREF(Py_NotImplemented);
1034 return Py_NotImplemented;
1035 }
1036
1037 self_size = _getbuffer(self, &self_bytes);
1038 if (self_size < 0) {
1039 PyErr_Clear();
1040 Py_INCREF(Py_NotImplemented);
1041 return Py_NotImplemented;
1042 }
1043
1044 other_size = _getbuffer(other, &other_bytes);
1045 if (other_size < 0) {
1046 PyErr_Clear();
1047 PyObject_ReleaseBuffer(self, &self_bytes);
1048 Py_INCREF(Py_NotImplemented);
1049 return Py_NotImplemented;
1050 }
1051
1052 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1053 /* Shortcut: if the lengths differ, the objects differ */
1054 cmp = (op == Py_NE);
1055 }
1056 else {
1057 minsize = self_size;
1058 if (other_size < minsize)
1059 minsize = other_size;
1060
1061 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1062 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1063
1064 if (cmp == 0) {
1065 if (self_size < other_size)
1066 cmp = -1;
1067 else if (self_size > other_size)
1068 cmp = 1;
1069 }
1070
1071 switch (op) {
1072 case Py_LT: cmp = cmp < 0; break;
1073 case Py_LE: cmp = cmp <= 0; break;
1074 case Py_EQ: cmp = cmp == 0; break;
1075 case Py_NE: cmp = cmp != 0; break;
1076 case Py_GT: cmp = cmp > 0; break;
1077 case Py_GE: cmp = cmp >= 0; break;
1078 }
1079 }
1080
1081 res = cmp ? Py_True : Py_False;
1082 PyObject_ReleaseBuffer(self, &self_bytes);
1083 PyObject_ReleaseBuffer(other, &other_bytes);
1084 Py_INCREF(res);
1085 return res;
1086}
1087
1088static void
1089bytes_dealloc(PyBytesObject *self)
1090{
1091 if (self->ob_bytes != 0) {
1092 PyMem_Free(self->ob_bytes);
1093 }
1094 Py_TYPE(self)->tp_free((PyObject *)self);
1095}
1096
1097
1098/* -------------------------------------------------------------------- */
1099/* Methods */
1100
1101#define STRINGLIB_CHAR char
1102#define STRINGLIB_CMP memcmp
1103#define STRINGLIB_LEN PyBytes_GET_SIZE
1104#define STRINGLIB_STR PyBytes_AS_STRING
1105#define STRINGLIB_NEW PyBytes_FromStringAndSize
1106#define STRINGLIB_EMPTY nullbytes
1107#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
1108#define STRINGLIB_MUTABLE 1
1109
1110#include "stringlib/fastsearch.h"
1111#include "stringlib/count.h"
1112#include "stringlib/find.h"
1113#include "stringlib/partition.h"
1114#include "stringlib/ctype.h"
1115#include "stringlib/transmogrify.h"
1116
1117
1118/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1119were copied from the old char* style string object. */
1120
1121Py_LOCAL_INLINE(void)
1122_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1123{
1124 if (*end > len)
1125 *end = len;
1126 else if (*end < 0)
1127 *end += len;
1128 if (*end < 0)
1129 *end = 0;
1130 if (*start < 0)
1131 *start += len;
1132 if (*start < 0)
1133 *start = 0;
1134}
1135
1136
1137Py_LOCAL_INLINE(Py_ssize_t)
1138bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1139{
1140 PyObject *subobj;
1141 Py_buffer subbuf;
1142 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1143 Py_ssize_t res;
1144
1145 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1146 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1147 return -2;
1148 if (_getbuffer(subobj, &subbuf) < 0)
1149 return -2;
1150 if (dir > 0)
1151 res = stringlib_find_slice(
1152 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1153 subbuf.buf, subbuf.len, start, end);
1154 else
1155 res = stringlib_rfind_slice(
1156 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1157 subbuf.buf, subbuf.len, start, end);
1158 PyObject_ReleaseBuffer(subobj, &subbuf);
1159 return res;
1160}
1161
1162PyDoc_STRVAR(find__doc__,
1163"B.find(sub [,start [,end]]) -> int\n\
1164\n\
1165Return the lowest index in B where subsection sub is found,\n\
1166such that sub is contained within s[start,end]. Optional\n\
1167arguments start and end are interpreted as in slice notation.\n\
1168\n\
1169Return -1 on failure.");
1170
1171static PyObject *
1172bytes_find(PyBytesObject *self, PyObject *args)
1173{
1174 Py_ssize_t result = bytes_find_internal(self, args, +1);
1175 if (result == -2)
1176 return NULL;
1177 return PyInt_FromSsize_t(result);
1178}
1179
1180PyDoc_STRVAR(count__doc__,
1181"B.count(sub [,start [,end]]) -> int\n\
1182\n\
1183Return the number of non-overlapping occurrences of subsection sub in\n\
1184bytes B[start:end]. Optional arguments start and end are interpreted\n\
1185as in slice notation.");
1186
1187static PyObject *
1188bytes_count(PyBytesObject *self, PyObject *args)
1189{
1190 PyObject *sub_obj;
1191 const char *str = PyBytes_AS_STRING(self);
1192 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1193 Py_buffer vsub;
1194 PyObject *count_obj;
1195
1196 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1197 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1198 return NULL;
1199
1200 if (_getbuffer(sub_obj, &vsub) < 0)
1201 return NULL;
1202
1203 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1204
1205 count_obj = PyInt_FromSsize_t(
1206 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1207 );
1208 PyObject_ReleaseBuffer(sub_obj, &vsub);
1209 return count_obj;
1210}
1211
1212
1213PyDoc_STRVAR(index__doc__,
1214"B.index(sub [,start [,end]]) -> int\n\
1215\n\
1216Like B.find() but raise ValueError when the subsection is not found.");
1217
1218static PyObject *
1219bytes_index(PyBytesObject *self, PyObject *args)
1220{
1221 Py_ssize_t result = bytes_find_internal(self, args, +1);
1222 if (result == -2)
1223 return NULL;
1224 if (result == -1) {
1225 PyErr_SetString(PyExc_ValueError,
1226 "subsection not found");
1227 return NULL;
1228 }
1229 return PyInt_FromSsize_t(result);
1230}
1231
1232
1233PyDoc_STRVAR(rfind__doc__,
1234"B.rfind(sub [,start [,end]]) -> int\n\
1235\n\
1236Return the highest index in B where subsection sub is found,\n\
1237such that sub is contained within s[start,end]. Optional\n\
1238arguments start and end are interpreted as in slice notation.\n\
1239\n\
1240Return -1 on failure.");
1241
1242static PyObject *
1243bytes_rfind(PyBytesObject *self, PyObject *args)
1244{
1245 Py_ssize_t result = bytes_find_internal(self, args, -1);
1246 if (result == -2)
1247 return NULL;
1248 return PyInt_FromSsize_t(result);
1249}
1250
1251
1252PyDoc_STRVAR(rindex__doc__,
1253"B.rindex(sub [,start [,end]]) -> int\n\
1254\n\
1255Like B.rfind() but raise ValueError when the subsection is not found.");
1256
1257static PyObject *
1258bytes_rindex(PyBytesObject *self, PyObject *args)
1259{
1260 Py_ssize_t result = bytes_find_internal(self, args, -1);
1261 if (result == -2)
1262 return NULL;
1263 if (result == -1) {
1264 PyErr_SetString(PyExc_ValueError,
1265 "subsection not found");
1266 return NULL;
1267 }
1268 return PyInt_FromSsize_t(result);
1269}
1270
1271
1272static int
1273bytes_contains(PyObject *self, PyObject *arg)
1274{
1275 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1276 if (ival == -1 && PyErr_Occurred()) {
1277 Py_buffer varg;
1278 int pos;
1279 PyErr_Clear();
1280 if (_getbuffer(arg, &varg) < 0)
1281 return -1;
1282 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1283 varg.buf, varg.len, 0);
1284 PyObject_ReleaseBuffer(arg, &varg);
1285 return pos >= 0;
1286 }
1287 if (ival < 0 || ival >= 256) {
1288 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1289 return -1;
1290 }
1291
1292 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1293}
1294
1295
1296/* Matches the end (direction >= 0) or start (direction < 0) of self
1297 * against substr, using the start and end arguments. Returns
1298 * -1 on error, 0 if not found and 1 if found.
1299 */
1300Py_LOCAL(int)
1301_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1302 Py_ssize_t end, int direction)
1303{
1304 Py_ssize_t len = PyBytes_GET_SIZE(self);
1305 const char* str;
1306 Py_buffer vsubstr;
1307 int rv = 0;
1308
1309 str = PyBytes_AS_STRING(self);
1310
1311 if (_getbuffer(substr, &vsubstr) < 0)
1312 return -1;
1313
1314 _adjust_indices(&start, &end, len);
1315
1316 if (direction < 0) {
1317 /* startswith */
1318 if (start+vsubstr.len > len) {
1319 goto done;
1320 }
1321 } else {
1322 /* endswith */
1323 if (end-start < vsubstr.len || start > len) {
1324 goto done;
1325 }
1326
1327 if (end-vsubstr.len > start)
1328 start = end - vsubstr.len;
1329 }
1330 if (end-start >= vsubstr.len)
1331 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1332
1333done:
1334 PyObject_ReleaseBuffer(substr, &vsubstr);
1335 return rv;
1336}
1337
1338
1339PyDoc_STRVAR(startswith__doc__,
1340"B.startswith(prefix [,start [,end]]) -> bool\n\
1341\n\
1342Return True if B starts with the specified prefix, False otherwise.\n\
1343With optional start, test B beginning at that position.\n\
1344With optional end, stop comparing B at that position.\n\
1345prefix can also be a tuple of strings to try.");
1346
1347static PyObject *
1348bytes_startswith(PyBytesObject *self, PyObject *args)
1349{
1350 Py_ssize_t start = 0;
1351 Py_ssize_t end = PY_SSIZE_T_MAX;
1352 PyObject *subobj;
1353 int result;
1354
1355 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1356 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1357 return NULL;
1358 if (PyTuple_Check(subobj)) {
1359 Py_ssize_t i;
1360 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1361 result = _bytes_tailmatch(self,
1362 PyTuple_GET_ITEM(subobj, i),
1363 start, end, -1);
1364 if (result == -1)
1365 return NULL;
1366 else if (result) {
1367 Py_RETURN_TRUE;
1368 }
1369 }
1370 Py_RETURN_FALSE;
1371 }
1372 result = _bytes_tailmatch(self, subobj, start, end, -1);
1373 if (result == -1)
1374 return NULL;
1375 else
1376 return PyBool_FromLong(result);
1377}
1378
1379PyDoc_STRVAR(endswith__doc__,
1380"B.endswith(suffix [,start [,end]]) -> bool\n\
1381\n\
1382Return True if B ends with the specified suffix, False otherwise.\n\
1383With optional start, test B beginning at that position.\n\
1384With optional end, stop comparing B at that position.\n\
1385suffix can also be a tuple of strings to try.");
1386
1387static PyObject *
1388bytes_endswith(PyBytesObject *self, PyObject *args)
1389{
1390 Py_ssize_t start = 0;
1391 Py_ssize_t end = PY_SSIZE_T_MAX;
1392 PyObject *subobj;
1393 int result;
1394
1395 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1396 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1397 return NULL;
1398 if (PyTuple_Check(subobj)) {
1399 Py_ssize_t i;
1400 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1401 result = _bytes_tailmatch(self,
1402 PyTuple_GET_ITEM(subobj, i),
1403 start, end, +1);
1404 if (result == -1)
1405 return NULL;
1406 else if (result) {
1407 Py_RETURN_TRUE;
1408 }
1409 }
1410 Py_RETURN_FALSE;
1411 }
1412 result = _bytes_tailmatch(self, subobj, start, end, +1);
1413 if (result == -1)
1414 return NULL;
1415 else
1416 return PyBool_FromLong(result);
1417}
1418
1419
1420PyDoc_STRVAR(translate__doc__,
1421"B.translate(table[, deletechars]) -> bytearray\n\
1422\n\
1423Return a copy of B, where all characters occurring in the\n\
1424optional argument deletechars are removed, and the remaining\n\
1425characters have been mapped through the given translation\n\
1426table, which must be a bytes object of length 256.");
1427
1428static PyObject *
1429bytes_translate(PyBytesObject *self, PyObject *args)
1430{
1431 register char *input, *output;
1432 register const char *table;
1433 register Py_ssize_t i, c, changed = 0;
1434 PyObject *input_obj = (PyObject*)self;
1435 const char *output_start;
1436 Py_ssize_t inlen;
1437 PyObject *result;
1438 int trans_table[256];
1439 PyObject *tableobj, *delobj = NULL;
1440 Py_buffer vtable, vdel;
1441
1442 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1443 &tableobj, &delobj))
1444 return NULL;
1445
1446 if (_getbuffer(tableobj, &vtable) < 0)
1447 return NULL;
1448
1449 if (vtable.len != 256) {
1450 PyErr_SetString(PyExc_ValueError,
1451 "translation table must be 256 characters long");
1452 result = NULL;
1453 goto done;
1454 }
1455
1456 if (delobj != NULL) {
1457 if (_getbuffer(delobj, &vdel) < 0) {
1458 result = NULL;
1459 goto done;
1460 }
1461 }
1462 else {
1463 vdel.buf = NULL;
1464 vdel.len = 0;
1465 }
1466
1467 table = (const char *)vtable.buf;
1468 inlen = PyBytes_GET_SIZE(input_obj);
1469 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1470 if (result == NULL)
1471 goto done;
1472 output_start = output = PyBytes_AsString(result);
1473 input = PyBytes_AS_STRING(input_obj);
1474
1475 if (vdel.len == 0) {
1476 /* If no deletions are required, use faster code */
1477 for (i = inlen; --i >= 0; ) {
1478 c = Py_CHARMASK(*input++);
1479 if (Py_CHARMASK((*output++ = table[c])) != c)
1480 changed = 1;
1481 }
1482 if (changed || !PyBytes_CheckExact(input_obj))
1483 goto done;
1484 Py_DECREF(result);
1485 Py_INCREF(input_obj);
1486 result = input_obj;
1487 goto done;
1488 }
1489
1490 for (i = 0; i < 256; i++)
1491 trans_table[i] = Py_CHARMASK(table[i]);
1492
1493 for (i = 0; i < vdel.len; i++)
1494 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1495
1496 for (i = inlen; --i >= 0; ) {
1497 c = Py_CHARMASK(*input++);
1498 if (trans_table[c] != -1)
1499 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1500 continue;
1501 changed = 1;
1502 }
1503 if (!changed && PyBytes_CheckExact(input_obj)) {
1504 Py_DECREF(result);
1505 Py_INCREF(input_obj);
1506 result = input_obj;
1507 goto done;
1508 }
1509 /* Fix the size of the resulting string */
1510 if (inlen > 0)
1511 PyBytes_Resize(result, output - output_start);
1512
1513done:
1514 PyObject_ReleaseBuffer(tableobj, &vtable);
1515 if (delobj != NULL)
1516 PyObject_ReleaseBuffer(delobj, &vdel);
1517 return result;
1518}
1519
1520
/* Search directions accepted by findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in target[0:target_len],
   or NULL.  Arguments are parenthesized so that expressions such as
   "p + 1" can be passed safely. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* True if pattern[0:length] occurs at target[offset].  Compares the first
   and last byte before calling memcmp on the interior; with length < 2
   the "length-2" size passed to memcmp would be negative. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1534
1535
1536/* Bytes ops must return a string. */
1537/* If the object is subclass of bytes, create a copy */
1538Py_LOCAL(PyBytesObject *)
1539return_self(PyBytesObject *self)
1540{
1541 if (PyBytes_CheckExact(self)) {
1542 Py_INCREF(self);
1543 return (PyBytesObject *)self;
1544 }
1545 return (PyBytesObject *)PyBytes_FromStringAndSize(
1546 PyBytes_AS_STRING(self),
1547 PyBytes_GET_SIZE(self));
1548}
1549
1550Py_LOCAL_INLINE(Py_ssize_t)
1551countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1552{
1553 Py_ssize_t count=0;
1554 const char *start=target;
1555 const char *end=target+target_len;
1556
1557 while ( (start=findchar(start, end-start, c)) != NULL ) {
1558 count++;
1559 if (count >= maxcount)
1560 break;
1561 start += 1;
1562 }
1563 return count;
1564}
1565
1566Py_LOCAL(Py_ssize_t)
1567findstring(const char *target, Py_ssize_t target_len,
1568 const char *pattern, Py_ssize_t pattern_len,
1569 Py_ssize_t start,
1570 Py_ssize_t end,
1571 int direction)
1572{
1573 if (start < 0) {
1574 start += target_len;
1575 if (start < 0)
1576 start = 0;
1577 }
1578 if (end > target_len) {
1579 end = target_len;
1580 } else if (end < 0) {
1581 end += target_len;
1582 if (end < 0)
1583 end = 0;
1584 }
1585
1586 /* zero-length substrings always match at the first attempt */
1587 if (pattern_len == 0)
1588 return (direction > 0) ? start : end;
1589
1590 end -= pattern_len;
1591
1592 if (direction < 0) {
1593 for (; end >= start; end--)
1594 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1595 return end;
1596 } else {
1597 for (; start <= end; start++)
1598 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1599 return start;
1600 }
1601 return -1;
1602}
1603
1604Py_LOCAL_INLINE(Py_ssize_t)
1605countstring(const char *target, Py_ssize_t target_len,
1606 const char *pattern, Py_ssize_t pattern_len,
1607 Py_ssize_t start,
1608 Py_ssize_t end,
1609 int direction, Py_ssize_t maxcount)
1610{
1611 Py_ssize_t count=0;
1612
1613 if (start < 0) {
1614 start += target_len;
1615 if (start < 0)
1616 start = 0;
1617 }
1618 if (end > target_len) {
1619 end = target_len;
1620 } else if (end < 0) {
1621 end += target_len;
1622 if (end < 0)
1623 end = 0;
1624 }
1625
1626 /* zero-length substrings match everywhere */
1627 if (pattern_len == 0 || maxcount == 0) {
1628 if (target_len+1 < maxcount)
1629 return target_len+1;
1630 return maxcount;
1631 }
1632
1633 end -= pattern_len;
1634 if (direction < 0) {
1635 for (; (end >= start); end--)
1636 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1637 count++;
1638 if (--maxcount <= 0) break;
1639 end -= pattern_len-1;
1640 }
1641 } else {
1642 for (; (start <= end); start++)
1643 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1644 count++;
1645 if (--maxcount <= 0)
1646 break;
1647 start += pattern_len-1;
1648 }
1649 }
1650 return count;
1651}
1652
1653
1654/* Algorithms for different cases of string replacement */
1655
1656/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1657Py_LOCAL(PyBytesObject *)
1658replace_interleave(PyBytesObject *self,
1659 const char *to_s, Py_ssize_t to_len,
1660 Py_ssize_t maxcount)
1661{
1662 char *self_s, *result_s;
1663 Py_ssize_t self_len, result_len;
1664 Py_ssize_t count, i, product;
1665 PyBytesObject *result;
1666
1667 self_len = PyBytes_GET_SIZE(self);
1668
1669 /* 1 at the end plus 1 after every character */
1670 count = self_len+1;
1671 if (maxcount < count)
1672 count = maxcount;
1673
1674 /* Check for overflow */
1675 /* result_len = count * to_len + self_len; */
1676 product = count * to_len;
1677 if (product / to_len != count) {
1678 PyErr_SetString(PyExc_OverflowError,
1679 "replace string is too long");
1680 return NULL;
1681 }
1682 result_len = product + self_len;
1683 if (result_len < 0) {
1684 PyErr_SetString(PyExc_OverflowError,
1685 "replace string is too long");
1686 return NULL;
1687 }
1688
1689 if (! (result = (PyBytesObject *)
1690 PyBytes_FromStringAndSize(NULL, result_len)) )
1691 return NULL;
1692
1693 self_s = PyBytes_AS_STRING(self);
1694 result_s = PyBytes_AS_STRING(result);
1695
1696 /* TODO: special case single character, which doesn't need memcpy */
1697
1698 /* Lay the first one down (guaranteed this will occur) */
1699 Py_MEMCPY(result_s, to_s, to_len);
1700 result_s += to_len;
1701 count -= 1;
1702
1703 for (i=0; i<count; i++) {
1704 *result_s++ = *self_s++;
1705 Py_MEMCPY(result_s, to_s, to_len);
1706 result_s += to_len;
1707 }
1708
1709 /* Copy the rest of the original string */
1710 Py_MEMCPY(result_s, self_s, self_len-i);
1711
1712 return result;
1713}
1714
1715/* Special case for deleting a single character */
1716/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1717Py_LOCAL(PyBytesObject *)
1718replace_delete_single_character(PyBytesObject *self,
1719 char from_c, Py_ssize_t maxcount)
1720{
1721 char *self_s, *result_s;
1722 char *start, *next, *end;
1723 Py_ssize_t self_len, result_len;
1724 Py_ssize_t count;
1725 PyBytesObject *result;
1726
1727 self_len = PyBytes_GET_SIZE(self);
1728 self_s = PyBytes_AS_STRING(self);
1729
1730 count = countchar(self_s, self_len, from_c, maxcount);
1731 if (count == 0) {
1732 return return_self(self);
1733 }
1734
1735 result_len = self_len - count; /* from_len == 1 */
1736 assert(result_len>=0);
1737
1738 if ( (result = (PyBytesObject *)
1739 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1740 return NULL;
1741 result_s = PyBytes_AS_STRING(result);
1742
1743 start = self_s;
1744 end = self_s + self_len;
1745 while (count-- > 0) {
1746 next = findchar(start, end-start, from_c);
1747 if (next == NULL)
1748 break;
1749 Py_MEMCPY(result_s, start, next-start);
1750 result_s += (next-start);
1751 start = next+1;
1752 }
1753 Py_MEMCPY(result_s, start, end-start);
1754
1755 return result;
1756}
1757
1758/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1759
1760Py_LOCAL(PyBytesObject *)
1761replace_delete_substring(PyBytesObject *self,
1762 const char *from_s, Py_ssize_t from_len,
1763 Py_ssize_t maxcount)
1764{
1765 char *self_s, *result_s;
1766 char *start, *next, *end;
1767 Py_ssize_t self_len, result_len;
1768 Py_ssize_t count, offset;
1769 PyBytesObject *result;
1770
1771 self_len = PyBytes_GET_SIZE(self);
1772 self_s = PyBytes_AS_STRING(self);
1773
1774 count = countstring(self_s, self_len,
1775 from_s, from_len,
1776 0, self_len, 1,
1777 maxcount);
1778
1779 if (count == 0) {
1780 /* no matches */
1781 return return_self(self);
1782 }
1783
1784 result_len = self_len - (count * from_len);
1785 assert (result_len>=0);
1786
1787 if ( (result = (PyBytesObject *)
1788 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1789 return NULL;
1790
1791 result_s = PyBytes_AS_STRING(result);
1792
1793 start = self_s;
1794 end = self_s + self_len;
1795 while (count-- > 0) {
1796 offset = findstring(start, end-start,
1797 from_s, from_len,
1798 0, end-start, FORWARD);
1799 if (offset == -1)
1800 break;
1801 next = start + offset;
1802
1803 Py_MEMCPY(result_s, start, next-start);
1804
1805 result_s += (next-start);
1806 start = next+from_len;
1807 }
1808 Py_MEMCPY(result_s, start, end-start);
1809 return result;
1810}
1811
1812/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1813Py_LOCAL(PyBytesObject *)
1814replace_single_character_in_place(PyBytesObject *self,
1815 char from_c, char to_c,
1816 Py_ssize_t maxcount)
1817{
1818 char *self_s, *result_s, *start, *end, *next;
1819 Py_ssize_t self_len;
1820 PyBytesObject *result;
1821
1822 /* The result string will be the same size */
1823 self_s = PyBytes_AS_STRING(self);
1824 self_len = PyBytes_GET_SIZE(self);
1825
1826 next = findchar(self_s, self_len, from_c);
1827
1828 if (next == NULL) {
1829 /* No matches; return the original bytes */
1830 return return_self(self);
1831 }
1832
1833 /* Need to make a new bytes */
1834 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1835 if (result == NULL)
1836 return NULL;
1837 result_s = PyBytes_AS_STRING(result);
1838 Py_MEMCPY(result_s, self_s, self_len);
1839
1840 /* change everything in-place, starting with this one */
1841 start = result_s + (next-self_s);
1842 *start = to_c;
1843 start++;
1844 end = result_s + self_len;
1845
1846 while (--maxcount > 0) {
1847 next = findchar(start, end-start, from_c);
1848 if (next == NULL)
1849 break;
1850 *next = to_c;
1851 start = next+1;
1852 }
1853
1854 return result;
1855}
1856
1857/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1858Py_LOCAL(PyBytesObject *)
1859replace_substring_in_place(PyBytesObject *self,
1860 const char *from_s, Py_ssize_t from_len,
1861 const char *to_s, Py_ssize_t to_len,
1862 Py_ssize_t maxcount)
1863{
1864 char *result_s, *start, *end;
1865 char *self_s;
1866 Py_ssize_t self_len, offset;
1867 PyBytesObject *result;
1868
1869 /* The result bytes will be the same size */
1870
1871 self_s = PyBytes_AS_STRING(self);
1872 self_len = PyBytes_GET_SIZE(self);
1873
1874 offset = findstring(self_s, self_len,
1875 from_s, from_len,
1876 0, self_len, FORWARD);
1877 if (offset == -1) {
1878 /* No matches; return the original bytes */
1879 return return_self(self);
1880 }
1881
1882 /* Need to make a new bytes */
1883 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1884 if (result == NULL)
1885 return NULL;
1886 result_s = PyBytes_AS_STRING(result);
1887 Py_MEMCPY(result_s, self_s, self_len);
1888
1889 /* change everything in-place, starting with this one */
1890 start = result_s + offset;
1891 Py_MEMCPY(start, to_s, from_len);
1892 start += from_len;
1893 end = result_s + self_len;
1894
1895 while ( --maxcount > 0) {
1896 offset = findstring(start, end-start,
1897 from_s, from_len,
1898 0, end-start, FORWARD);
1899 if (offset==-1)
1900 break;
1901 Py_MEMCPY(start+offset, to_s, from_len);
1902 start += offset+from_len;
1903 }
1904
1905 return result;
1906}
1907
1908/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1909Py_LOCAL(PyBytesObject *)
1910replace_single_character(PyBytesObject *self,
1911 char from_c,
1912 const char *to_s, Py_ssize_t to_len,
1913 Py_ssize_t maxcount)
1914{
1915 char *self_s, *result_s;
1916 char *start, *next, *end;
1917 Py_ssize_t self_len, result_len;
1918 Py_ssize_t count, product;
1919 PyBytesObject *result;
1920
1921 self_s = PyBytes_AS_STRING(self);
1922 self_len = PyBytes_GET_SIZE(self);
1923
1924 count = countchar(self_s, self_len, from_c, maxcount);
1925 if (count == 0) {
1926 /* no matches, return unchanged */
1927 return return_self(self);
1928 }
1929
1930 /* use the difference between current and new, hence the "-1" */
1931 /* result_len = self_len + count * (to_len-1) */
1932 product = count * (to_len-1);
1933 if (product / (to_len-1) != count) {
1934 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1935 return NULL;
1936 }
1937 result_len = self_len + product;
1938 if (result_len < 0) {
1939 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1940 return NULL;
1941 }
1942
1943 if ( (result = (PyBytesObject *)
1944 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1945 return NULL;
1946 result_s = PyBytes_AS_STRING(result);
1947
1948 start = self_s;
1949 end = self_s + self_len;
1950 while (count-- > 0) {
1951 next = findchar(start, end-start, from_c);
1952 if (next == NULL)
1953 break;
1954
1955 if (next == start) {
1956 /* replace with the 'to' */
1957 Py_MEMCPY(result_s, to_s, to_len);
1958 result_s += to_len;
1959 start += 1;
1960 } else {
1961 /* copy the unchanged old then the 'to' */
1962 Py_MEMCPY(result_s, start, next-start);
1963 result_s += (next-start);
1964 Py_MEMCPY(result_s, to_s, to_len);
1965 result_s += to_len;
1966 start = next+1;
1967 }
1968 }
1969 /* Copy the remainder of the remaining bytes */
1970 Py_MEMCPY(result_s, start, end-start);
1971
1972 return result;
1973}
1974
1975/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1976Py_LOCAL(PyBytesObject *)
1977replace_substring(PyBytesObject *self,
1978 const char *from_s, Py_ssize_t from_len,
1979 const char *to_s, Py_ssize_t to_len,
1980 Py_ssize_t maxcount)
1981{
1982 char *self_s, *result_s;
1983 char *start, *next, *end;
1984 Py_ssize_t self_len, result_len;
1985 Py_ssize_t count, offset, product;
1986 PyBytesObject *result;
1987
1988 self_s = PyBytes_AS_STRING(self);
1989 self_len = PyBytes_GET_SIZE(self);
1990
1991 count = countstring(self_s, self_len,
1992 from_s, from_len,
1993 0, self_len, FORWARD, maxcount);
1994 if (count == 0) {
1995 /* no matches, return unchanged */
1996 return return_self(self);
1997 }
1998
1999 /* Check for overflow */
2000 /* result_len = self_len + count * (to_len-from_len) */
2001 product = count * (to_len-from_len);
2002 if (product / (to_len-from_len) != count) {
2003 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2004 return NULL;
2005 }
2006 result_len = self_len + product;
2007 if (result_len < 0) {
2008 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2009 return NULL;
2010 }
2011
2012 if ( (result = (PyBytesObject *)
2013 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2014 return NULL;
2015 result_s = PyBytes_AS_STRING(result);
2016
2017 start = self_s;
2018 end = self_s + self_len;
2019 while (count-- > 0) {
2020 offset = findstring(start, end-start,
2021 from_s, from_len,
2022 0, end-start, FORWARD);
2023 if (offset == -1)
2024 break;
2025 next = start+offset;
2026 if (next == start) {
2027 /* replace with the 'to' */
2028 Py_MEMCPY(result_s, to_s, to_len);
2029 result_s += to_len;
2030 start += from_len;
2031 } else {
2032 /* copy the unchanged old then the 'to' */
2033 Py_MEMCPY(result_s, start, next-start);
2034 result_s += (next-start);
2035 Py_MEMCPY(result_s, to_s, to_len);
2036 result_s += to_len;
2037 start = next+from_len;
2038 }
2039 }
2040 /* Copy the remainder of the remaining bytes */
2041 Py_MEMCPY(result_s, start, end-start);
2042
2043 return result;
2044}
2045
2046
2047Py_LOCAL(PyBytesObject *)
2048replace(PyBytesObject *self,
2049 const char *from_s, Py_ssize_t from_len,
2050 const char *to_s, Py_ssize_t to_len,
2051 Py_ssize_t maxcount)
2052{
2053 if (maxcount < 0) {
2054 maxcount = PY_SSIZE_T_MAX;
2055 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2056 /* nothing to do; return the original bytes */
2057 return return_self(self);
2058 }
2059
2060 if (maxcount == 0 ||
2061 (from_len == 0 && to_len == 0)) {
2062 /* nothing to do; return the original bytes */
2063 return return_self(self);
2064 }
2065
2066 /* Handle zero-length special cases */
2067
2068 if (from_len == 0) {
2069 /* insert the 'to' bytes everywhere. */
2070 /* >>> "Python".replace("", ".") */
2071 /* '.P.y.t.h.o.n.' */
2072 return replace_interleave(self, to_s, to_len, maxcount);
2073 }
2074
2075 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2076 /* point for an empty self bytes to generate a non-empty bytes */
2077 /* Special case so the remaining code always gets a non-empty bytes */
2078 if (PyBytes_GET_SIZE(self) == 0) {
2079 return return_self(self);
2080 }
2081
2082 if (to_len == 0) {
2083 /* delete all occurances of 'from' bytes */
2084 if (from_len == 1) {
2085 return replace_delete_single_character(
2086 self, from_s[0], maxcount);
2087 } else {
2088 return replace_delete_substring(self, from_s, from_len, maxcount);
2089 }
2090 }
2091
2092 /* Handle special case where both bytes have the same length */
2093
2094 if (from_len == to_len) {
2095 if (from_len == 1) {
2096 return replace_single_character_in_place(
2097 self,
2098 from_s[0],
2099 to_s[0],
2100 maxcount);
2101 } else {
2102 return replace_substring_in_place(
2103 self, from_s, from_len, to_s, to_len, maxcount);
2104 }
2105 }
2106
2107 /* Otherwise use the more generic algorithms */
2108 if (from_len == 1) {
2109 return replace_single_character(self, from_s[0],
2110 to_s, to_len, maxcount);
2111 } else {
2112 /* len('from')>=2, len('to')>=1 */
2113 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2114 }
2115}
2116
2117
2118PyDoc_STRVAR(replace__doc__,
2119"B.replace(old, new[, count]) -> bytes\n\
2120\n\
2121Return a copy of B with all occurrences of subsection\n\
2122old replaced by new. If the optional argument count is\n\
2123given, only the first count occurrences are replaced.");
2124
2125static PyObject *
2126bytes_replace(PyBytesObject *self, PyObject *args)
2127{
2128 Py_ssize_t count = -1;
2129 PyObject *from, *to, *res;
2130 Py_buffer vfrom, vto;
2131
2132 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2133 return NULL;
2134
2135 if (_getbuffer(from, &vfrom) < 0)
2136 return NULL;
2137 if (_getbuffer(to, &vto) < 0) {
2138 PyObject_ReleaseBuffer(from, &vfrom);
2139 return NULL;
2140 }
2141
2142 res = (PyObject *)replace((PyBytesObject *) self,
2143 vfrom.buf, vfrom.len,
2144 vto.buf, vto.len, count);
2145
2146 PyObject_ReleaseBuffer(from, &vfrom);
2147 PyObject_ReleaseBuffer(to, &vto);
2148 return res;
2149}
2150
2151
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that an expression argument (e.g. a
   conditional) is not torn apart by operator precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The two macros below expand inside the split functions and rely on the
   caller's locals 'str' and 'list' and on an 'onError' label; SPLIT_ADD
   additionally uses and increments the caller's 'count'. */

/* Append data[left:right] to list as a new object. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] at index 'count': directly into a preallocated
   slot while count < MAX_PREALLOC, by appending afterwards. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2196
2197
2198Py_LOCAL_INLINE(PyObject *)
2199split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2200{
2201 register Py_ssize_t i, j, count = 0;
2202 PyObject *str;
2203 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2204
2205 if (list == NULL)
2206 return NULL;
2207
2208 i = j = 0;
2209 while ((j < len) && (maxcount-- > 0)) {
2210 for(; j < len; j++) {
2211 /* I found that using memchr makes no difference */
2212 if (s[j] == ch) {
2213 SPLIT_ADD(s, i, j);
2214 i = j = j + 1;
2215 break;
2216 }
2217 }
2218 }
2219 if (i <= len) {
2220 SPLIT_ADD(s, i, len);
2221 }
2222 FIX_PREALLOC_SIZE(list);
2223 return list;
2224
2225 onError:
2226 Py_DECREF(list);
2227 return NULL;
2228}
2229
2230
2231Py_LOCAL_INLINE(PyObject *)
2232split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2233{
2234 register Py_ssize_t i, j, count = 0;
2235 PyObject *str;
2236 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2237
2238 if (list == NULL)
2239 return NULL;
2240
2241 for (i = j = 0; i < len; ) {
2242 /* find a token */
2243 while (i < len && ISSPACE(s[i]))
2244 i++;
2245 j = i;
2246 while (i < len && !ISSPACE(s[i]))
2247 i++;
2248 if (j < i) {
2249 if (maxcount-- <= 0)
2250 break;
2251 SPLIT_ADD(s, j, i);
2252 while (i < len && ISSPACE(s[i]))
2253 i++;
2254 j = i;
2255 }
2256 }
2257 if (j < len) {
2258 SPLIT_ADD(s, j, len);
2259 }
2260 FIX_PREALLOC_SIZE(list);
2261 return list;
2262
2263 onError:
2264 Py_DECREF(list);
2265 return NULL;
2266}
2267
2268PyDoc_STRVAR(split__doc__,
2269"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2270\n\
2271Return a list of the sections in B, using sep as the delimiter.\n\
2272If sep is not given, B is split on ASCII whitespace characters\n\
2273(space, tab, return, newline, formfeed, vertical tab).\n\
2274If maxsplit is given, at most maxsplit splits are done.");
2275
2276static PyObject *
2277bytes_split(PyBytesObject *self, PyObject *args)
2278{
2279 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2280 Py_ssize_t maxsplit = -1, count = 0;
2281 const char *s = PyBytes_AS_STRING(self), *sub;
2282 PyObject *list, *str, *subobj = Py_None;
2283 Py_buffer vsub;
2284#ifdef USE_FAST
2285 Py_ssize_t pos;
2286#endif
2287
2288 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2289 return NULL;
2290 if (maxsplit < 0)
2291 maxsplit = PY_SSIZE_T_MAX;
2292
2293 if (subobj == Py_None)
2294 return split_whitespace(s, len, maxsplit);
2295
2296 if (_getbuffer(subobj, &vsub) < 0)
2297 return NULL;
2298 sub = vsub.buf;
2299 n = vsub.len;
2300
2301 if (n == 0) {
2302 PyErr_SetString(PyExc_ValueError, "empty separator");
2303 PyObject_ReleaseBuffer(subobj, &vsub);
2304 return NULL;
2305 }
2306 if (n == 1)
2307 return split_char(s, len, sub[0], maxsplit);
2308
2309 list = PyList_New(PREALLOC_SIZE(maxsplit));
2310 if (list == NULL) {
2311 PyObject_ReleaseBuffer(subobj, &vsub);
2312 return NULL;
2313 }
2314
2315#ifdef USE_FAST
2316 i = j = 0;
2317 while (maxsplit-- > 0) {
2318 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2319 if (pos < 0)
2320 break;
2321 j = i+pos;
2322 SPLIT_ADD(s, i, j);
2323 i = j + n;
2324 }
2325#else
2326 i = j = 0;
2327 while ((j+n <= len) && (maxsplit-- > 0)) {
2328 for (; j+n <= len; j++) {
2329 if (Py_STRING_MATCH(s, j, sub, n)) {
2330 SPLIT_ADD(s, i, j);
2331 i = j = j + n;
2332 break;
2333 }
2334 }
2335 }
2336#endif
2337 SPLIT_ADD(s, i, len);
2338 FIX_PREALLOC_SIZE(list);
2339 PyObject_ReleaseBuffer(subobj, &vsub);
2340 return list;
2341
2342 onError:
2343 Py_DECREF(list);
2344 PyObject_ReleaseBuffer(subobj, &vsub);
2345 return NULL;
2346}
2347
2348/* stringlib's partition shares nullbytes in some cases.
2349 undo this, we don't want the nullbytes to be shared. */
2350static PyObject *
2351make_nullbytes_unique(PyObject *result)
2352{
2353 if (result != NULL) {
2354 int i;
2355 assert(PyTuple_Check(result));
2356 assert(PyTuple_GET_SIZE(result) == 3);
2357 for (i = 0; i < 3; i++) {
2358 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2359 PyObject *new = PyBytes_FromStringAndSize(NULL, 0);
2360 if (new == NULL) {
2361 Py_DECREF(result);
2362 result = NULL;
2363 break;
2364 }
2365 Py_DECREF(nullbytes);
2366 PyTuple_SET_ITEM(result, i, new);
2367 }
2368 }
2369 }
2370 return result;
2371}
2372
2373PyDoc_STRVAR(partition__doc__,
2374"B.partition(sep) -> (head, sep, tail)\n\
2375\n\
2376Searches for the separator sep in B, and returns the part before it,\n\
2377the separator itself, and the part after it. If the separator is not\n\
2378found, returns B and two empty bytearray objects.");
2379
2380static PyObject *
2381bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2382{
2383 PyObject *bytesep, *result;
2384
2385 bytesep = PyBytes_FromObject(sep_obj);
2386 if (! bytesep)
2387 return NULL;
2388
2389 result = stringlib_partition(
2390 (PyObject*) self,
2391 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2392 bytesep,
2393 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2394 );
2395
2396 Py_DECREF(bytesep);
2397 return make_nullbytes_unique(result);
2398}
2399
2400PyDoc_STRVAR(rpartition__doc__,
2401"B.rpartition(sep) -> (tail, sep, head)\n\
2402\n\
2403Searches for the separator sep in B, starting at the end of B,\n\
2404and returns the part before it, the separator itself, and the\n\
2405part after it. If the separator is not found, returns two empty\n\
2406bytearray objects and B.");
2407
2408static PyObject *
2409bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2410{
2411 PyObject *bytesep, *result;
2412
2413 bytesep = PyBytes_FromObject(sep_obj);
2414 if (! bytesep)
2415 return NULL;
2416
2417 result = stringlib_rpartition(
2418 (PyObject*) self,
2419 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2420 bytesep,
2421 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2422 );
2423
2424 Py_DECREF(bytesep);
2425 return make_nullbytes_unique(result);
2426}
2427
/* Split s[0:len] on the single byte `ch`, scanning from the right and
   performing at most `maxcount` splits.  Pieces are collected right-to-left
   and the list is reversed before returning, so the caller receives them in
   left-to-right order.  Returns a new list, or NULL on failure.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are macros
   defined elsewhere in this file; they presumably reference the locals
   `str`, `list`, `count` and the `onError` label by name -- do not rename
   those without checking the macro definitions. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    /* i is the scan position, j the index of the last byte of the piece
       currently being delimited. */
    i = j = len - 1;
    while ((i >= 0) && (maxcount-- > 0)) {
        for (; i >= 0; i--) {
            if (s[i] == ch) {
                SPLIT_ADD(s, i + 1, j + 1);
                /* continue to the left of the separator just consumed */
                j = i = i - 1;
                break;
            }
        }
    }
    /* Emit whatever remains to the left (s[0:j+1], possibly empty). */
    if (j >= -1) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2461
/* Split s[0:len] on runs of bytes for which ISSPACE() is true, scanning from
   the right and performing at most `maxcount` splits.  Pieces are collected
   right-to-left and the list is reversed before returning.  Returns a new
   list, or NULL on failure.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are macros
   defined elsewhere in this file; they presumably reference the locals
   `str`, `list`, `count` and the `onError` label by name -- do not rename
   those without checking the macro definitions. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    for (i = j = len - 1; i >= 0; ) {
        /* find a token */
        while (i >= 0 && ISSPACE(s[i]))
            i--;
        /* j marks the last byte of the token, i walks to just before it */
        j = i;
        while (i >= 0 && !ISSPACE(s[i]))
            i--;
        if (j > i) {
            /* split budget exhausted: the rest is emitted as one piece below */
            if (maxcount-- <= 0)
                break;
            SPLIT_ADD(s, i + 1, j + 1);
            /* skip the whitespace run preceding the token just added */
            while (i >= 0 && ISSPACE(s[i]))
                i--;
            j = i;
        }
    }
    /* Anything left over (s[0:j+1]) becomes the final, leftmost piece. */
    if (j >= 0) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2501
2502PyDoc_STRVAR(rsplit__doc__,
2503"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2504\n\
2505Return a list of the sections in B, using sep as the delimiter,\n\
2506starting at the end of B and working to the front.\n\
2507If sep is not given, B is split on ASCII whitespace characters\n\
2508(space, tab, return, newline, formfeed, vertical tab).\n\
2509If maxsplit is given, at most maxsplit splits are done.");
2510
2511static PyObject *
2512bytes_rsplit(PyBytesObject *self, PyObject *args)
2513{
2514 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2515 Py_ssize_t maxsplit = -1, count = 0;
2516 const char *s = PyBytes_AS_STRING(self), *sub;
2517 PyObject *list, *str, *subobj = Py_None;
2518 Py_buffer vsub;
2519
2520 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2521 return NULL;
2522 if (maxsplit < 0)
2523 maxsplit = PY_SSIZE_T_MAX;
2524
2525 if (subobj == Py_None)
2526 return rsplit_whitespace(s, len, maxsplit);
2527
2528 if (_getbuffer(subobj, &vsub) < 0)
2529 return NULL;
2530 sub = vsub.buf;
2531 n = vsub.len;
2532
2533 if (n == 0) {
2534 PyErr_SetString(PyExc_ValueError, "empty separator");
2535 PyObject_ReleaseBuffer(subobj, &vsub);
2536 return NULL;
2537 }
2538 else if (n == 1)
2539 return rsplit_char(s, len, sub[0], maxsplit);
2540
2541 list = PyList_New(PREALLOC_SIZE(maxsplit));
2542 if (list == NULL) {
2543 PyObject_ReleaseBuffer(subobj, &vsub);
2544 return NULL;
2545 }
2546
2547 j = len;
2548 i = j - n;
2549
2550 while ( (i >= 0) && (maxsplit-- > 0) ) {
2551 for (; i>=0; i--) {
2552 if (Py_STRING_MATCH(s, i, sub, n)) {
2553 SPLIT_ADD(s, i + n, j);
2554 j = i;
2555 i -= n;
2556 break;
2557 }
2558 }
2559 }
2560 SPLIT_ADD(s, 0, j);
2561 FIX_PREALLOC_SIZE(list);
2562 if (PyList_Reverse(list) < 0)
2563 goto onError;
2564 PyObject_ReleaseBuffer(subobj, &vsub);
2565 return list;
2566
2567onError:
2568 Py_DECREF(list);
2569 PyObject_ReleaseBuffer(subobj, &vsub);
2570 return NULL;
2571}
2572
2573PyDoc_STRVAR(reverse__doc__,
2574"B.reverse() -> None\n\
2575\n\
2576Reverse the order of the values in B in place.");
2577static PyObject *
2578bytes_reverse(PyBytesObject *self, PyObject *unused)
2579{
2580 char swap, *head, *tail;
2581 Py_ssize_t i, j, n = Py_SIZE(self);
2582
2583 j = n / 2;
2584 head = self->ob_bytes;
2585 tail = head + n - 1;
2586 for (i = 0; i < j; i++) {
2587 swap = *head;
2588 *head++ = *tail;
2589 *tail-- = swap;
2590 }
2591
2592 Py_RETURN_NONE;
2593}
2594
2595PyDoc_STRVAR(insert__doc__,
2596"B.insert(index, int) -> None\n\
2597\n\
2598Insert a single item into the bytearray before the given index.");
2599static PyObject *
2600bytes_insert(PyBytesObject *self, PyObject *args)
2601{
2602 int value;
2603 Py_ssize_t where, n = Py_SIZE(self);
2604
2605 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2606 return NULL;
2607
2608 if (n == PY_SSIZE_T_MAX) {
2609 PyErr_SetString(PyExc_OverflowError,
2610 "cannot add more objects to bytes");
2611 return NULL;
2612 }
2613 if (value < 0 || value >= 256) {
2614 PyErr_SetString(PyExc_ValueError,
2615 "byte must be in range(0, 256)");
2616 return NULL;
2617 }
2618 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2619 return NULL;
2620
2621 if (where < 0) {
2622 where += n;
2623 if (where < 0)
2624 where = 0;
2625 }
2626 if (where > n)
2627 where = n;
2628 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2629 self->ob_bytes[where] = value;
2630
2631 Py_RETURN_NONE;
2632}
2633
2634PyDoc_STRVAR(append__doc__,
2635"B.append(int) -> None\n\
2636\n\
2637Append a single item to the end of B.");
2638static PyObject *
2639bytes_append(PyBytesObject *self, PyObject *arg)
2640{
2641 int value;
2642 Py_ssize_t n = Py_SIZE(self);
2643
2644 if (! _getbytevalue(arg, &value))
2645 return NULL;
2646 if (n == PY_SSIZE_T_MAX) {
2647 PyErr_SetString(PyExc_OverflowError,
2648 "cannot add more objects to bytes");
2649 return NULL;
2650 }
2651 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2652 return NULL;
2653
2654 self->ob_bytes[n] = value;
2655
2656 Py_RETURN_NONE;
2657}
2658
2659PyDoc_STRVAR(extend__doc__,
2660"B.extend(iterable int) -> None\n\
2661\n\
2662Append all the elements from the iterator or sequence to the\n\
2663end of B.");
2664static PyObject *
2665bytes_extend(PyBytesObject *self, PyObject *arg)
2666{
2667 PyObject *it, *item, *tmp, *res;
2668 Py_ssize_t buf_size = 0, len = 0;
2669 int value;
2670 char *buf;
2671
2672 /* bytes_setslice code only accepts something supporting PEP 3118. */
2673 if (PyObject_CheckBuffer(arg)) {
2674 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2675 return NULL;
2676
2677 Py_RETURN_NONE;
2678 }
2679
2680 it = PyObject_GetIter(arg);
2681 if (it == NULL)
2682 return NULL;
2683
2684 /* Try to determine the length of the argument. 32 is abitrary. */
2685 buf_size = _PyObject_LengthHint(arg, 32);
2686
2687 buf = (char *)PyMem_Malloc(buf_size * sizeof(char));
2688 if (buf == NULL)
2689 return PyErr_NoMemory();
2690
2691 while ((item = PyIter_Next(it)) != NULL) {
2692 if (! _getbytevalue(item, &value)) {
2693 Py_DECREF(item);
2694 Py_DECREF(it);
2695 return NULL;
2696 }
2697 buf[len++] = value;
2698 Py_DECREF(item);
2699 if (len >= buf_size) {
2700 buf_size = len + (len >> 1) + 1;
2701 buf = (char *)PyMem_Realloc(buf, buf_size * sizeof(char));
2702 if (buf == NULL) {
2703 Py_DECREF(it);
2704 return PyErr_NoMemory();
2705 }
2706 }
2707 }
2708 Py_DECREF(it);
2709
2710 /* XXX: Is possible to avoid a full copy of the buffer? */
2711 tmp = PyBytes_FromStringAndSize(buf, len);
2712 res = bytes_extend(self, tmp);
2713 Py_DECREF(tmp);
2714 PyMem_Free(buf);
2715
2716 return res;
2717}
2718
2719PyDoc_STRVAR(pop__doc__,
2720"B.pop([index]) -> int\n\
2721\n\
2722Remove and return a single item from B. If no index\n\
2723argument is give, will pop the last value.");
2724static PyObject *
2725bytes_pop(PyBytesObject *self, PyObject *args)
2726{
2727 int value;
2728 Py_ssize_t where = -1, n = Py_SIZE(self);
2729
2730 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2731 return NULL;
2732
2733 if (n == 0) {
2734 PyErr_SetString(PyExc_OverflowError,
2735 "cannot pop an empty bytes");
2736 return NULL;
2737 }
2738 if (where < 0)
2739 where += Py_SIZE(self);
2740 if (where < 0 || where >= Py_SIZE(self)) {
2741 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2742 return NULL;
2743 }
2744
2745 value = self->ob_bytes[where];
2746 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2747 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2748 return NULL;
2749
2750 return PyInt_FromLong(value);
2751}
2752
2753PyDoc_STRVAR(remove__doc__,
2754"B.remove(int) -> None\n\
2755\n\
2756Remove the first occurance of a value in B.");
2757static PyObject *
2758bytes_remove(PyBytesObject *self, PyObject *arg)
2759{
2760 int value;
2761 Py_ssize_t where, n = Py_SIZE(self);
2762
2763 if (! _getbytevalue(arg, &value))
2764 return NULL;
2765
2766 for (where = 0; where < n; where++) {
2767 if (self->ob_bytes[where] == value)
2768 break;
2769 }
2770 if (where == n) {
2771 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2772 return NULL;
2773 }
2774
2775 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2776 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2777 return NULL;
2778
2779 Py_RETURN_NONE;
2780}
2781
2782/* XXX These two helpers could be optimized if argsize == 1 */
2783
2784static Py_ssize_t
2785lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2786 void *argptr, Py_ssize_t argsize)
2787{
2788 Py_ssize_t i = 0;
2789 while (i < mysize && memchr(argptr, myptr[i], argsize))
2790 i++;
2791 return i;
2792}
2793
2794static Py_ssize_t
2795rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2796 void *argptr, Py_ssize_t argsize)
2797{
2798 Py_ssize_t i = mysize - 1;
2799 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2800 i--;
2801 return i + 1;
2802}
2803
2804PyDoc_STRVAR(strip__doc__,
2805"B.strip([bytes]) -> bytearray\n\
2806\n\
2807Strip leading and trailing bytes contained in the argument.\n\
2808If the argument is omitted, strip ASCII whitespace.");
2809static PyObject *
2810bytes_strip(PyBytesObject *self, PyObject *args)
2811{
2812 Py_ssize_t left, right, mysize, argsize;
2813 void *myptr, *argptr;
2814 PyObject *arg = Py_None;
2815 Py_buffer varg;
2816 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2817 return NULL;
2818 if (arg == Py_None) {
2819 argptr = "\t\n\r\f\v ";
2820 argsize = 6;
2821 }
2822 else {
2823 if (_getbuffer(arg, &varg) < 0)
2824 return NULL;
2825 argptr = varg.buf;
2826 argsize = varg.len;
2827 }
2828 myptr = self->ob_bytes;
2829 mysize = Py_SIZE(self);
2830 left = lstrip_helper(myptr, mysize, argptr, argsize);
2831 if (left == mysize)
2832 right = left;
2833 else
2834 right = rstrip_helper(myptr, mysize, argptr, argsize);
2835 if (arg != Py_None)
2836 PyObject_ReleaseBuffer(arg, &varg);
2837 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2838}
2839
2840PyDoc_STRVAR(lstrip__doc__,
2841"B.lstrip([bytes]) -> bytearray\n\
2842\n\
2843Strip leading bytes contained in the argument.\n\
2844If the argument is omitted, strip leading ASCII whitespace.");
2845static PyObject *
2846bytes_lstrip(PyBytesObject *self, PyObject *args)
2847{
2848 Py_ssize_t left, right, mysize, argsize;
2849 void *myptr, *argptr;
2850 PyObject *arg = Py_None;
2851 Py_buffer varg;
2852 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2853 return NULL;
2854 if (arg == Py_None) {
2855 argptr = "\t\n\r\f\v ";
2856 argsize = 6;
2857 }
2858 else {
2859 if (_getbuffer(arg, &varg) < 0)
2860 return NULL;
2861 argptr = varg.buf;
2862 argsize = varg.len;
2863 }
2864 myptr = self->ob_bytes;
2865 mysize = Py_SIZE(self);
2866 left = lstrip_helper(myptr, mysize, argptr, argsize);
2867 right = mysize;
2868 if (arg != Py_None)
2869 PyObject_ReleaseBuffer(arg, &varg);
2870 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2871}
2872
2873PyDoc_STRVAR(rstrip__doc__,
2874"B.rstrip([bytes]) -> bytearray\n\
2875\n\
2876Strip trailing bytes contained in the argument.\n\
2877If the argument is omitted, strip trailing ASCII whitespace.");
2878static PyObject *
2879bytes_rstrip(PyBytesObject *self, PyObject *args)
2880{
2881 Py_ssize_t left, right, mysize, argsize;
2882 void *myptr, *argptr;
2883 PyObject *arg = Py_None;
2884 Py_buffer varg;
2885 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2886 return NULL;
2887 if (arg == Py_None) {
2888 argptr = "\t\n\r\f\v ";
2889 argsize = 6;
2890 }
2891 else {
2892 if (_getbuffer(arg, &varg) < 0)
2893 return NULL;
2894 argptr = varg.buf;
2895 argsize = varg.len;
2896 }
2897 myptr = self->ob_bytes;
2898 mysize = Py_SIZE(self);
2899 left = 0;
2900 right = rstrip_helper(myptr, mysize, argptr, argsize);
2901 if (arg != Py_None)
2902 PyObject_ReleaseBuffer(arg, &varg);
2903 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2904}
2905
2906PyDoc_STRVAR(decode_doc,
2907"B.decode([encoding[, errors]]) -> unicode object.\n\
2908\n\
2909Decodes B using the codec registered for encoding. encoding defaults\n\
2910to the default encoding. errors may be given to set a different error\n\
2911handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2912a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2913as well as any other name registered with codecs.register_error that is\n\
2914able to handle UnicodeDecodeErrors.");
2915
2916static PyObject *
2917bytes_decode(PyObject *self, PyObject *args)
2918{
2919 const char *encoding = NULL;
2920 const char *errors = NULL;
2921
2922 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2923 return NULL;
2924 if (encoding == NULL)
2925 encoding = PyUnicode_GetDefaultEncoding();
2926 return PyCodec_Decode(self, encoding, errors);
2927}
2928
2929PyDoc_STRVAR(alloc_doc,
2930"B.__alloc__() -> int\n\
2931\n\
2932Returns the number of bytes actually allocated.");
2933
2934static PyObject *
2935bytes_alloc(PyBytesObject *self)
2936{
2937 return PyInt_FromSsize_t(self->ob_alloc);
2938}
2939
2940PyDoc_STRVAR(join_doc,
2941"B.join(iterable_of_bytes) -> bytes\n\
2942\n\
2943Concatenates any number of bytearray objects, with B in between each pair.");
2944
2945static PyObject *
2946bytes_join(PyBytesObject *self, PyObject *it)
2947{
2948 PyObject *seq;
2949 Py_ssize_t mysize = Py_SIZE(self);
2950 Py_ssize_t i;
2951 Py_ssize_t n;
2952 PyObject **items;
2953 Py_ssize_t totalsize = 0;
2954 PyObject *result;
2955 char *dest;
2956
2957 seq = PySequence_Fast(it, "can only join an iterable");
2958 if (seq == NULL)
2959 return NULL;
2960 n = PySequence_Fast_GET_SIZE(seq);
2961 items = PySequence_Fast_ITEMS(seq);
2962
2963 /* Compute the total size, and check that they are all bytes */
2964 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2965 for (i = 0; i < n; i++) {
2966 PyObject *obj = items[i];
2967 if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
2968 PyErr_Format(PyExc_TypeError,
2969 "can only join an iterable of bytes "
2970 "(item %ld has type '%.100s')",
2971 /* XXX %ld isn't right on Win64 */
2972 (long)i, Py_TYPE(obj)->tp_name);
2973 goto error;
2974 }
2975 if (i > 0)
2976 totalsize += mysize;
2977 totalsize += Py_SIZE(obj);
2978 if (totalsize < 0) {
2979 PyErr_NoMemory();
2980 goto error;
2981 }
2982 }
2983
2984 /* Allocate the result, and copy the bytes */
2985 result = PyBytes_FromStringAndSize(NULL, totalsize);
2986 if (result == NULL)
2987 goto error;
2988 dest = PyBytes_AS_STRING(result);
2989 for (i = 0; i < n; i++) {
2990 PyObject *obj = items[i];
2991 Py_ssize_t size = Py_SIZE(obj);
2992 char *buf;
2993 if (PyBytes_Check(obj))
2994 buf = PyBytes_AS_STRING(obj);
2995 else
2996 buf = PyString_AS_STRING(obj);
2997 if (i) {
2998 memcpy(dest, self->ob_bytes, mysize);
2999 dest += mysize;
3000 }
3001 memcpy(dest, buf, size);
3002 dest += size;
3003 }
3004
3005 /* Done */
3006 Py_DECREF(seq);
3007 return result;
3008
3009 /* Error handling */
3010 error:
3011 Py_DECREF(seq);
3012 return NULL;
3013}
3014
/* Docstring for the bytearray.fromhex() class method (bytes_fromhex). */
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\
\n\
Create a bytearray object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3021
3022static int
3023hex_digit_to_int(Py_UNICODE c)
3024{
3025 if (c >= 128)
3026 return -1;
3027 if (ISDIGIT(c))
3028 return c - '0';
3029 else {
3030 if (ISUPPER(c))
3031 c = TOLOWER(c);
3032 if (c >= 'a' && c <= 'f')
3033 return c - 'a' + 10;
3034 }
3035 return -1;
3036}
3037
3038static PyObject *
3039bytes_fromhex(PyObject *cls, PyObject *args)
3040{
3041 PyObject *newbytes, *hexobj;
3042 char *buf;
3043 Py_UNICODE *hex;
3044 Py_ssize_t hexlen, byteslen, i, j;
3045 int top, bot;
3046
3047 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3048 return NULL;
3049 assert(PyUnicode_Check(hexobj));
3050 hexlen = PyUnicode_GET_SIZE(hexobj);
3051 hex = PyUnicode_AS_UNICODE(hexobj);
3052 byteslen = hexlen/2; /* This overestimates if there are spaces */
3053 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
3054 if (!newbytes)
3055 return NULL;
3056 buf = PyBytes_AS_STRING(newbytes);
3057 for (i = j = 0; i < hexlen; i += 2) {
3058 /* skip over spaces in the input */
3059 while (hex[i] == ' ')
3060 i++;
3061 if (i >= hexlen)
3062 break;
3063 top = hex_digit_to_int(hex[i]);
3064 bot = hex_digit_to_int(hex[i+1]);
3065 if (top == -1 || bot == -1) {
3066 PyErr_Format(PyExc_ValueError,
3067 "non-hexadecimal number found in "
3068 "fromhex() arg at position %zd", i);
3069 goto error;
3070 }
3071 buf[j++] = (top << 4) + bot;
3072 }
3073 if (PyBytes_Resize(newbytes, j) < 0)
3074 goto error;
3075 return newbytes;
3076
3077 error:
3078 Py_DECREF(newbytes);
3079 return NULL;
3080}
3081
3082PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3083
3084static PyObject *
3085bytes_reduce(PyBytesObject *self)
3086{
3087 PyObject *latin1, *dict;
3088 if (self->ob_bytes)
3089 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3090 Py_SIZE(self), NULL);
3091 else
3092 latin1 = PyUnicode_FromString("");
3093
3094 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3095 if (dict == NULL) {
3096 PyErr_Clear();
3097 dict = Py_None;
3098 Py_INCREF(dict);
3099 }
3100
3101 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3102}
3103
/* Sequence protocol slots for the bytearray type (installed via
   tp_as_sequence in PyBytes_Type).  The two slice slots are left empty. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)PyBytes_Concat,         /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
3116
/* Mapping protocol slots for the bytearray type (installed via
   tp_as_mapping in PyBytes_Type). */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,              /* mp_length */
    (binaryfunc)bytes_subscript,        /* mp_subscript */
    (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
};
3122
/* Buffer protocol slots for the bytearray type (installed via tp_as_buffer
   in PyBytes_Type): four read/write/segcount/charbuffer entries followed
   by the getbuffer/releasebuffer pair. */
static PyBufferProcs bytes_as_buffer = {
    (readbufferproc)bytes_buffer_getreadbuf,    /* read buffer */
    (writebufferproc)bytes_buffer_getwritebuf,  /* write buffer */
    (segcountproc)bytes_buffer_getsegcount,     /* segment count */
    (charbufferproc)bytes_buffer_getcharbuf,    /* char buffer */
    (getbufferproc)bytes_getbuffer,             /* get buffer */
    (releasebufferproc)bytes_releasebuffer,     /* release buffer */
};
3131
/* Method table for the bytearray type (installed via tp_methods in
   PyBytes_Type).  Entries are {name, C function, calling convention,
   docstring}; the list is terminated by a {NULL} sentinel.  Functions with
   a stringlib_ prefix and docstrings with a _Py_ prefix are defined outside
   this part of the file. */
static PyMethodDef
bytes_methods[] = {
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* fromhex is the only class method in the table */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
3195
3196PyDoc_STRVAR(bytes_doc,
3197"bytearray(iterable_of_ints) -> bytearray.\n\
3198bytearray(string, encoding[, errors]) -> bytearray.\n\
3199bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3200bytearray(memory_view) -> bytearray.\n\
3201\n\
3202Construct an mutable bytearray object from:\n\
3203 - an iterable yielding integers in range(256)\n\
3204 - a text string encoded using the specified encoding\n\
3205 - a bytes or a bytearray object\n\
3206 - any object implementing the buffer API.\n\
3207\n\
3208bytearray(int) -> bytearray.\n\
3209\n\
3210Construct a zero-initialized bytearray of the given length.");
3211
3212
/* Forward declaration: bytes_iter() is defined after the iterator type at
   the end of the file but is referenced by the tp_iter slot below. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytearray.  Slots are positional; each line names the
   slot it fills. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyBytesObject),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytes_str,                          /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytes_iter,                         /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
3257
3258/*********************** Bytes Iterator ****************************/
3259
/* State of an iterator over a bytearray. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
3265
/* tp_dealloc for the iterator: stop GC tracking, drop the reference to the
   underlying sequence (it_seq may already be NULL, hence Py_XDECREF), then
   free the iterator itself. */
static void
bytesiter_dealloc(bytesiterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3273
/* tp_traverse for the iterator: the only object reference held is it_seq.
   Returns 0 after visiting it. */
static int
bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3280
3281static PyObject *
3282bytesiter_next(bytesiterobject *it)
3283{
3284 PyBytesObject *seq;
3285 PyObject *item;
3286
3287 assert(it != NULL);
3288 seq = it->it_seq;
3289 if (seq == NULL)
3290 return NULL;
3291 assert(PyBytes_Check(seq));
3292
3293 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3294 item = PyInt_FromLong(
3295 (unsigned char)seq->ob_bytes[it->it_index]);
3296 if (item != NULL)
3297 ++it->it_index;
3298 return item;
3299 }
3300
3301 Py_DECREF(seq);
3302 it->it_seq = NULL;
3303 return NULL;
3304}
3305
3306static PyObject *
3307bytesiter_length_hint(bytesiterobject *it)
3308{
3309 Py_ssize_t len = 0;
3310 if (it->it_seq)
3311 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3312 return PyInt_FromSsize_t(len);
3313}
3314
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");

/* Method table for the iterator type: only __length_hint__ is exposed. */
static PyMethodDef bytesiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3323
/* Type object for the bytearray iterator.  It takes part in garbage
   collection (Py_TPFLAGS_HAVE_GC with a tp_traverse slot) and is its own
   iterator (PyObject_SelfIter). */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",               /* tp_name */
    sizeof(bytesiterobject),            /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)bytesiter_dealloc,      /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)bytesiter_traverse,   /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)bytesiter_next,       /* tp_iternext */
    bytesiter_methods,                  /* tp_methods */
    0,                                  /* tp_members */
};
3356
3357static PyObject *
3358bytes_iter(PyObject *seq)
3359{
3360 bytesiterobject *it;
3361
3362 if (!PyBytes_Check(seq)) {
3363 PyErr_BadInternalCall();
3364 return NULL;
3365 }
3366 it = PyObject_GC_New(bytesiterobject, &PyBytesIter_Type);
3367 if (it == NULL)
3368 return NULL;
3369 it->it_index = 0;
3370 Py_INCREF(seq);
3371 it->it_seq = (PyBytesObject *)seq;
3372 _PyObject_GC_TRACK(it);
3373 return (PyObject *)it;
3374}