blob: 90990a744cbd1b014c4d1ff598b65106de65041c [file] [log] [blame]
/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyBytesObject *nullbytes = NULL;
9
10void
11PyBytes_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyBytes_Init(void)
18{
19 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
28/* end nullbytes support */
29
30/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
37 if (PyInt_Check(arg)) {
38 face_value = PyInt_AsLong(arg);
39 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
42 }
43 }
44 else if (PyString_CheckExact(arg)) {
45 if (Py_SIZE(arg) != 1) {
46 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
47 return 0;
48 }
49 face_value = Py_CHARMASK(((PyStringObject*)arg)->ob_sval[0]);
50 }
51 else {
52 PyErr_Format(PyExc_TypeError, "an integer or string of size 1 is required");
53 return 0;
54 }
55
56 *value = face_value;
57 return 1;
58}
59
60static Py_ssize_t
61bytes_buffer_getreadbuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
62{
63 if ( index != 0 ) {
64 PyErr_SetString(PyExc_SystemError,
65 "accessing non-existent bytes segment");
66 return -1;
67 }
68 *ptr = (void *)self->ob_bytes;
69 return Py_SIZE(self);
70}
71
72static Py_ssize_t
73bytes_buffer_getwritebuf(PyBytesObject *self, Py_ssize_t index, const void **ptr)
74{
75 if ( index != 0 ) {
76 PyErr_SetString(PyExc_SystemError,
77 "accessing non-existent bytes segment");
78 return -1;
79 }
80 *ptr = (void *)self->ob_bytes;
81 return Py_SIZE(self);
82}
83
84static Py_ssize_t
85bytes_buffer_getsegcount(PyBytesObject *self, Py_ssize_t *lenp)
86{
87 if ( lenp )
88 *lenp = Py_SIZE(self);
89 return 1;
90}
91
92static Py_ssize_t
93bytes_buffer_getcharbuf(PyBytesObject *self, Py_ssize_t index, const char **ptr)
94{
95 if ( index != 0 ) {
96 PyErr_SetString(PyExc_SystemError,
97 "accessing non-existent bytes segment");
98 return -1;
99 }
100 *ptr = self->ob_bytes;
101 return Py_SIZE(self);
102}
103
104static int
105bytes_getbuffer(PyBytesObject *obj, Py_buffer *view, int flags)
106{
107 int ret;
108 void *ptr;
109 if (view == NULL) {
110 obj->ob_exports++;
111 return 0;
112 }
113 if (obj->ob_bytes == NULL)
114 ptr = "";
115 else
116 ptr = obj->ob_bytes;
117 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
118 if (ret >= 0) {
119 obj->ob_exports++;
120 }
121 return ret;
122}
123
124static void
125bytes_releasebuffer(PyBytesObject *obj, Py_buffer *view)
126{
127 obj->ob_exports--;
128}
129
130static Py_ssize_t
131_getbuffer(PyObject *obj, Py_buffer *view)
132{
133 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
134
135 if (buffer == NULL || buffer->bf_getbuffer == NULL)
136 {
137 PyErr_Format(PyExc_TypeError,
138 "Type %.100s doesn't support the buffer API",
139 Py_TYPE(obj)->tp_name);
140 return -1;
141 }
142
143 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
144 return -1;
145 return view->len;
146}
147
148/* Direct API functions */
149
150PyObject *
151PyBytes_FromObject(PyObject *input)
152{
153 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
154 input, NULL);
155}
156
157PyObject *
158PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
159{
160 PyBytesObject *new;
161 Py_ssize_t alloc;
162
163 assert(size >= 0);
164
165 new = PyObject_New(PyBytesObject, &PyBytes_Type);
166 if (new == NULL)
167 return NULL;
168
169 if (size == 0) {
170 new->ob_bytes = NULL;
171 alloc = 0;
172 }
173 else {
174 alloc = size + 1;
175 new->ob_bytes = PyMem_Malloc(alloc);
176 if (new->ob_bytes == NULL) {
177 Py_DECREF(new);
178 return PyErr_NoMemory();
179 }
180 if (bytes != NULL)
181 memcpy(new->ob_bytes, bytes, size);
182 new->ob_bytes[size] = '\0'; /* Trailing null byte */
183 }
184 Py_SIZE(new) = size;
185 new->ob_alloc = alloc;
186 new->ob_exports = 0;
187
188 return (PyObject *)new;
189}
190
191Py_ssize_t
192PyBytes_Size(PyObject *self)
193{
194 assert(self != NULL);
195 assert(PyBytes_Check(self));
196
197 return PyBytes_GET_SIZE(self);
198}
199
200char *
201PyBytes_AsString(PyObject *self)
202{
203 assert(self != NULL);
204 assert(PyBytes_Check(self));
205
206 return PyBytes_AS_STRING(self);
207}
208
209int
210PyBytes_Resize(PyObject *self, Py_ssize_t size)
211{
212 void *sval;
213 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
214
215 assert(self != NULL);
216 assert(PyBytes_Check(self));
217 assert(size >= 0);
218
219 if (size < alloc / 2) {
220 /* Major downsize; resize down to exact size */
221 alloc = size + 1;
222 }
223 else if (size < alloc) {
224 /* Within allocated size; quick exit */
225 Py_SIZE(self) = size;
226 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
227 return 0;
228 }
229 else if (size <= alloc * 1.125) {
230 /* Moderate upsize; overallocate similar to list_resize() */
231 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
232 }
233 else {
234 /* Major upsize; resize up to exact size */
235 alloc = size + 1;
236 }
237
238 if (((PyBytesObject *)self)->ob_exports > 0) {
239 /*
240 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
241 ((PyBytesObject *)self)->ob_bytes);
242 */
243 PyErr_SetString(PyExc_BufferError,
244 "Existing exports of data: object cannot be re-sized");
245 return -1;
246 }
247
248 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
249 if (sval == NULL) {
250 PyErr_NoMemory();
251 return -1;
252 }
253
254 ((PyBytesObject *)self)->ob_bytes = sval;
255 Py_SIZE(self) = size;
256 ((PyBytesObject *)self)->ob_alloc = alloc;
257 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
258
259 return 0;
260}
261
262PyObject *
263PyBytes_Concat(PyObject *a, PyObject *b)
264{
265 Py_ssize_t size;
266 Py_buffer va, vb;
267 PyBytesObject *result = NULL;
268
269 va.len = -1;
270 vb.len = -1;
271 if (_getbuffer(a, &va) < 0 ||
272 _getbuffer(b, &vb) < 0) {
273 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
274 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
275 goto done;
276 }
277
278 size = va.len + vb.len;
279 if (size < 0) {
280 return PyErr_NoMemory();
281 goto done;
282 }
283
284 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
285 if (result != NULL) {
286 memcpy(result->ob_bytes, va.buf, va.len);
287 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
288 }
289
290 done:
291 if (va.len != -1)
292 PyObject_ReleaseBuffer(a, &va);
293 if (vb.len != -1)
294 PyObject_ReleaseBuffer(b, &vb);
295 return (PyObject *)result;
296}
297
298/* Functions stuffed into the type object */
299
300static Py_ssize_t
301bytes_length(PyBytesObject *self)
302{
303 return Py_SIZE(self);
304}
305
306static PyObject *
307bytes_iconcat(PyBytesObject *self, PyObject *other)
308{
309 Py_ssize_t mysize;
310 Py_ssize_t size;
311 Py_buffer vo;
312
313 if (_getbuffer(other, &vo) < 0) {
314 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
315 Py_TYPE(self)->tp_name);
316 return NULL;
317 }
318
319 mysize = Py_SIZE(self);
320 size = mysize + vo.len;
321 if (size < 0) {
322 PyObject_ReleaseBuffer(other, &vo);
323 return PyErr_NoMemory();
324 }
325 if (size < self->ob_alloc) {
326 Py_SIZE(self) = size;
327 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
328 }
329 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
330 PyObject_ReleaseBuffer(other, &vo);
331 return NULL;
332 }
333 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
334 PyObject_ReleaseBuffer(other, &vo);
335 Py_INCREF(self);
336 return (PyObject *)self;
337}
338
339static PyObject *
340bytes_repeat(PyBytesObject *self, Py_ssize_t count)
341{
342 PyBytesObject *result;
343 Py_ssize_t mysize;
344 Py_ssize_t size;
345
346 if (count < 0)
347 count = 0;
348 mysize = Py_SIZE(self);
349 size = mysize * count;
350 if (count != 0 && size / count != mysize)
351 return PyErr_NoMemory();
352 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
353 if (result != NULL && size != 0) {
354 if (mysize == 1)
355 memset(result->ob_bytes, self->ob_bytes[0], size);
356 else {
357 Py_ssize_t i;
358 for (i = 0; i < count; i++)
359 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
360 }
361 }
362 return (PyObject *)result;
363}
364
365static PyObject *
366bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
367{
368 Py_ssize_t mysize;
369 Py_ssize_t size;
370
371 if (count < 0)
372 count = 0;
373 mysize = Py_SIZE(self);
374 size = mysize * count;
375 if (count != 0 && size / count != mysize)
376 return PyErr_NoMemory();
377 if (size < self->ob_alloc) {
378 Py_SIZE(self) = size;
379 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
380 }
381 else if (PyBytes_Resize((PyObject *)self, size) < 0)
382 return NULL;
383
384 if (mysize == 1)
385 memset(self->ob_bytes, self->ob_bytes[0], size);
386 else {
387 Py_ssize_t i;
388 for (i = 1; i < count; i++)
389 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
390 }
391
392 Py_INCREF(self);
393 return (PyObject *)self;
394}
395
396static PyObject *
397bytes_getitem(PyBytesObject *self, Py_ssize_t i)
398{
399 if (i < 0)
400 i += Py_SIZE(self);
401 if (i < 0 || i >= Py_SIZE(self)) {
402 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
403 return NULL;
404 }
405 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
406}
407
408static PyObject *
409bytes_subscript(PyBytesObject *self, PyObject *item)
410{
411 if (PyIndex_Check(item)) {
412 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
413
414 if (i == -1 && PyErr_Occurred())
415 return NULL;
416
417 if (i < 0)
418 i += PyBytes_GET_SIZE(self);
419
420 if (i < 0 || i >= Py_SIZE(self)) {
421 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
422 return NULL;
423 }
424 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
425 }
426 else if (PySlice_Check(item)) {
427 Py_ssize_t start, stop, step, slicelength, cur, i;
428 if (PySlice_GetIndicesEx((PySliceObject *)item,
429 PyBytes_GET_SIZE(self),
430 &start, &stop, &step, &slicelength) < 0) {
431 return NULL;
432 }
433
434 if (slicelength <= 0)
435 return PyBytes_FromStringAndSize("", 0);
436 else if (step == 1) {
437 return PyBytes_FromStringAndSize(self->ob_bytes + start,
438 slicelength);
439 }
440 else {
441 char *source_buf = PyBytes_AS_STRING(self);
442 char *result_buf = (char *)PyMem_Malloc(slicelength);
443 PyObject *result;
444
445 if (result_buf == NULL)
446 return PyErr_NoMemory();
447
448 for (cur = start, i = 0; i < slicelength;
449 cur += step, i++) {
450 result_buf[i] = source_buf[cur];
451 }
452 result = PyBytes_FromStringAndSize(result_buf, slicelength);
453 PyMem_Free(result_buf);
454 return result;
455 }
456 }
457 else {
458 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
459 return NULL;
460 }
461}
462
463static int
464bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
465 PyObject *values)
466{
467 Py_ssize_t avail, needed;
468 void *bytes;
469 Py_buffer vbytes;
470 int res = 0;
471
472 vbytes.len = -1;
473 if (values == (PyObject *)self) {
474 /* Make a copy and call this function recursively */
475 int err;
476 values = PyBytes_FromObject(values);
477 if (values == NULL)
478 return -1;
479 err = bytes_setslice(self, lo, hi, values);
480 Py_DECREF(values);
481 return err;
482 }
483 if (values == NULL) {
484 /* del b[lo:hi] */
485 bytes = NULL;
486 needed = 0;
487 }
488 else {
489 if (_getbuffer(values, &vbytes) < 0) {
490 PyErr_Format(PyExc_TypeError,
491 "can't set bytes slice from %.100s",
492 Py_TYPE(values)->tp_name);
493 return -1;
494 }
495 needed = vbytes.len;
496 bytes = vbytes.buf;
497 }
498
499 if (lo < 0)
500 lo = 0;
501 if (hi < lo)
502 hi = lo;
503 if (hi > Py_SIZE(self))
504 hi = Py_SIZE(self);
505
506 avail = hi - lo;
507 if (avail < 0)
508 lo = hi = avail = 0;
509
510 if (avail != needed) {
511 if (avail > needed) {
512 /*
513 0 lo hi old_size
514 | |<----avail----->|<-----tomove------>|
515 | |<-needed->|<-----tomove------>|
516 0 lo new_hi new_size
517 */
518 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
519 Py_SIZE(self) - hi);
520 }
521 /* XXX(nnorwitz): need to verify this can't overflow! */
522 if (PyBytes_Resize((PyObject *)self,
523 Py_SIZE(self) + needed - avail) < 0) {
524 res = -1;
525 goto finish;
526 }
527 if (avail < needed) {
528 /*
529 0 lo hi old_size
530 | |<-avail->|<-----tomove------>|
531 | |<----needed---->|<-----tomove------>|
532 0 lo new_hi new_size
533 */
534 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
535 Py_SIZE(self) - lo - needed);
536 }
537 }
538
539 if (needed > 0)
540 memcpy(self->ob_bytes + lo, bytes, needed);
541
542
543 finish:
544 if (vbytes.len != -1)
545 PyObject_ReleaseBuffer(values, &vbytes);
546 return res;
547}
548
549static int
550bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
551{
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000552 int ival;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000553
554 if (i < 0)
555 i += Py_SIZE(self);
556
557 if (i < 0 || i >= Py_SIZE(self)) {
558 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
559 return -1;
560 }
561
562 if (value == NULL)
563 return bytes_setslice(self, i, i+1, NULL);
564
565 if (!_getbytevalue(value, &ival))
566 return -1;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000567
568 self->ob_bytes[i] = ival;
569 return 0;
570}
571
572static int
573bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
574{
575 Py_ssize_t start, stop, step, slicelen, needed;
576 char *bytes;
577
578 if (PyIndex_Check(item)) {
579 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
580
581 if (i == -1 && PyErr_Occurred())
582 return -1;
583
584 if (i < 0)
585 i += PyBytes_GET_SIZE(self);
586
587 if (i < 0 || i >= Py_SIZE(self)) {
588 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
589 return -1;
590 }
591
592 if (values == NULL) {
593 /* Fall through to slice assignment */
594 start = i;
595 stop = i + 1;
596 step = 1;
597 slicelen = 1;
598 }
599 else {
600 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
601 if (ival == -1 && PyErr_Occurred()) {
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000602 int int_value;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000603 /* Also accept str of size 1 in 2.x */
604 PyErr_Clear();
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000605 if (!_getbytevalue(values, &int_value))
Christian Heimes1a6387e2008-03-26 12:49:49 +0000606 return -1;
Neal Norwitz0bcd6132008-03-27 03:49:54 +0000607 ival = (int) int_value;
608 } else if (ival < 0 || ival >= 256) {
Christian Heimes1a6387e2008-03-26 12:49:49 +0000609 PyErr_SetString(PyExc_ValueError,
610 "byte must be in range(0, 256)");
611 return -1;
612 }
613 self->ob_bytes[i] = (char)ival;
614 return 0;
615 }
616 }
617 else if (PySlice_Check(item)) {
618 if (PySlice_GetIndicesEx((PySliceObject *)item,
619 PyBytes_GET_SIZE(self),
620 &start, &stop, &step, &slicelen) < 0) {
621 return -1;
622 }
623 }
624 else {
625 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
626 return -1;
627 }
628
629 if (values == NULL) {
630 bytes = NULL;
631 needed = 0;
632 }
633 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
634 /* Make a copy an call this function recursively */
635 int err;
636 values = PyBytes_FromObject(values);
637 if (values == NULL)
638 return -1;
639 err = bytes_ass_subscript(self, item, values);
640 Py_DECREF(values);
641 return err;
642 }
643 else {
644 assert(PyBytes_Check(values));
645 bytes = ((PyBytesObject *)values)->ob_bytes;
646 needed = Py_SIZE(values);
647 }
648 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
649 if ((step < 0 && start < stop) ||
650 (step > 0 && start > stop))
651 stop = start;
652 if (step == 1) {
653 if (slicelen != needed) {
654 if (slicelen > needed) {
655 /*
656 0 start stop old_size
657 | |<---slicelen--->|<-----tomove------>|
658 | |<-needed->|<-----tomove------>|
659 0 lo new_hi new_size
660 */
661 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
662 Py_SIZE(self) - stop);
663 }
664 if (PyBytes_Resize((PyObject *)self,
665 Py_SIZE(self) + needed - slicelen) < 0)
666 return -1;
667 if (slicelen < needed) {
668 /*
669 0 lo hi old_size
670 | |<-avail->|<-----tomove------>|
671 | |<----needed---->|<-----tomove------>|
672 0 lo new_hi new_size
673 */
674 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
675 Py_SIZE(self) - start - needed);
676 }
677 }
678
679 if (needed > 0)
680 memcpy(self->ob_bytes + start, bytes, needed);
681
682 return 0;
683 }
684 else {
685 if (needed == 0) {
686 /* Delete slice */
687 Py_ssize_t cur, i;
688
689 if (step < 0) {
690 stop = start + 1;
691 start = stop + step * (slicelen - 1) - 1;
692 step = -step;
693 }
694 for (cur = start, i = 0;
695 i < slicelen; cur += step, i++) {
696 Py_ssize_t lim = step - 1;
697
698 if (cur + step >= PyBytes_GET_SIZE(self))
699 lim = PyBytes_GET_SIZE(self) - cur - 1;
700
701 memmove(self->ob_bytes + cur - i,
702 self->ob_bytes + cur + 1, lim);
703 }
704 /* Move the tail of the bytes, in one chunk */
705 cur = start + slicelen*step;
706 if (cur < PyBytes_GET_SIZE(self)) {
707 memmove(self->ob_bytes + cur - slicelen,
708 self->ob_bytes + cur,
709 PyBytes_GET_SIZE(self) - cur);
710 }
711 if (PyBytes_Resize((PyObject *)self,
712 PyBytes_GET_SIZE(self) - slicelen) < 0)
713 return -1;
714
715 return 0;
716 }
717 else {
718 /* Assign slice */
719 Py_ssize_t cur, i;
720
721 if (needed != slicelen) {
722 PyErr_Format(PyExc_ValueError,
723 "attempt to assign bytes of size %zd "
724 "to extended slice of size %zd",
725 needed, slicelen);
726 return -1;
727 }
728 for (cur = start, i = 0; i < slicelen; cur += step, i++)
729 self->ob_bytes[cur] = bytes[i];
730 return 0;
731 }
732 }
733}
734
735static int
736bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
737{
738 static char *kwlist[] = {"source", "encoding", "errors", 0};
739 PyObject *arg = NULL;
740 const char *encoding = NULL;
741 const char *errors = NULL;
742 Py_ssize_t count;
743 PyObject *it;
744 PyObject *(*iternext)(PyObject *);
745
746 if (Py_SIZE(self) != 0) {
747 /* Empty previous contents (yes, do this first of all!) */
748 if (PyBytes_Resize((PyObject *)self, 0) < 0)
749 return -1;
750 }
751
752 /* Parse arguments */
753 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
754 &arg, &encoding, &errors))
755 return -1;
756
757 /* Make a quick exit if no first argument */
758 if (arg == NULL) {
759 if (encoding != NULL || errors != NULL) {
760 PyErr_SetString(PyExc_TypeError,
761 "encoding or errors without sequence argument");
762 return -1;
763 }
764 return 0;
765 }
766
767 if (PyString_Check(arg)) {
768 PyObject *new, *encoded;
769 if (encoding != NULL) {
770 encoded = PyCodec_Encode(arg, encoding, errors);
771 if (encoded == NULL)
772 return -1;
773 assert(PyString_Check(encoded));
774 }
775 else {
776 encoded = arg;
777 Py_INCREF(arg);
778 }
779 new = bytes_iconcat(self, arg);
780 Py_DECREF(encoded);
781 if (new == NULL)
782 return -1;
783 Py_DECREF(new);
784 return 0;
785 }
786
787 if (PyUnicode_Check(arg)) {
788 /* Encode via the codec registry */
789 PyObject *encoded, *new;
790 if (encoding == NULL) {
791 PyErr_SetString(PyExc_TypeError,
792 "unicode argument without an encoding");
793 return -1;
794 }
795 encoded = PyCodec_Encode(arg, encoding, errors);
796 if (encoded == NULL)
797 return -1;
798 assert(PyString_Check(encoded));
799 new = bytes_iconcat(self, encoded);
800 Py_DECREF(encoded);
801 if (new == NULL)
802 return -1;
803 Py_DECREF(new);
804 return 0;
805 }
806
807 /* If it's not unicode, there can't be encoding or errors */
808 if (encoding != NULL || errors != NULL) {
809 PyErr_SetString(PyExc_TypeError,
810 "encoding or errors without a string argument");
811 return -1;
812 }
813
814 /* Is it an int? */
815 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
816 if (count == -1 && PyErr_Occurred())
817 PyErr_Clear();
818 else {
819 if (count < 0) {
820 PyErr_SetString(PyExc_ValueError, "negative count");
821 return -1;
822 }
823 if (count > 0) {
824 if (PyBytes_Resize((PyObject *)self, count))
825 return -1;
826 memset(self->ob_bytes, 0, count);
827 }
828 return 0;
829 }
830
831 /* Use the buffer API */
832 if (PyObject_CheckBuffer(arg)) {
833 Py_ssize_t size;
834 Py_buffer view;
835 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
836 return -1;
837 size = view.len;
838 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
839 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
840 goto fail;
841 PyObject_ReleaseBuffer(arg, &view);
842 return 0;
843 fail:
844 PyObject_ReleaseBuffer(arg, &view);
845 return -1;
846 }
847
848 /* XXX Optimize this if the arguments is a list, tuple */
849
850 /* Get the iterator */
851 it = PyObject_GetIter(arg);
852 if (it == NULL)
853 return -1;
854 iternext = *Py_TYPE(it)->tp_iternext;
855
856 /* Run the iterator to exhaustion */
857 for (;;) {
858 PyObject *item;
859 Py_ssize_t value;
860
861 /* Get the next item */
862 item = iternext(it);
863 if (item == NULL) {
864 if (PyErr_Occurred()) {
865 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
866 goto error;
867 PyErr_Clear();
868 }
869 break;
870 }
871
872 /* Interpret it as an int (__index__) */
873 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
874 Py_DECREF(item);
875 if (value == -1 && PyErr_Occurred())
876 goto error;
877
878 /* Range check */
879 if (value < 0 || value >= 256) {
880 PyErr_SetString(PyExc_ValueError,
881 "bytes must be in range(0, 256)");
882 goto error;
883 }
884
885 /* Append the byte */
886 if (Py_SIZE(self) < self->ob_alloc)
887 Py_SIZE(self)++;
888 else if (PyBytes_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
889 goto error;
890 self->ob_bytes[Py_SIZE(self)-1] = value;
891 }
892
893 /* Clean up and return success */
894 Py_DECREF(it);
895 return 0;
896
897 error:
898 /* Error handling when it != NULL */
899 Py_DECREF(it);
900 return -1;
901}
902
903/* Mostly copied from string_repr, but without the
904 "smart quote" functionality. */
905static PyObject *
906bytes_repr(PyBytesObject *self)
907{
908 static const char *hexdigits = "0123456789abcdef";
909 const char *quote_prefix = "bytearray(b";
910 const char *quote_postfix = ")";
911 Py_ssize_t length = Py_SIZE(self);
912 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
913 size_t newsize = 14 + 4 * length;
914 PyObject *v;
915 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
916 PyErr_SetString(PyExc_OverflowError,
917 "bytearray object is too large to make repr");
918 return NULL;
919 }
920 v = PyUnicode_FromUnicode(NULL, newsize);
921 if (v == NULL) {
922 return NULL;
923 }
924 else {
925 register Py_ssize_t i;
926 register Py_UNICODE c;
927 register Py_UNICODE *p;
928 int quote;
929
930 /* Figure out which quote to use; single is preferred */
931 quote = '\'';
932 {
933 char *test, *start;
934 start = PyBytes_AS_STRING(self);
935 for (test = start; test < start+length; ++test) {
936 if (*test == '"') {
937 quote = '\''; /* back to single */
938 goto decided;
939 }
940 else if (*test == '\'')
941 quote = '"';
942 }
943 decided:
944 ;
945 }
946
947 p = PyUnicode_AS_UNICODE(v);
948 while (*quote_prefix)
949 *p++ = *quote_prefix++;
950 *p++ = quote;
951
952 for (i = 0; i < length; i++) {
953 /* There's at least enough room for a hex escape
954 and a closing quote. */
955 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
956 c = self->ob_bytes[i];
957 if (c == '\'' || c == '\\')
958 *p++ = '\\', *p++ = c;
959 else if (c == '\t')
960 *p++ = '\\', *p++ = 't';
961 else if (c == '\n')
962 *p++ = '\\', *p++ = 'n';
963 else if (c == '\r')
964 *p++ = '\\', *p++ = 'r';
965 else if (c == 0)
966 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
967 else if (c < ' ' || c >= 0x7f) {
968 *p++ = '\\';
969 *p++ = 'x';
970 *p++ = hexdigits[(c & 0xf0) >> 4];
971 *p++ = hexdigits[c & 0xf];
972 }
973 else
974 *p++ = c;
975 }
976 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
977 *p++ = quote;
978 while (*quote_postfix) {
979 *p++ = *quote_postfix++;
980 }
981 *p = '\0';
982 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
983 Py_DECREF(v);
984 return NULL;
985 }
986 return v;
987 }
988}
989
990static PyObject *
991bytes_str(PyObject *op)
992{
993#if 0
994 if (Py_BytesWarningFlag) {
995 if (PyErr_WarnEx(PyExc_BytesWarning,
996 "str() on a bytearray instance", 1))
997 return NULL;
998 }
999 return bytes_repr((PyBytesObject*)op);
1000#endif
1001 return PyString_FromStringAndSize(((PyBytesObject*)op)->ob_bytes, Py_SIZE(op));
1002}
1003
1004static PyObject *
1005bytes_richcompare(PyObject *self, PyObject *other, int op)
1006{
1007 Py_ssize_t self_size, other_size;
1008 Py_buffer self_bytes, other_bytes;
1009 PyObject *res;
1010 Py_ssize_t minsize;
1011 int cmp;
1012
1013 /* Bytes can be compared to anything that supports the (binary)
1014 buffer API. Except that a comparison with Unicode is always an
1015 error, even if the comparison is for equality. */
1016 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1017 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1018 if (Py_BytesWarningFlag && op == Py_EQ) {
1019 if (PyErr_WarnEx(PyExc_BytesWarning,
1020 "Comparsion between bytearray and string", 1))
1021 return NULL;
1022 }
1023
1024 Py_INCREF(Py_NotImplemented);
1025 return Py_NotImplemented;
1026 }
1027
1028 self_size = _getbuffer(self, &self_bytes);
1029 if (self_size < 0) {
1030 PyErr_Clear();
1031 Py_INCREF(Py_NotImplemented);
1032 return Py_NotImplemented;
1033 }
1034
1035 other_size = _getbuffer(other, &other_bytes);
1036 if (other_size < 0) {
1037 PyErr_Clear();
1038 PyObject_ReleaseBuffer(self, &self_bytes);
1039 Py_INCREF(Py_NotImplemented);
1040 return Py_NotImplemented;
1041 }
1042
1043 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1044 /* Shortcut: if the lengths differ, the objects differ */
1045 cmp = (op == Py_NE);
1046 }
1047 else {
1048 minsize = self_size;
1049 if (other_size < minsize)
1050 minsize = other_size;
1051
1052 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1053 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1054
1055 if (cmp == 0) {
1056 if (self_size < other_size)
1057 cmp = -1;
1058 else if (self_size > other_size)
1059 cmp = 1;
1060 }
1061
1062 switch (op) {
1063 case Py_LT: cmp = cmp < 0; break;
1064 case Py_LE: cmp = cmp <= 0; break;
1065 case Py_EQ: cmp = cmp == 0; break;
1066 case Py_NE: cmp = cmp != 0; break;
1067 case Py_GT: cmp = cmp > 0; break;
1068 case Py_GE: cmp = cmp >= 0; break;
1069 }
1070 }
1071
1072 res = cmp ? Py_True : Py_False;
1073 PyObject_ReleaseBuffer(self, &self_bytes);
1074 PyObject_ReleaseBuffer(other, &other_bytes);
1075 Py_INCREF(res);
1076 return res;
1077}
1078
1079static void
1080bytes_dealloc(PyBytesObject *self)
1081{
1082 if (self->ob_bytes != 0) {
1083 PyMem_Free(self->ob_bytes);
1084 }
1085 Py_TYPE(self)->tp_free((PyObject *)self);
1086}
1087
1088
1089/* -------------------------------------------------------------------- */
1090/* Methods */
1091
1092#define STRINGLIB_CHAR char
1093#define STRINGLIB_CMP memcmp
1094#define STRINGLIB_LEN PyBytes_GET_SIZE
1095#define STRINGLIB_STR PyBytes_AS_STRING
1096#define STRINGLIB_NEW PyBytes_FromStringAndSize
1097#define STRINGLIB_EMPTY nullbytes
1098#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
1099#define STRINGLIB_MUTABLE 1
1100
1101#include "stringlib/fastsearch.h"
1102#include "stringlib/count.h"
1103#include "stringlib/find.h"
1104#include "stringlib/partition.h"
1105#include "stringlib/ctype.h"
1106#include "stringlib/transmogrify.h"
1107
1108
1109/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1110were copied from the old char* style string object. */
1111
1112Py_LOCAL_INLINE(void)
1113_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1114{
1115 if (*end > len)
1116 *end = len;
1117 else if (*end < 0)
1118 *end += len;
1119 if (*end < 0)
1120 *end = 0;
1121 if (*start < 0)
1122 *start += len;
1123 if (*start < 0)
1124 *start = 0;
1125}
1126
1127
1128Py_LOCAL_INLINE(Py_ssize_t)
1129bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1130{
1131 PyObject *subobj;
1132 Py_buffer subbuf;
1133 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1134 Py_ssize_t res;
1135
1136 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1137 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1138 return -2;
1139 if (_getbuffer(subobj, &subbuf) < 0)
1140 return -2;
1141 if (dir > 0)
1142 res = stringlib_find_slice(
1143 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1144 subbuf.buf, subbuf.len, start, end);
1145 else
1146 res = stringlib_rfind_slice(
1147 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1148 subbuf.buf, subbuf.len, start, end);
1149 PyObject_ReleaseBuffer(subobj, &subbuf);
1150 return res;
1151}
1152
1153PyDoc_STRVAR(find__doc__,
1154"B.find(sub [,start [,end]]) -> int\n\
1155\n\
1156Return the lowest index in B where subsection sub is found,\n\
1157such that sub is contained within s[start,end]. Optional\n\
1158arguments start and end are interpreted as in slice notation.\n\
1159\n\
1160Return -1 on failure.");
1161
1162static PyObject *
1163bytes_find(PyBytesObject *self, PyObject *args)
1164{
1165 Py_ssize_t result = bytes_find_internal(self, args, +1);
1166 if (result == -2)
1167 return NULL;
1168 return PyInt_FromSsize_t(result);
1169}
1170
1171PyDoc_STRVAR(count__doc__,
1172"B.count(sub [,start [,end]]) -> int\n\
1173\n\
1174Return the number of non-overlapping occurrences of subsection sub in\n\
1175bytes B[start:end]. Optional arguments start and end are interpreted\n\
1176as in slice notation.");
1177
1178static PyObject *
1179bytes_count(PyBytesObject *self, PyObject *args)
1180{
1181 PyObject *sub_obj;
1182 const char *str = PyBytes_AS_STRING(self);
1183 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1184 Py_buffer vsub;
1185 PyObject *count_obj;
1186
1187 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1188 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1189 return NULL;
1190
1191 if (_getbuffer(sub_obj, &vsub) < 0)
1192 return NULL;
1193
1194 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
1195
1196 count_obj = PyInt_FromSsize_t(
1197 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1198 );
1199 PyObject_ReleaseBuffer(sub_obj, &vsub);
1200 return count_obj;
1201}
1202
1203
1204PyDoc_STRVAR(index__doc__,
1205"B.index(sub [,start [,end]]) -> int\n\
1206\n\
1207Like B.find() but raise ValueError when the subsection is not found.");
1208
1209static PyObject *
1210bytes_index(PyBytesObject *self, PyObject *args)
1211{
1212 Py_ssize_t result = bytes_find_internal(self, args, +1);
1213 if (result == -2)
1214 return NULL;
1215 if (result == -1) {
1216 PyErr_SetString(PyExc_ValueError,
1217 "subsection not found");
1218 return NULL;
1219 }
1220 return PyInt_FromSsize_t(result);
1221}
1222
1223
1224PyDoc_STRVAR(rfind__doc__,
1225"B.rfind(sub [,start [,end]]) -> int\n\
1226\n\
1227Return the highest index in B where subsection sub is found,\n\
1228such that sub is contained within s[start,end]. Optional\n\
1229arguments start and end are interpreted as in slice notation.\n\
1230\n\
1231Return -1 on failure.");
1232
1233static PyObject *
1234bytes_rfind(PyBytesObject *self, PyObject *args)
1235{
1236 Py_ssize_t result = bytes_find_internal(self, args, -1);
1237 if (result == -2)
1238 return NULL;
1239 return PyInt_FromSsize_t(result);
1240}
1241
1242
1243PyDoc_STRVAR(rindex__doc__,
1244"B.rindex(sub [,start [,end]]) -> int\n\
1245\n\
1246Like B.rfind() but raise ValueError when the subsection is not found.");
1247
1248static PyObject *
1249bytes_rindex(PyBytesObject *self, PyObject *args)
1250{
1251 Py_ssize_t result = bytes_find_internal(self, args, -1);
1252 if (result == -2)
1253 return NULL;
1254 if (result == -1) {
1255 PyErr_SetString(PyExc_ValueError,
1256 "subsection not found");
1257 return NULL;
1258 }
1259 return PyInt_FromSsize_t(result);
1260}
1261
1262
1263static int
1264bytes_contains(PyObject *self, PyObject *arg)
1265{
1266 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1267 if (ival == -1 && PyErr_Occurred()) {
1268 Py_buffer varg;
1269 int pos;
1270 PyErr_Clear();
1271 if (_getbuffer(arg, &varg) < 0)
1272 return -1;
1273 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1274 varg.buf, varg.len, 0);
1275 PyObject_ReleaseBuffer(arg, &varg);
1276 return pos >= 0;
1277 }
1278 if (ival < 0 || ival >= 256) {
1279 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1280 return -1;
1281 }
1282
1283 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1284}
1285
1286
1287/* Matches the end (direction >= 0) or start (direction < 0) of self
1288 * against substr, using the start and end arguments. Returns
1289 * -1 on error, 0 if not found and 1 if found.
1290 */
1291Py_LOCAL(int)
1292_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1293 Py_ssize_t end, int direction)
1294{
1295 Py_ssize_t len = PyBytes_GET_SIZE(self);
1296 const char* str;
1297 Py_buffer vsubstr;
1298 int rv = 0;
1299
1300 str = PyBytes_AS_STRING(self);
1301
1302 if (_getbuffer(substr, &vsubstr) < 0)
1303 return -1;
1304
1305 _adjust_indices(&start, &end, len);
1306
1307 if (direction < 0) {
1308 /* startswith */
1309 if (start+vsubstr.len > len) {
1310 goto done;
1311 }
1312 } else {
1313 /* endswith */
1314 if (end-start < vsubstr.len || start > len) {
1315 goto done;
1316 }
1317
1318 if (end-vsubstr.len > start)
1319 start = end - vsubstr.len;
1320 }
1321 if (end-start >= vsubstr.len)
1322 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1323
1324done:
1325 PyObject_ReleaseBuffer(substr, &vsubstr);
1326 return rv;
1327}
1328
1329
1330PyDoc_STRVAR(startswith__doc__,
1331"B.startswith(prefix [,start [,end]]) -> bool\n\
1332\n\
1333Return True if B starts with the specified prefix, False otherwise.\n\
1334With optional start, test B beginning at that position.\n\
1335With optional end, stop comparing B at that position.\n\
1336prefix can also be a tuple of strings to try.");
1337
1338static PyObject *
1339bytes_startswith(PyBytesObject *self, PyObject *args)
1340{
1341 Py_ssize_t start = 0;
1342 Py_ssize_t end = PY_SSIZE_T_MAX;
1343 PyObject *subobj;
1344 int result;
1345
1346 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1347 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1348 return NULL;
1349 if (PyTuple_Check(subobj)) {
1350 Py_ssize_t i;
1351 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1352 result = _bytes_tailmatch(self,
1353 PyTuple_GET_ITEM(subobj, i),
1354 start, end, -1);
1355 if (result == -1)
1356 return NULL;
1357 else if (result) {
1358 Py_RETURN_TRUE;
1359 }
1360 }
1361 Py_RETURN_FALSE;
1362 }
1363 result = _bytes_tailmatch(self, subobj, start, end, -1);
1364 if (result == -1)
1365 return NULL;
1366 else
1367 return PyBool_FromLong(result);
1368}
1369
1370PyDoc_STRVAR(endswith__doc__,
1371"B.endswith(suffix [,start [,end]]) -> bool\n\
1372\n\
1373Return True if B ends with the specified suffix, False otherwise.\n\
1374With optional start, test B beginning at that position.\n\
1375With optional end, stop comparing B at that position.\n\
1376suffix can also be a tuple of strings to try.");
1377
1378static PyObject *
1379bytes_endswith(PyBytesObject *self, PyObject *args)
1380{
1381 Py_ssize_t start = 0;
1382 Py_ssize_t end = PY_SSIZE_T_MAX;
1383 PyObject *subobj;
1384 int result;
1385
1386 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1387 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1388 return NULL;
1389 if (PyTuple_Check(subobj)) {
1390 Py_ssize_t i;
1391 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1392 result = _bytes_tailmatch(self,
1393 PyTuple_GET_ITEM(subobj, i),
1394 start, end, +1);
1395 if (result == -1)
1396 return NULL;
1397 else if (result) {
1398 Py_RETURN_TRUE;
1399 }
1400 }
1401 Py_RETURN_FALSE;
1402 }
1403 result = _bytes_tailmatch(self, subobj, start, end, +1);
1404 if (result == -1)
1405 return NULL;
1406 else
1407 return PyBool_FromLong(result);
1408}
1409
1410
1411PyDoc_STRVAR(translate__doc__,
1412"B.translate(table[, deletechars]) -> bytearray\n\
1413\n\
1414Return a copy of B, where all characters occurring in the\n\
1415optional argument deletechars are removed, and the remaining\n\
1416characters have been mapped through the given translation\n\
1417table, which must be a bytes object of length 256.");
1418
1419static PyObject *
1420bytes_translate(PyBytesObject *self, PyObject *args)
1421{
1422 register char *input, *output;
1423 register const char *table;
1424 register Py_ssize_t i, c, changed = 0;
1425 PyObject *input_obj = (PyObject*)self;
1426 const char *output_start;
1427 Py_ssize_t inlen;
1428 PyObject *result;
1429 int trans_table[256];
1430 PyObject *tableobj, *delobj = NULL;
1431 Py_buffer vtable, vdel;
1432
1433 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1434 &tableobj, &delobj))
1435 return NULL;
1436
1437 if (_getbuffer(tableobj, &vtable) < 0)
1438 return NULL;
1439
1440 if (vtable.len != 256) {
1441 PyErr_SetString(PyExc_ValueError,
1442 "translation table must be 256 characters long");
1443 result = NULL;
1444 goto done;
1445 }
1446
1447 if (delobj != NULL) {
1448 if (_getbuffer(delobj, &vdel) < 0) {
1449 result = NULL;
1450 goto done;
1451 }
1452 }
1453 else {
1454 vdel.buf = NULL;
1455 vdel.len = 0;
1456 }
1457
1458 table = (const char *)vtable.buf;
1459 inlen = PyBytes_GET_SIZE(input_obj);
1460 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1461 if (result == NULL)
1462 goto done;
1463 output_start = output = PyBytes_AsString(result);
1464 input = PyBytes_AS_STRING(input_obj);
1465
1466 if (vdel.len == 0) {
1467 /* If no deletions are required, use faster code */
1468 for (i = inlen; --i >= 0; ) {
1469 c = Py_CHARMASK(*input++);
1470 if (Py_CHARMASK((*output++ = table[c])) != c)
1471 changed = 1;
1472 }
1473 if (changed || !PyBytes_CheckExact(input_obj))
1474 goto done;
1475 Py_DECREF(result);
1476 Py_INCREF(input_obj);
1477 result = input_obj;
1478 goto done;
1479 }
1480
1481 for (i = 0; i < 256; i++)
1482 trans_table[i] = Py_CHARMASK(table[i]);
1483
1484 for (i = 0; i < vdel.len; i++)
1485 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1486
1487 for (i = inlen; --i >= 0; ) {
1488 c = Py_CHARMASK(*input++);
1489 if (trans_table[c] != -1)
1490 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1491 continue;
1492 changed = 1;
1493 }
1494 if (!changed && PyBytes_CheckExact(input_obj)) {
1495 Py_DECREF(result);
1496 Py_INCREF(input_obj);
1497 result = input_obj;
1498 goto done;
1499 }
1500 /* Fix the size of the resulting string */
1501 if (inlen > 0)
1502 PyBytes_Resize(result, output - output_start);
1503
1504done:
1505 PyObject_ReleaseBuffer(tableobj, &vtable);
1506 if (delobj != NULL)
1507 PyObject_ReleaseBuffer(delobj, &vdel);
1508 return result;
1509}
1510
1511
/* Search directions for findstring()/countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len
   bytes of target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the length bytes of pattern occur in target at offset.
   The first and last bytes are compared before memcmp is called on the
   interior.
   Don't call if length < 2: the interior length (length-2) would be
   negative.  Arguments may be evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1525
1526
1527/* Bytes ops must return a string. */
1528/* If the object is subclass of bytes, create a copy */
1529Py_LOCAL(PyBytesObject *)
1530return_self(PyBytesObject *self)
1531{
1532 if (PyBytes_CheckExact(self)) {
1533 Py_INCREF(self);
1534 return (PyBytesObject *)self;
1535 }
1536 return (PyBytesObject *)PyBytes_FromStringAndSize(
1537 PyBytes_AS_STRING(self),
1538 PyBytes_GET_SIZE(self));
1539}
1540
1541Py_LOCAL_INLINE(Py_ssize_t)
1542countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1543{
1544 Py_ssize_t count=0;
1545 const char *start=target;
1546 const char *end=target+target_len;
1547
1548 while ( (start=findchar(start, end-start, c)) != NULL ) {
1549 count++;
1550 if (count >= maxcount)
1551 break;
1552 start += 1;
1553 }
1554 return count;
1555}
1556
1557Py_LOCAL(Py_ssize_t)
1558findstring(const char *target, Py_ssize_t target_len,
1559 const char *pattern, Py_ssize_t pattern_len,
1560 Py_ssize_t start,
1561 Py_ssize_t end,
1562 int direction)
1563{
1564 if (start < 0) {
1565 start += target_len;
1566 if (start < 0)
1567 start = 0;
1568 }
1569 if (end > target_len) {
1570 end = target_len;
1571 } else if (end < 0) {
1572 end += target_len;
1573 if (end < 0)
1574 end = 0;
1575 }
1576
1577 /* zero-length substrings always match at the first attempt */
1578 if (pattern_len == 0)
1579 return (direction > 0) ? start : end;
1580
1581 end -= pattern_len;
1582
1583 if (direction < 0) {
1584 for (; end >= start; end--)
1585 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1586 return end;
1587 } else {
1588 for (; start <= end; start++)
1589 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1590 return start;
1591 }
1592 return -1;
1593}
1594
1595Py_LOCAL_INLINE(Py_ssize_t)
1596countstring(const char *target, Py_ssize_t target_len,
1597 const char *pattern, Py_ssize_t pattern_len,
1598 Py_ssize_t start,
1599 Py_ssize_t end,
1600 int direction, Py_ssize_t maxcount)
1601{
1602 Py_ssize_t count=0;
1603
1604 if (start < 0) {
1605 start += target_len;
1606 if (start < 0)
1607 start = 0;
1608 }
1609 if (end > target_len) {
1610 end = target_len;
1611 } else if (end < 0) {
1612 end += target_len;
1613 if (end < 0)
1614 end = 0;
1615 }
1616
1617 /* zero-length substrings match everywhere */
1618 if (pattern_len == 0 || maxcount == 0) {
1619 if (target_len+1 < maxcount)
1620 return target_len+1;
1621 return maxcount;
1622 }
1623
1624 end -= pattern_len;
1625 if (direction < 0) {
1626 for (; (end >= start); end--)
1627 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1628 count++;
1629 if (--maxcount <= 0) break;
1630 end -= pattern_len-1;
1631 }
1632 } else {
1633 for (; (start <= end); start++)
1634 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1635 count++;
1636 if (--maxcount <= 0)
1637 break;
1638 start += pattern_len-1;
1639 }
1640 }
1641 return count;
1642}
1643
1644
1645/* Algorithms for different cases of string replacement */
1646
1647/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1648Py_LOCAL(PyBytesObject *)
1649replace_interleave(PyBytesObject *self,
1650 const char *to_s, Py_ssize_t to_len,
1651 Py_ssize_t maxcount)
1652{
1653 char *self_s, *result_s;
1654 Py_ssize_t self_len, result_len;
1655 Py_ssize_t count, i, product;
1656 PyBytesObject *result;
1657
1658 self_len = PyBytes_GET_SIZE(self);
1659
1660 /* 1 at the end plus 1 after every character */
1661 count = self_len+1;
1662 if (maxcount < count)
1663 count = maxcount;
1664
1665 /* Check for overflow */
1666 /* result_len = count * to_len + self_len; */
1667 product = count * to_len;
1668 if (product / to_len != count) {
1669 PyErr_SetString(PyExc_OverflowError,
1670 "replace string is too long");
1671 return NULL;
1672 }
1673 result_len = product + self_len;
1674 if (result_len < 0) {
1675 PyErr_SetString(PyExc_OverflowError,
1676 "replace string is too long");
1677 return NULL;
1678 }
1679
1680 if (! (result = (PyBytesObject *)
1681 PyBytes_FromStringAndSize(NULL, result_len)) )
1682 return NULL;
1683
1684 self_s = PyBytes_AS_STRING(self);
1685 result_s = PyBytes_AS_STRING(result);
1686
1687 /* TODO: special case single character, which doesn't need memcpy */
1688
1689 /* Lay the first one down (guaranteed this will occur) */
1690 Py_MEMCPY(result_s, to_s, to_len);
1691 result_s += to_len;
1692 count -= 1;
1693
1694 for (i=0; i<count; i++) {
1695 *result_s++ = *self_s++;
1696 Py_MEMCPY(result_s, to_s, to_len);
1697 result_s += to_len;
1698 }
1699
1700 /* Copy the rest of the original string */
1701 Py_MEMCPY(result_s, self_s, self_len-i);
1702
1703 return result;
1704}
1705
1706/* Special case for deleting a single character */
1707/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1708Py_LOCAL(PyBytesObject *)
1709replace_delete_single_character(PyBytesObject *self,
1710 char from_c, Py_ssize_t maxcount)
1711{
1712 char *self_s, *result_s;
1713 char *start, *next, *end;
1714 Py_ssize_t self_len, result_len;
1715 Py_ssize_t count;
1716 PyBytesObject *result;
1717
1718 self_len = PyBytes_GET_SIZE(self);
1719 self_s = PyBytes_AS_STRING(self);
1720
1721 count = countchar(self_s, self_len, from_c, maxcount);
1722 if (count == 0) {
1723 return return_self(self);
1724 }
1725
1726 result_len = self_len - count; /* from_len == 1 */
1727 assert(result_len>=0);
1728
1729 if ( (result = (PyBytesObject *)
1730 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1731 return NULL;
1732 result_s = PyBytes_AS_STRING(result);
1733
1734 start = self_s;
1735 end = self_s + self_len;
1736 while (count-- > 0) {
1737 next = findchar(start, end-start, from_c);
1738 if (next == NULL)
1739 break;
1740 Py_MEMCPY(result_s, start, next-start);
1741 result_s += (next-start);
1742 start = next+1;
1743 }
1744 Py_MEMCPY(result_s, start, end-start);
1745
1746 return result;
1747}
1748
1749/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1750
1751Py_LOCAL(PyBytesObject *)
1752replace_delete_substring(PyBytesObject *self,
1753 const char *from_s, Py_ssize_t from_len,
1754 Py_ssize_t maxcount)
1755{
1756 char *self_s, *result_s;
1757 char *start, *next, *end;
1758 Py_ssize_t self_len, result_len;
1759 Py_ssize_t count, offset;
1760 PyBytesObject *result;
1761
1762 self_len = PyBytes_GET_SIZE(self);
1763 self_s = PyBytes_AS_STRING(self);
1764
1765 count = countstring(self_s, self_len,
1766 from_s, from_len,
1767 0, self_len, 1,
1768 maxcount);
1769
1770 if (count == 0) {
1771 /* no matches */
1772 return return_self(self);
1773 }
1774
1775 result_len = self_len - (count * from_len);
1776 assert (result_len>=0);
1777
1778 if ( (result = (PyBytesObject *)
1779 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1780 return NULL;
1781
1782 result_s = PyBytes_AS_STRING(result);
1783
1784 start = self_s;
1785 end = self_s + self_len;
1786 while (count-- > 0) {
1787 offset = findstring(start, end-start,
1788 from_s, from_len,
1789 0, end-start, FORWARD);
1790 if (offset == -1)
1791 break;
1792 next = start + offset;
1793
1794 Py_MEMCPY(result_s, start, next-start);
1795
1796 result_s += (next-start);
1797 start = next+from_len;
1798 }
1799 Py_MEMCPY(result_s, start, end-start);
1800 return result;
1801}
1802
1803/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1804Py_LOCAL(PyBytesObject *)
1805replace_single_character_in_place(PyBytesObject *self,
1806 char from_c, char to_c,
1807 Py_ssize_t maxcount)
1808{
1809 char *self_s, *result_s, *start, *end, *next;
1810 Py_ssize_t self_len;
1811 PyBytesObject *result;
1812
1813 /* The result string will be the same size */
1814 self_s = PyBytes_AS_STRING(self);
1815 self_len = PyBytes_GET_SIZE(self);
1816
1817 next = findchar(self_s, self_len, from_c);
1818
1819 if (next == NULL) {
1820 /* No matches; return the original bytes */
1821 return return_self(self);
1822 }
1823
1824 /* Need to make a new bytes */
1825 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1826 if (result == NULL)
1827 return NULL;
1828 result_s = PyBytes_AS_STRING(result);
1829 Py_MEMCPY(result_s, self_s, self_len);
1830
1831 /* change everything in-place, starting with this one */
1832 start = result_s + (next-self_s);
1833 *start = to_c;
1834 start++;
1835 end = result_s + self_len;
1836
1837 while (--maxcount > 0) {
1838 next = findchar(start, end-start, from_c);
1839 if (next == NULL)
1840 break;
1841 *next = to_c;
1842 start = next+1;
1843 }
1844
1845 return result;
1846}
1847
1848/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1849Py_LOCAL(PyBytesObject *)
1850replace_substring_in_place(PyBytesObject *self,
1851 const char *from_s, Py_ssize_t from_len,
1852 const char *to_s, Py_ssize_t to_len,
1853 Py_ssize_t maxcount)
1854{
1855 char *result_s, *start, *end;
1856 char *self_s;
1857 Py_ssize_t self_len, offset;
1858 PyBytesObject *result;
1859
1860 /* The result bytes will be the same size */
1861
1862 self_s = PyBytes_AS_STRING(self);
1863 self_len = PyBytes_GET_SIZE(self);
1864
1865 offset = findstring(self_s, self_len,
1866 from_s, from_len,
1867 0, self_len, FORWARD);
1868 if (offset == -1) {
1869 /* No matches; return the original bytes */
1870 return return_self(self);
1871 }
1872
1873 /* Need to make a new bytes */
1874 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1875 if (result == NULL)
1876 return NULL;
1877 result_s = PyBytes_AS_STRING(result);
1878 Py_MEMCPY(result_s, self_s, self_len);
1879
1880 /* change everything in-place, starting with this one */
1881 start = result_s + offset;
1882 Py_MEMCPY(start, to_s, from_len);
1883 start += from_len;
1884 end = result_s + self_len;
1885
1886 while ( --maxcount > 0) {
1887 offset = findstring(start, end-start,
1888 from_s, from_len,
1889 0, end-start, FORWARD);
1890 if (offset==-1)
1891 break;
1892 Py_MEMCPY(start+offset, to_s, from_len);
1893 start += offset+from_len;
1894 }
1895
1896 return result;
1897}
1898
1899/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1900Py_LOCAL(PyBytesObject *)
1901replace_single_character(PyBytesObject *self,
1902 char from_c,
1903 const char *to_s, Py_ssize_t to_len,
1904 Py_ssize_t maxcount)
1905{
1906 char *self_s, *result_s;
1907 char *start, *next, *end;
1908 Py_ssize_t self_len, result_len;
1909 Py_ssize_t count, product;
1910 PyBytesObject *result;
1911
1912 self_s = PyBytes_AS_STRING(self);
1913 self_len = PyBytes_GET_SIZE(self);
1914
1915 count = countchar(self_s, self_len, from_c, maxcount);
1916 if (count == 0) {
1917 /* no matches, return unchanged */
1918 return return_self(self);
1919 }
1920
1921 /* use the difference between current and new, hence the "-1" */
1922 /* result_len = self_len + count * (to_len-1) */
1923 product = count * (to_len-1);
1924 if (product / (to_len-1) != count) {
1925 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1926 return NULL;
1927 }
1928 result_len = self_len + product;
1929 if (result_len < 0) {
1930 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1931 return NULL;
1932 }
1933
1934 if ( (result = (PyBytesObject *)
1935 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1936 return NULL;
1937 result_s = PyBytes_AS_STRING(result);
1938
1939 start = self_s;
1940 end = self_s + self_len;
1941 while (count-- > 0) {
1942 next = findchar(start, end-start, from_c);
1943 if (next == NULL)
1944 break;
1945
1946 if (next == start) {
1947 /* replace with the 'to' */
1948 Py_MEMCPY(result_s, to_s, to_len);
1949 result_s += to_len;
1950 start += 1;
1951 } else {
1952 /* copy the unchanged old then the 'to' */
1953 Py_MEMCPY(result_s, start, next-start);
1954 result_s += (next-start);
1955 Py_MEMCPY(result_s, to_s, to_len);
1956 result_s += to_len;
1957 start = next+1;
1958 }
1959 }
1960 /* Copy the remainder of the remaining bytes */
1961 Py_MEMCPY(result_s, start, end-start);
1962
1963 return result;
1964}
1965
1966/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1967Py_LOCAL(PyBytesObject *)
1968replace_substring(PyBytesObject *self,
1969 const char *from_s, Py_ssize_t from_len,
1970 const char *to_s, Py_ssize_t to_len,
1971 Py_ssize_t maxcount)
1972{
1973 char *self_s, *result_s;
1974 char *start, *next, *end;
1975 Py_ssize_t self_len, result_len;
1976 Py_ssize_t count, offset, product;
1977 PyBytesObject *result;
1978
1979 self_s = PyBytes_AS_STRING(self);
1980 self_len = PyBytes_GET_SIZE(self);
1981
1982 count = countstring(self_s, self_len,
1983 from_s, from_len,
1984 0, self_len, FORWARD, maxcount);
1985 if (count == 0) {
1986 /* no matches, return unchanged */
1987 return return_self(self);
1988 }
1989
1990 /* Check for overflow */
1991 /* result_len = self_len + count * (to_len-from_len) */
1992 product = count * (to_len-from_len);
1993 if (product / (to_len-from_len) != count) {
1994 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1995 return NULL;
1996 }
1997 result_len = self_len + product;
1998 if (result_len < 0) {
1999 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2000 return NULL;
2001 }
2002
2003 if ( (result = (PyBytesObject *)
2004 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2005 return NULL;
2006 result_s = PyBytes_AS_STRING(result);
2007
2008 start = self_s;
2009 end = self_s + self_len;
2010 while (count-- > 0) {
2011 offset = findstring(start, end-start,
2012 from_s, from_len,
2013 0, end-start, FORWARD);
2014 if (offset == -1)
2015 break;
2016 next = start+offset;
2017 if (next == start) {
2018 /* replace with the 'to' */
2019 Py_MEMCPY(result_s, to_s, to_len);
2020 result_s += to_len;
2021 start += from_len;
2022 } else {
2023 /* copy the unchanged old then the 'to' */
2024 Py_MEMCPY(result_s, start, next-start);
2025 result_s += (next-start);
2026 Py_MEMCPY(result_s, to_s, to_len);
2027 result_s += to_len;
2028 start = next+from_len;
2029 }
2030 }
2031 /* Copy the remainder of the remaining bytes */
2032 Py_MEMCPY(result_s, start, end-start);
2033
2034 return result;
2035}
2036
2037
2038Py_LOCAL(PyBytesObject *)
2039replace(PyBytesObject *self,
2040 const char *from_s, Py_ssize_t from_len,
2041 const char *to_s, Py_ssize_t to_len,
2042 Py_ssize_t maxcount)
2043{
2044 if (maxcount < 0) {
2045 maxcount = PY_SSIZE_T_MAX;
2046 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2047 /* nothing to do; return the original bytes */
2048 return return_self(self);
2049 }
2050
2051 if (maxcount == 0 ||
2052 (from_len == 0 && to_len == 0)) {
2053 /* nothing to do; return the original bytes */
2054 return return_self(self);
2055 }
2056
2057 /* Handle zero-length special cases */
2058
2059 if (from_len == 0) {
2060 /* insert the 'to' bytes everywhere. */
2061 /* >>> "Python".replace("", ".") */
2062 /* '.P.y.t.h.o.n.' */
2063 return replace_interleave(self, to_s, to_len, maxcount);
2064 }
2065
2066 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2067 /* point for an empty self bytes to generate a non-empty bytes */
2068 /* Special case so the remaining code always gets a non-empty bytes */
2069 if (PyBytes_GET_SIZE(self) == 0) {
2070 return return_self(self);
2071 }
2072
2073 if (to_len == 0) {
2074 /* delete all occurances of 'from' bytes */
2075 if (from_len == 1) {
2076 return replace_delete_single_character(
2077 self, from_s[0], maxcount);
2078 } else {
2079 return replace_delete_substring(self, from_s, from_len, maxcount);
2080 }
2081 }
2082
2083 /* Handle special case where both bytes have the same length */
2084
2085 if (from_len == to_len) {
2086 if (from_len == 1) {
2087 return replace_single_character_in_place(
2088 self,
2089 from_s[0],
2090 to_s[0],
2091 maxcount);
2092 } else {
2093 return replace_substring_in_place(
2094 self, from_s, from_len, to_s, to_len, maxcount);
2095 }
2096 }
2097
2098 /* Otherwise use the more generic algorithms */
2099 if (from_len == 1) {
2100 return replace_single_character(self, from_s[0],
2101 to_s, to_len, maxcount);
2102 } else {
2103 /* len('from')>=2, len('to')>=1 */
2104 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2105 }
2106}
2107
2108
2109PyDoc_STRVAR(replace__doc__,
2110"B.replace(old, new[, count]) -> bytes\n\
2111\n\
2112Return a copy of B with all occurrences of subsection\n\
2113old replaced by new. If the optional argument count is\n\
2114given, only the first count occurrences are replaced.");
2115
2116static PyObject *
2117bytes_replace(PyBytesObject *self, PyObject *args)
2118{
2119 Py_ssize_t count = -1;
2120 PyObject *from, *to, *res;
2121 Py_buffer vfrom, vto;
2122
2123 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2124 return NULL;
2125
2126 if (_getbuffer(from, &vfrom) < 0)
2127 return NULL;
2128 if (_getbuffer(to, &vto) < 0) {
2129 PyObject_ReleaseBuffer(from, &vfrom);
2130 return NULL;
2131 }
2132
2133 res = (PyObject *)replace((PyBytesObject *) self,
2134 vfrom.buf, vfrom.len,
2135 vto.buf, vto.len, count);
2136
2137 PyObject_ReleaseBuffer(from, &vfrom);
2138 PyObject_ReleaseBuffer(to, &vto);
2139 return res;
2140}
2141
2142
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The SPLIT_* macros below expand in a function that provides the locals
   `str` and `list` (plus `count` for SPLIT_ADD) and an `onError` label.
   They are wrapped in do { } while (0) so each behaves as one statement;
   always follow an invocation with a semicolon. */

/* Append data[left:right] to `list`; jump to onError on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    } while (0)

/* Store data[left:right] as item number `count` of `list`: directly into
   a preallocated slot while count < MAX_PREALLOC, by appending after
   that.  Increments count; jumps to onError on failure. */
#define SPLIT_ADD(data, left, right)                            \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        } else {                                                \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            else                                                \
                Py_DECREF(str);                                 \
        }                                                       \
        count++;                                                \
    } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2187
2188
2189Py_LOCAL_INLINE(PyObject *)
2190split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2191{
2192 register Py_ssize_t i, j, count = 0;
2193 PyObject *str;
2194 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2195
2196 if (list == NULL)
2197 return NULL;
2198
2199 i = j = 0;
2200 while ((j < len) && (maxcount-- > 0)) {
2201 for(; j < len; j++) {
2202 /* I found that using memchr makes no difference */
2203 if (s[j] == ch) {
2204 SPLIT_ADD(s, i, j);
2205 i = j = j + 1;
2206 break;
2207 }
2208 }
2209 }
2210 if (i <= len) {
2211 SPLIT_ADD(s, i, len);
2212 }
2213 FIX_PREALLOC_SIZE(list);
2214 return list;
2215
2216 onError:
2217 Py_DECREF(list);
2218 return NULL;
2219}
2220
2221
2222Py_LOCAL_INLINE(PyObject *)
2223split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2224{
2225 register Py_ssize_t i, j, count = 0;
2226 PyObject *str;
2227 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2228
2229 if (list == NULL)
2230 return NULL;
2231
2232 for (i = j = 0; i < len; ) {
2233 /* find a token */
2234 while (i < len && ISSPACE(s[i]))
2235 i++;
2236 j = i;
2237 while (i < len && !ISSPACE(s[i]))
2238 i++;
2239 if (j < i) {
2240 if (maxcount-- <= 0)
2241 break;
2242 SPLIT_ADD(s, j, i);
2243 while (i < len && ISSPACE(s[i]))
2244 i++;
2245 j = i;
2246 }
2247 }
2248 if (j < len) {
2249 SPLIT_ADD(s, j, len);
2250 }
2251 FIX_PREALLOC_SIZE(list);
2252 return list;
2253
2254 onError:
2255 Py_DECREF(list);
2256 return NULL;
2257}
2258
2259PyDoc_STRVAR(split__doc__,
2260"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2261\n\
2262Return a list of the sections in B, using sep as the delimiter.\n\
2263If sep is not given, B is split on ASCII whitespace characters\n\
2264(space, tab, return, newline, formfeed, vertical tab).\n\
2265If maxsplit is given, at most maxsplit splits are done.");
2266
2267static PyObject *
2268bytes_split(PyBytesObject *self, PyObject *args)
2269{
2270 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2271 Py_ssize_t maxsplit = -1, count = 0;
2272 const char *s = PyBytes_AS_STRING(self), *sub;
2273 PyObject *list, *str, *subobj = Py_None;
2274 Py_buffer vsub;
2275#ifdef USE_FAST
2276 Py_ssize_t pos;
2277#endif
2278
2279 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2280 return NULL;
2281 if (maxsplit < 0)
2282 maxsplit = PY_SSIZE_T_MAX;
2283
2284 if (subobj == Py_None)
2285 return split_whitespace(s, len, maxsplit);
2286
2287 if (_getbuffer(subobj, &vsub) < 0)
2288 return NULL;
2289 sub = vsub.buf;
2290 n = vsub.len;
2291
2292 if (n == 0) {
2293 PyErr_SetString(PyExc_ValueError, "empty separator");
2294 PyObject_ReleaseBuffer(subobj, &vsub);
2295 return NULL;
2296 }
2297 if (n == 1)
2298 return split_char(s, len, sub[0], maxsplit);
2299
2300 list = PyList_New(PREALLOC_SIZE(maxsplit));
2301 if (list == NULL) {
2302 PyObject_ReleaseBuffer(subobj, &vsub);
2303 return NULL;
2304 }
2305
2306#ifdef USE_FAST
2307 i = j = 0;
2308 while (maxsplit-- > 0) {
2309 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2310 if (pos < 0)
2311 break;
2312 j = i+pos;
2313 SPLIT_ADD(s, i, j);
2314 i = j + n;
2315 }
2316#else
2317 i = j = 0;
2318 while ((j+n <= len) && (maxsplit-- > 0)) {
2319 for (; j+n <= len; j++) {
2320 if (Py_STRING_MATCH(s, j, sub, n)) {
2321 SPLIT_ADD(s, i, j);
2322 i = j = j + n;
2323 break;
2324 }
2325 }
2326 }
2327#endif
2328 SPLIT_ADD(s, i, len);
2329 FIX_PREALLOC_SIZE(list);
2330 PyObject_ReleaseBuffer(subobj, &vsub);
2331 return list;
2332
2333 onError:
2334 Py_DECREF(list);
2335 PyObject_ReleaseBuffer(subobj, &vsub);
2336 return NULL;
2337}
2338
2339/* stringlib's partition shares nullbytes in some cases.
2340 undo this, we don't want the nullbytes to be shared. */
2341static PyObject *
2342make_nullbytes_unique(PyObject *result)
2343{
2344 if (result != NULL) {
2345 int i;
2346 assert(PyTuple_Check(result));
2347 assert(PyTuple_GET_SIZE(result) == 3);
2348 for (i = 0; i < 3; i++) {
2349 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2350 PyObject *new = PyBytes_FromStringAndSize(NULL, 0);
2351 if (new == NULL) {
2352 Py_DECREF(result);
2353 result = NULL;
2354 break;
2355 }
2356 Py_DECREF(nullbytes);
2357 PyTuple_SET_ITEM(result, i, new);
2358 }
2359 }
2360 }
2361 return result;
2362}
2363
2364PyDoc_STRVAR(partition__doc__,
2365"B.partition(sep) -> (head, sep, tail)\n\
2366\n\
2367Searches for the separator sep in B, and returns the part before it,\n\
2368the separator itself, and the part after it. If the separator is not\n\
2369found, returns B and two empty bytearray objects.");
2370
2371static PyObject *
2372bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2373{
2374 PyObject *bytesep, *result;
2375
2376 bytesep = PyBytes_FromObject(sep_obj);
2377 if (! bytesep)
2378 return NULL;
2379
2380 result = stringlib_partition(
2381 (PyObject*) self,
2382 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2383 bytesep,
2384 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2385 );
2386
2387 Py_DECREF(bytesep);
2388 return make_nullbytes_unique(result);
2389}
2390
2391PyDoc_STRVAR(rpartition__doc__,
2392"B.rpartition(sep) -> (tail, sep, head)\n\
2393\n\
2394Searches for the separator sep in B, starting at the end of B,\n\
2395and returns the part before it, the separator itself, and the\n\
2396part after it. If the separator is not found, returns two empty\n\
2397bytearray objects and B.");
2398
2399static PyObject *
2400bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2401{
2402 PyObject *bytesep, *result;
2403
2404 bytesep = PyBytes_FromObject(sep_obj);
2405 if (! bytesep)
2406 return NULL;
2407
2408 result = stringlib_rpartition(
2409 (PyObject*) self,
2410 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2411 bytesep,
2412 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2413 );
2414
2415 Py_DECREF(bytesep);
2416 return make_nullbytes_unique(result);
2417}
2418
2419Py_LOCAL_INLINE(PyObject *)
2420rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2421{
2422 register Py_ssize_t i, j, count=0;
2423 PyObject *str;
2424 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2425
2426 if (list == NULL)
2427 return NULL;
2428
2429 i = j = len - 1;
2430 while ((i >= 0) && (maxcount-- > 0)) {
2431 for (; i >= 0; i--) {
2432 if (s[i] == ch) {
2433 SPLIT_ADD(s, i + 1, j + 1);
2434 j = i = i - 1;
2435 break;
2436 }
2437 }
2438 }
2439 if (j >= -1) {
2440 SPLIT_ADD(s, 0, j + 1);
2441 }
2442 FIX_PREALLOC_SIZE(list);
2443 if (PyList_Reverse(list) < 0)
2444 goto onError;
2445
2446 return list;
2447
2448 onError:
2449 Py_DECREF(list);
2450 return NULL;
2451}
2452
2453Py_LOCAL_INLINE(PyObject *)
2454rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2455{
2456 register Py_ssize_t i, j, count = 0;
2457 PyObject *str;
2458 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2459
2460 if (list == NULL)
2461 return NULL;
2462
2463 for (i = j = len - 1; i >= 0; ) {
2464 /* find a token */
2465 while (i >= 0 && ISSPACE(s[i]))
2466 i--;
2467 j = i;
2468 while (i >= 0 && !ISSPACE(s[i]))
2469 i--;
2470 if (j > i) {
2471 if (maxcount-- <= 0)
2472 break;
2473 SPLIT_ADD(s, i + 1, j + 1);
2474 while (i >= 0 && ISSPACE(s[i]))
2475 i--;
2476 j = i;
2477 }
2478 }
2479 if (j >= 0) {
2480 SPLIT_ADD(s, 0, j + 1);
2481 }
2482 FIX_PREALLOC_SIZE(list);
2483 if (PyList_Reverse(list) < 0)
2484 goto onError;
2485
2486 return list;
2487
2488 onError:
2489 Py_DECREF(list);
2490 return NULL;
2491}
2492
2493PyDoc_STRVAR(rsplit__doc__,
2494"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2495\n\
2496Return a list of the sections in B, using sep as the delimiter,\n\
2497starting at the end of B and working to the front.\n\
2498If sep is not given, B is split on ASCII whitespace characters\n\
2499(space, tab, return, newline, formfeed, vertical tab).\n\
2500If maxsplit is given, at most maxsplit splits are done.");
2501
2502static PyObject *
2503bytes_rsplit(PyBytesObject *self, PyObject *args)
2504{
2505 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2506 Py_ssize_t maxsplit = -1, count = 0;
2507 const char *s = PyBytes_AS_STRING(self), *sub;
2508 PyObject *list, *str, *subobj = Py_None;
2509 Py_buffer vsub;
2510
2511 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2512 return NULL;
2513 if (maxsplit < 0)
2514 maxsplit = PY_SSIZE_T_MAX;
2515
2516 if (subobj == Py_None)
2517 return rsplit_whitespace(s, len, maxsplit);
2518
2519 if (_getbuffer(subobj, &vsub) < 0)
2520 return NULL;
2521 sub = vsub.buf;
2522 n = vsub.len;
2523
2524 if (n == 0) {
2525 PyErr_SetString(PyExc_ValueError, "empty separator");
2526 PyObject_ReleaseBuffer(subobj, &vsub);
2527 return NULL;
2528 }
2529 else if (n == 1)
2530 return rsplit_char(s, len, sub[0], maxsplit);
2531
2532 list = PyList_New(PREALLOC_SIZE(maxsplit));
2533 if (list == NULL) {
2534 PyObject_ReleaseBuffer(subobj, &vsub);
2535 return NULL;
2536 }
2537
2538 j = len;
2539 i = j - n;
2540
2541 while ( (i >= 0) && (maxsplit-- > 0) ) {
2542 for (; i>=0; i--) {
2543 if (Py_STRING_MATCH(s, i, sub, n)) {
2544 SPLIT_ADD(s, i + n, j);
2545 j = i;
2546 i -= n;
2547 break;
2548 }
2549 }
2550 }
2551 SPLIT_ADD(s, 0, j);
2552 FIX_PREALLOC_SIZE(list);
2553 if (PyList_Reverse(list) < 0)
2554 goto onError;
2555 PyObject_ReleaseBuffer(subobj, &vsub);
2556 return list;
2557
2558onError:
2559 Py_DECREF(list);
2560 PyObject_ReleaseBuffer(subobj, &vsub);
2561 return NULL;
2562}
2563
2564PyDoc_STRVAR(reverse__doc__,
2565"B.reverse() -> None\n\
2566\n\
2567Reverse the order of the values in B in place.");
2568static PyObject *
2569bytes_reverse(PyBytesObject *self, PyObject *unused)
2570{
2571 char swap, *head, *tail;
2572 Py_ssize_t i, j, n = Py_SIZE(self);
2573
2574 j = n / 2;
2575 head = self->ob_bytes;
2576 tail = head + n - 1;
2577 for (i = 0; i < j; i++) {
2578 swap = *head;
2579 *head++ = *tail;
2580 *tail-- = swap;
2581 }
2582
2583 Py_RETURN_NONE;
2584}
2585
2586PyDoc_STRVAR(insert__doc__,
2587"B.insert(index, int) -> None\n\
2588\n\
2589Insert a single item into the bytearray before the given index.");
2590static PyObject *
2591bytes_insert(PyBytesObject *self, PyObject *args)
2592{
2593 int value;
2594 Py_ssize_t where, n = Py_SIZE(self);
2595
2596 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2597 return NULL;
2598
2599 if (n == PY_SSIZE_T_MAX) {
2600 PyErr_SetString(PyExc_OverflowError,
2601 "cannot add more objects to bytes");
2602 return NULL;
2603 }
2604 if (value < 0 || value >= 256) {
2605 PyErr_SetString(PyExc_ValueError,
2606 "byte must be in range(0, 256)");
2607 return NULL;
2608 }
2609 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2610 return NULL;
2611
2612 if (where < 0) {
2613 where += n;
2614 if (where < 0)
2615 where = 0;
2616 }
2617 if (where > n)
2618 where = n;
2619 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2620 self->ob_bytes[where] = value;
2621
2622 Py_RETURN_NONE;
2623}
2624
2625PyDoc_STRVAR(append__doc__,
2626"B.append(int) -> None\n\
2627\n\
2628Append a single item to the end of B.");
2629static PyObject *
2630bytes_append(PyBytesObject *self, PyObject *arg)
2631{
2632 int value;
2633 Py_ssize_t n = Py_SIZE(self);
2634
2635 if (! _getbytevalue(arg, &value))
2636 return NULL;
2637 if (n == PY_SSIZE_T_MAX) {
2638 PyErr_SetString(PyExc_OverflowError,
2639 "cannot add more objects to bytes");
2640 return NULL;
2641 }
2642 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2643 return NULL;
2644
2645 self->ob_bytes[n] = value;
2646
2647 Py_RETURN_NONE;
2648}
2649
2650PyDoc_STRVAR(extend__doc__,
2651"B.extend(iterable int) -> None\n\
2652\n\
2653Append all the elements from the iterator or sequence to the\n\
2654end of B.");
2655static PyObject *
2656bytes_extend(PyBytesObject *self, PyObject *arg)
2657{
2658 PyObject *it, *item, *tmp, *res;
2659 Py_ssize_t buf_size = 0, len = 0;
2660 int value;
2661 char *buf;
2662
2663 /* bytes_setslice code only accepts something supporting PEP 3118. */
2664 if (PyObject_CheckBuffer(arg)) {
2665 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2666 return NULL;
2667
2668 Py_RETURN_NONE;
2669 }
2670
2671 it = PyObject_GetIter(arg);
2672 if (it == NULL)
2673 return NULL;
2674
2675 /* Try to determine the length of the argument. 32 is abitrary. */
2676 buf_size = _PyObject_LengthHint(arg, 32);
2677
2678 buf = (char *)PyMem_Malloc(buf_size * sizeof(char));
2679 if (buf == NULL)
2680 return PyErr_NoMemory();
2681
2682 while ((item = PyIter_Next(it)) != NULL) {
2683 if (! _getbytevalue(item, &value)) {
2684 Py_DECREF(item);
2685 Py_DECREF(it);
2686 return NULL;
2687 }
2688 buf[len++] = value;
2689 Py_DECREF(item);
2690 if (len >= buf_size) {
2691 buf_size = len + (len >> 1) + 1;
2692 buf = (char *)PyMem_Realloc(buf, buf_size * sizeof(char));
2693 if (buf == NULL) {
2694 Py_DECREF(it);
2695 return PyErr_NoMemory();
2696 }
2697 }
2698 }
2699 Py_DECREF(it);
2700
2701 /* XXX: Is possible to avoid a full copy of the buffer? */
2702 tmp = PyBytes_FromStringAndSize(buf, len);
2703 res = bytes_extend(self, tmp);
2704 Py_DECREF(tmp);
2705 PyMem_Free(buf);
2706
2707 return res;
2708}
2709
2710PyDoc_STRVAR(pop__doc__,
2711"B.pop([index]) -> int\n\
2712\n\
2713Remove and return a single item from B. If no index\n\
2714argument is give, will pop the last value.");
2715static PyObject *
2716bytes_pop(PyBytesObject *self, PyObject *args)
2717{
2718 int value;
2719 Py_ssize_t where = -1, n = Py_SIZE(self);
2720
2721 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2722 return NULL;
2723
2724 if (n == 0) {
2725 PyErr_SetString(PyExc_OverflowError,
2726 "cannot pop an empty bytes");
2727 return NULL;
2728 }
2729 if (where < 0)
2730 where += Py_SIZE(self);
2731 if (where < 0 || where >= Py_SIZE(self)) {
2732 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2733 return NULL;
2734 }
2735
2736 value = self->ob_bytes[where];
2737 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2738 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2739 return NULL;
2740
2741 return PyInt_FromLong(value);
2742}
2743
2744PyDoc_STRVAR(remove__doc__,
2745"B.remove(int) -> None\n\
2746\n\
2747Remove the first occurance of a value in B.");
2748static PyObject *
2749bytes_remove(PyBytesObject *self, PyObject *arg)
2750{
2751 int value;
2752 Py_ssize_t where, n = Py_SIZE(self);
2753
2754 if (! _getbytevalue(arg, &value))
2755 return NULL;
2756
2757 for (where = 0; where < n; where++) {
2758 if (self->ob_bytes[where] == value)
2759 break;
2760 }
2761 if (where == n) {
2762 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2763 return NULL;
2764 }
2765
2766 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2767 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2768 return NULL;
2769
2770 Py_RETURN_NONE;
2771}
2772
2773/* XXX These two helpers could be optimized if argsize == 1 */
2774
2775static Py_ssize_t
2776lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2777 void *argptr, Py_ssize_t argsize)
2778{
2779 Py_ssize_t i = 0;
2780 while (i < mysize && memchr(argptr, myptr[i], argsize))
2781 i++;
2782 return i;
2783}
2784
2785static Py_ssize_t
2786rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2787 void *argptr, Py_ssize_t argsize)
2788{
2789 Py_ssize_t i = mysize - 1;
2790 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2791 i--;
2792 return i + 1;
2793}
2794
2795PyDoc_STRVAR(strip__doc__,
2796"B.strip([bytes]) -> bytearray\n\
2797\n\
2798Strip leading and trailing bytes contained in the argument.\n\
2799If the argument is omitted, strip ASCII whitespace.");
2800static PyObject *
2801bytes_strip(PyBytesObject *self, PyObject *args)
2802{
2803 Py_ssize_t left, right, mysize, argsize;
2804 void *myptr, *argptr;
2805 PyObject *arg = Py_None;
2806 Py_buffer varg;
2807 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2808 return NULL;
2809 if (arg == Py_None) {
2810 argptr = "\t\n\r\f\v ";
2811 argsize = 6;
2812 }
2813 else {
2814 if (_getbuffer(arg, &varg) < 0)
2815 return NULL;
2816 argptr = varg.buf;
2817 argsize = varg.len;
2818 }
2819 myptr = self->ob_bytes;
2820 mysize = Py_SIZE(self);
2821 left = lstrip_helper(myptr, mysize, argptr, argsize);
2822 if (left == mysize)
2823 right = left;
2824 else
2825 right = rstrip_helper(myptr, mysize, argptr, argsize);
2826 if (arg != Py_None)
2827 PyObject_ReleaseBuffer(arg, &varg);
2828 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2829}
2830
2831PyDoc_STRVAR(lstrip__doc__,
2832"B.lstrip([bytes]) -> bytearray\n\
2833\n\
2834Strip leading bytes contained in the argument.\n\
2835If the argument is omitted, strip leading ASCII whitespace.");
2836static PyObject *
2837bytes_lstrip(PyBytesObject *self, PyObject *args)
2838{
2839 Py_ssize_t left, right, mysize, argsize;
2840 void *myptr, *argptr;
2841 PyObject *arg = Py_None;
2842 Py_buffer varg;
2843 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2844 return NULL;
2845 if (arg == Py_None) {
2846 argptr = "\t\n\r\f\v ";
2847 argsize = 6;
2848 }
2849 else {
2850 if (_getbuffer(arg, &varg) < 0)
2851 return NULL;
2852 argptr = varg.buf;
2853 argsize = varg.len;
2854 }
2855 myptr = self->ob_bytes;
2856 mysize = Py_SIZE(self);
2857 left = lstrip_helper(myptr, mysize, argptr, argsize);
2858 right = mysize;
2859 if (arg != Py_None)
2860 PyObject_ReleaseBuffer(arg, &varg);
2861 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2862}
2863
2864PyDoc_STRVAR(rstrip__doc__,
2865"B.rstrip([bytes]) -> bytearray\n\
2866\n\
2867Strip trailing bytes contained in the argument.\n\
2868If the argument is omitted, strip trailing ASCII whitespace.");
2869static PyObject *
2870bytes_rstrip(PyBytesObject *self, PyObject *args)
2871{
2872 Py_ssize_t left, right, mysize, argsize;
2873 void *myptr, *argptr;
2874 PyObject *arg = Py_None;
2875 Py_buffer varg;
2876 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2877 return NULL;
2878 if (arg == Py_None) {
2879 argptr = "\t\n\r\f\v ";
2880 argsize = 6;
2881 }
2882 else {
2883 if (_getbuffer(arg, &varg) < 0)
2884 return NULL;
2885 argptr = varg.buf;
2886 argsize = varg.len;
2887 }
2888 myptr = self->ob_bytes;
2889 mysize = Py_SIZE(self);
2890 left = 0;
2891 right = rstrip_helper(myptr, mysize, argptr, argsize);
2892 if (arg != Py_None)
2893 PyObject_ReleaseBuffer(arg, &varg);
2894 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2895}
2896
2897PyDoc_STRVAR(decode_doc,
2898"B.decode([encoding[, errors]]) -> unicode object.\n\
2899\n\
2900Decodes B using the codec registered for encoding. encoding defaults\n\
2901to the default encoding. errors may be given to set a different error\n\
2902handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2903a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2904as well as any other name registered with codecs.register_error that is\n\
2905able to handle UnicodeDecodeErrors.");
2906
2907static PyObject *
2908bytes_decode(PyObject *self, PyObject *args)
2909{
2910 const char *encoding = NULL;
2911 const char *errors = NULL;
2912
2913 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2914 return NULL;
2915 if (encoding == NULL)
2916 encoding = PyUnicode_GetDefaultEncoding();
2917 return PyCodec_Decode(self, encoding, errors);
2918}
2919
2920PyDoc_STRVAR(alloc_doc,
2921"B.__alloc__() -> int\n\
2922\n\
2923Returns the number of bytes actually allocated.");
2924
2925static PyObject *
2926bytes_alloc(PyBytesObject *self)
2927{
2928 return PyInt_FromSsize_t(self->ob_alloc);
2929}
2930
2931PyDoc_STRVAR(join_doc,
2932"B.join(iterable_of_bytes) -> bytes\n\
2933\n\
2934Concatenates any number of bytearray objects, with B in between each pair.");
2935
2936static PyObject *
2937bytes_join(PyBytesObject *self, PyObject *it)
2938{
2939 PyObject *seq;
2940 Py_ssize_t mysize = Py_SIZE(self);
2941 Py_ssize_t i;
2942 Py_ssize_t n;
2943 PyObject **items;
2944 Py_ssize_t totalsize = 0;
2945 PyObject *result;
2946 char *dest;
2947
2948 seq = PySequence_Fast(it, "can only join an iterable");
2949 if (seq == NULL)
2950 return NULL;
2951 n = PySequence_Fast_GET_SIZE(seq);
2952 items = PySequence_Fast_ITEMS(seq);
2953
2954 /* Compute the total size, and check that they are all bytes */
2955 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2956 for (i = 0; i < n; i++) {
2957 PyObject *obj = items[i];
2958 if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
2959 PyErr_Format(PyExc_TypeError,
2960 "can only join an iterable of bytes "
2961 "(item %ld has type '%.100s')",
2962 /* XXX %ld isn't right on Win64 */
2963 (long)i, Py_TYPE(obj)->tp_name);
2964 goto error;
2965 }
2966 if (i > 0)
2967 totalsize += mysize;
2968 totalsize += Py_SIZE(obj);
2969 if (totalsize < 0) {
2970 PyErr_NoMemory();
2971 goto error;
2972 }
2973 }
2974
2975 /* Allocate the result, and copy the bytes */
2976 result = PyBytes_FromStringAndSize(NULL, totalsize);
2977 if (result == NULL)
2978 goto error;
2979 dest = PyBytes_AS_STRING(result);
2980 for (i = 0; i < n; i++) {
2981 PyObject *obj = items[i];
2982 Py_ssize_t size = Py_SIZE(obj);
2983 char *buf;
2984 if (PyBytes_Check(obj))
2985 buf = PyBytes_AS_STRING(obj);
2986 else
2987 buf = PyString_AS_STRING(obj);
2988 if (i) {
2989 memcpy(dest, self->ob_bytes, mysize);
2990 dest += mysize;
2991 }
2992 memcpy(dest, buf, size);
2993 dest += size;
2994 }
2995
2996 /* Done */
2997 Py_DECREF(seq);
2998 return result;
2999
3000 /* Error handling */
3001 error:
3002 Py_DECREF(seq);
3003 return NULL;
3004}
3005
3006PyDoc_STRVAR(fromhex_doc,
3007"bytearray.fromhex(string) -> bytearray\n\
3008\n\
3009Create a bytearray object from a string of hexadecimal numbers.\n\
3010Spaces between two numbers are accepted.\n\
3011Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3012
3013static int
3014hex_digit_to_int(Py_UNICODE c)
3015{
3016 if (c >= 128)
3017 return -1;
3018 if (ISDIGIT(c))
3019 return c - '0';
3020 else {
3021 if (ISUPPER(c))
3022 c = TOLOWER(c);
3023 if (c >= 'a' && c <= 'f')
3024 return c - 'a' + 10;
3025 }
3026 return -1;
3027}
3028
3029static PyObject *
3030bytes_fromhex(PyObject *cls, PyObject *args)
3031{
3032 PyObject *newbytes, *hexobj;
3033 char *buf;
3034 Py_UNICODE *hex;
3035 Py_ssize_t hexlen, byteslen, i, j;
3036 int top, bot;
3037
3038 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3039 return NULL;
3040 assert(PyUnicode_Check(hexobj));
3041 hexlen = PyUnicode_GET_SIZE(hexobj);
3042 hex = PyUnicode_AS_UNICODE(hexobj);
3043 byteslen = hexlen/2; /* This overestimates if there are spaces */
3044 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
3045 if (!newbytes)
3046 return NULL;
3047 buf = PyBytes_AS_STRING(newbytes);
3048 for (i = j = 0; i < hexlen; i += 2) {
3049 /* skip over spaces in the input */
3050 while (hex[i] == ' ')
3051 i++;
3052 if (i >= hexlen)
3053 break;
3054 top = hex_digit_to_int(hex[i]);
3055 bot = hex_digit_to_int(hex[i+1]);
3056 if (top == -1 || bot == -1) {
3057 PyErr_Format(PyExc_ValueError,
3058 "non-hexadecimal number found in "
3059 "fromhex() arg at position %zd", i);
3060 goto error;
3061 }
3062 buf[j++] = (top << 4) + bot;
3063 }
3064 if (PyBytes_Resize(newbytes, j) < 0)
3065 goto error;
3066 return newbytes;
3067
3068 error:
3069 Py_DECREF(newbytes);
3070 return NULL;
3071}
3072
3073PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3074
3075static PyObject *
3076bytes_reduce(PyBytesObject *self)
3077{
3078 PyObject *latin1, *dict;
3079 if (self->ob_bytes)
3080 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3081 Py_SIZE(self), NULL);
3082 else
3083 latin1 = PyUnicode_FromString("");
3084
3085 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3086 if (dict == NULL) {
3087 PyErr_Clear();
3088 dict = Py_None;
3089 Py_INCREF(dict);
3090 }
3091
3092 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3093}
3094
3095static PySequenceMethods bytes_as_sequence = {
3096 (lenfunc)bytes_length, /* sq_length */
3097 (binaryfunc)PyBytes_Concat, /* sq_concat */
3098 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3099 (ssizeargfunc)bytes_getitem, /* sq_item */
3100 0, /* sq_slice */
3101 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3102 0, /* sq_ass_slice */
3103 (objobjproc)bytes_contains, /* sq_contains */
3104 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3105 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3106};
3107
3108static PyMappingMethods bytes_as_mapping = {
3109 (lenfunc)bytes_length,
3110 (binaryfunc)bytes_subscript,
3111 (objobjargproc)bytes_ass_subscript,
3112};
3113
3114static PyBufferProcs bytes_as_buffer = {
3115 (readbufferproc)bytes_buffer_getreadbuf,
3116 (writebufferproc)bytes_buffer_getwritebuf,
3117 (segcountproc)bytes_buffer_getsegcount,
3118 (charbufferproc)bytes_buffer_getcharbuf,
3119 (getbufferproc)bytes_getbuffer,
3120 (releasebufferproc)bytes_releasebuffer,
3121};
3122
3123static PyMethodDef
3124bytes_methods[] = {
3125 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3126 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3127 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3128 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3129 _Py_capitalize__doc__},
3130 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3131 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3132 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3133 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3134 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3135 expandtabs__doc__},
3136 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3137 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3138 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3139 fromhex_doc},
3140 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3141 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3142 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3143 _Py_isalnum__doc__},
3144 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3145 _Py_isalpha__doc__},
3146 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3147 _Py_isdigit__doc__},
3148 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3149 _Py_islower__doc__},
3150 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3151 _Py_isspace__doc__},
3152 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3153 _Py_istitle__doc__},
3154 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3155 _Py_isupper__doc__},
3156 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3157 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3158 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3159 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3160 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3161 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3162 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3163 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3164 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3165 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3166 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3167 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3168 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3169 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3170 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3171 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3172 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3173 splitlines__doc__},
3174 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3175 startswith__doc__},
3176 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3177 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3178 _Py_swapcase__doc__},
3179 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3180 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3181 translate__doc__},
3182 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3183 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3184 {NULL}
3185};
3186
3187PyDoc_STRVAR(bytes_doc,
3188"bytearray(iterable_of_ints) -> bytearray.\n\
3189bytearray(string, encoding[, errors]) -> bytearray.\n\
3190bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3191bytearray(memory_view) -> bytearray.\n\
3192\n\
3193Construct an mutable bytearray object from:\n\
3194 - an iterable yielding integers in range(256)\n\
3195 - a text string encoded using the specified encoding\n\
3196 - a bytes or a bytearray object\n\
3197 - any object implementing the buffer API.\n\
3198\n\
3199bytearray(int) -> bytearray.\n\
3200\n\
3201Construct a zero-initialized bytearray of the given length.");
3202
3203
3204static PyObject *bytes_iter(PyObject *seq);
3205
3206PyTypeObject PyBytes_Type = {
3207 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3208 "bytearray",
3209 sizeof(PyBytesObject),
3210 0,
3211 (destructor)bytes_dealloc, /* tp_dealloc */
3212 0, /* tp_print */
3213 0, /* tp_getattr */
3214 0, /* tp_setattr */
3215 0, /* tp_compare */
3216 (reprfunc)bytes_repr, /* tp_repr */
3217 0, /* tp_as_number */
3218 &bytes_as_sequence, /* tp_as_sequence */
3219 &bytes_as_mapping, /* tp_as_mapping */
3220 0, /* tp_hash */
3221 0, /* tp_call */
3222 bytes_str, /* tp_str */
3223 PyObject_GenericGetAttr, /* tp_getattro */
3224 0, /* tp_setattro */
3225 &bytes_as_buffer, /* tp_as_buffer */
3226 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3227 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3228 bytes_doc, /* tp_doc */
3229 0, /* tp_traverse */
3230 0, /* tp_clear */
3231 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3232 0, /* tp_weaklistoffset */
3233 bytes_iter, /* tp_iter */
3234 0, /* tp_iternext */
3235 bytes_methods, /* tp_methods */
3236 0, /* tp_members */
3237 0, /* tp_getset */
3238 0, /* tp_base */
3239 0, /* tp_dict */
3240 0, /* tp_descr_get */
3241 0, /* tp_descr_set */
3242 0, /* tp_dictoffset */
3243 (initproc)bytes_init, /* tp_init */
3244 PyType_GenericAlloc, /* tp_alloc */
3245 PyType_GenericNew, /* tp_new */
3246 PyObject_Del, /* tp_free */
3247};
3248
3249/*********************** Bytes Iterator ****************************/
3250
3251typedef struct {
3252 PyObject_HEAD
3253 Py_ssize_t it_index;
3254 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3255} bytesiterobject;
3256
3257static void
3258bytesiter_dealloc(bytesiterobject *it)
3259{
3260 _PyObject_GC_UNTRACK(it);
3261 Py_XDECREF(it->it_seq);
3262 PyObject_GC_Del(it);
3263}
3264
3265static int
3266bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3267{
3268 Py_VISIT(it->it_seq);
3269 return 0;
3270}
3271
3272static PyObject *
3273bytesiter_next(bytesiterobject *it)
3274{
3275 PyBytesObject *seq;
3276 PyObject *item;
3277
3278 assert(it != NULL);
3279 seq = it->it_seq;
3280 if (seq == NULL)
3281 return NULL;
3282 assert(PyBytes_Check(seq));
3283
3284 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3285 item = PyInt_FromLong(
3286 (unsigned char)seq->ob_bytes[it->it_index]);
3287 if (item != NULL)
3288 ++it->it_index;
3289 return item;
3290 }
3291
3292 Py_DECREF(seq);
3293 it->it_seq = NULL;
3294 return NULL;
3295}
3296
3297static PyObject *
3298bytesiter_length_hint(bytesiterobject *it)
3299{
3300 Py_ssize_t len = 0;
3301 if (it->it_seq)
3302 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3303 return PyInt_FromSsize_t(len);
3304}
3305
3306PyDoc_STRVAR(length_hint_doc,
3307 "Private method returning an estimate of len(list(it)).");
3308
3309static PyMethodDef bytesiter_methods[] = {
3310 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3311 length_hint_doc},
3312 {NULL, NULL} /* sentinel */
3313};
3314
3315PyTypeObject PyBytesIter_Type = {
3316 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3317 "bytearray_iterator", /* tp_name */
3318 sizeof(bytesiterobject), /* tp_basicsize */
3319 0, /* tp_itemsize */
3320 /* methods */
3321 (destructor)bytesiter_dealloc, /* tp_dealloc */
3322 0, /* tp_print */
3323 0, /* tp_getattr */
3324 0, /* tp_setattr */
3325 0, /* tp_compare */
3326 0, /* tp_repr */
3327 0, /* tp_as_number */
3328 0, /* tp_as_sequence */
3329 0, /* tp_as_mapping */
3330 0, /* tp_hash */
3331 0, /* tp_call */
3332 0, /* tp_str */
3333 PyObject_GenericGetAttr, /* tp_getattro */
3334 0, /* tp_setattro */
3335 0, /* tp_as_buffer */
3336 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3337 0, /* tp_doc */
3338 (traverseproc)bytesiter_traverse, /* tp_traverse */
3339 0, /* tp_clear */
3340 0, /* tp_richcompare */
3341 0, /* tp_weaklistoffset */
3342 PyObject_SelfIter, /* tp_iter */
3343 (iternextfunc)bytesiter_next, /* tp_iternext */
3344 bytesiter_methods, /* tp_methods */
3345 0,
3346};
3347
3348static PyObject *
3349bytes_iter(PyObject *seq)
3350{
3351 bytesiterobject *it;
3352
3353 if (!PyBytes_Check(seq)) {
3354 PyErr_BadInternalCall();
3355 return NULL;
3356 }
3357 it = PyObject_GC_New(bytesiterobject, &PyBytesIter_Type);
3358 if (it == NULL)
3359 return NULL;
3360 it->it_index = 0;
3361 Py_INCREF(seq);
3362 it->it_seq = (PyBytesObject *)seq;
3363 _PyObject_GC_TRACK(it);
3364 return (PyObject *)it;
3365}