/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyByteArrayObject *nullbytes = NULL;
9
10void
11PyByteArray_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyByteArray_Init(void)
18{
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
28/* end nullbytes support */
29
30/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
Georg Brandl3e483f62008-07-16 22:57:41 +000037 if (PyBytes_CheckExact(arg)) {
Christian Heimes44720832008-05-26 13:01:01 +000038 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
41 }
Georg Brandl3e483f62008-07-16 22:57:41 +000042 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
44 }
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
Christian Heimes44720832008-05-26 13:01:01 +000047 }
48 else {
Georg Brandl3e483f62008-07-16 22:57:41 +000049 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
54 }
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
57 }
58 if (face_value == -1 && PyErr_Occurred())
59 return 0;
60
61 if (face_value < 0 || face_value >= 256) {
62 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
Christian Heimes44720832008-05-26 13:01:01 +000063 return 0;
64 }
65
66 *value = face_value;
67 return 1;
68}
69
70static Py_ssize_t
71bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72{
73 if ( index != 0 ) {
74 PyErr_SetString(PyExc_SystemError,
75 "accessing non-existent bytes segment");
76 return -1;
77 }
78 *ptr = (void *)self->ob_bytes;
79 return Py_SIZE(self);
80}
81
82static Py_ssize_t
83bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84{
85 if ( index != 0 ) {
86 PyErr_SetString(PyExc_SystemError,
87 "accessing non-existent bytes segment");
88 return -1;
89 }
90 *ptr = (void *)self->ob_bytes;
91 return Py_SIZE(self);
92}
93
94static Py_ssize_t
95bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96{
97 if ( lenp )
98 *lenp = Py_SIZE(self);
99 return 1;
100}
101
102static Py_ssize_t
103bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104{
105 if ( index != 0 ) {
106 PyErr_SetString(PyExc_SystemError,
107 "accessing non-existent bytes segment");
108 return -1;
109 }
110 *ptr = self->ob_bytes;
111 return Py_SIZE(self);
112}
113
114static int
115bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116{
117 int ret;
118 void *ptr;
119 if (view == NULL) {
120 obj->ob_exports++;
121 return 0;
122 }
123 if (obj->ob_bytes == NULL)
124 ptr = "";
125 else
126 ptr = obj->ob_bytes;
127 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
128 if (ret >= 0) {
129 obj->ob_exports++;
130 }
131 return ret;
132}
133
134static void
135bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
136{
137 obj->ob_exports--;
138}
139
140static Py_ssize_t
141_getbuffer(PyObject *obj, Py_buffer *view)
142{
143 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
144
145 if (buffer == NULL || buffer->bf_getbuffer == NULL)
146 {
147 PyErr_Format(PyExc_TypeError,
148 "Type %.100s doesn't support the buffer API",
149 Py_TYPE(obj)->tp_name);
150 return -1;
151 }
152
153 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
154 return -1;
155 return view->len;
156}
157
158/* Direct API functions */
159
160PyObject *
161PyByteArray_FromObject(PyObject *input)
162{
163 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
164 input, NULL);
165}
166
167PyObject *
168PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
169{
170 PyByteArrayObject *new;
171 Py_ssize_t alloc;
172
173 if (size < 0) {
174 PyErr_SetString(PyExc_SystemError,
175 "Negative size passed to PyByteArray_FromStringAndSize");
176 return NULL;
177 }
178
179 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
180 if (new == NULL)
181 return NULL;
182
183 if (size == 0) {
184 new->ob_bytes = NULL;
185 alloc = 0;
186 }
187 else {
188 alloc = size + 1;
189 new->ob_bytes = PyMem_Malloc(alloc);
190 if (new->ob_bytes == NULL) {
191 Py_DECREF(new);
192 return PyErr_NoMemory();
193 }
194 if (bytes != NULL)
195 memcpy(new->ob_bytes, bytes, size);
196 new->ob_bytes[size] = '\0'; /* Trailing null byte */
197 }
198 Py_SIZE(new) = size;
199 new->ob_alloc = alloc;
200 new->ob_exports = 0;
201
202 return (PyObject *)new;
203}
204
205Py_ssize_t
206PyByteArray_Size(PyObject *self)
207{
208 assert(self != NULL);
209 assert(PyByteArray_Check(self));
210
211 return PyByteArray_GET_SIZE(self);
212}
213
214char *
215PyByteArray_AsString(PyObject *self)
216{
217 assert(self != NULL);
218 assert(PyByteArray_Check(self));
219
220 return PyByteArray_AS_STRING(self);
221}
222
223int
224PyByteArray_Resize(PyObject *self, Py_ssize_t size)
225{
226 void *sval;
227 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
228
229 assert(self != NULL);
230 assert(PyByteArray_Check(self));
231 assert(size >= 0);
232
233 if (size < alloc / 2) {
234 /* Major downsize; resize down to exact size */
235 alloc = size + 1;
236 }
237 else if (size < alloc) {
238 /* Within allocated size; quick exit */
239 Py_SIZE(self) = size;
240 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
241 return 0;
242 }
243 else if (size <= alloc * 1.125) {
244 /* Moderate upsize; overallocate similar to list_resize() */
245 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
246 }
247 else {
248 /* Major upsize; resize up to exact size */
249 alloc = size + 1;
250 }
251
252 if (((PyByteArrayObject *)self)->ob_exports > 0) {
253 /*
254 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
255 ((PyByteArrayObject *)self)->ob_bytes);
256 */
257 PyErr_SetString(PyExc_BufferError,
258 "Existing exports of data: object cannot be re-sized");
259 return -1;
260 }
261
262 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
263 if (sval == NULL) {
264 PyErr_NoMemory();
265 return -1;
266 }
267
268 ((PyByteArrayObject *)self)->ob_bytes = sval;
269 Py_SIZE(self) = size;
270 ((PyByteArrayObject *)self)->ob_alloc = alloc;
271 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
272
273 return 0;
274}
275
276PyObject *
277PyByteArray_Concat(PyObject *a, PyObject *b)
278{
279 Py_ssize_t size;
280 Py_buffer va, vb;
281 PyByteArrayObject *result = NULL;
282
283 va.len = -1;
284 vb.len = -1;
285 if (_getbuffer(a, &va) < 0 ||
286 _getbuffer(b, &vb) < 0) {
287 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
288 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
289 goto done;
290 }
291
292 size = va.len + vb.len;
293 if (size < 0) {
294 return PyErr_NoMemory();
295 goto done;
296 }
297
298 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
299 if (result != NULL) {
300 memcpy(result->ob_bytes, va.buf, va.len);
301 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
302 }
303
304 done:
305 if (va.len != -1)
306 PyObject_ReleaseBuffer(a, &va);
307 if (vb.len != -1)
308 PyObject_ReleaseBuffer(b, &vb);
309 return (PyObject *)result;
310}
311
312/* Functions stuffed into the type object */
313
314static Py_ssize_t
315bytes_length(PyByteArrayObject *self)
316{
317 return Py_SIZE(self);
318}
319
320static PyObject *
321bytes_iconcat(PyByteArrayObject *self, PyObject *other)
322{
323 Py_ssize_t mysize;
324 Py_ssize_t size;
325 Py_buffer vo;
326
327 if (_getbuffer(other, &vo) < 0) {
328 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
329 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
330 return NULL;
331 }
332
333 mysize = Py_SIZE(self);
334 size = mysize + vo.len;
335 if (size < 0) {
336 PyObject_ReleaseBuffer(other, &vo);
337 return PyErr_NoMemory();
338 }
339 if (size < self->ob_alloc) {
340 Py_SIZE(self) = size;
341 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
342 }
343 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
344 PyObject_ReleaseBuffer(other, &vo);
345 return NULL;
346 }
347 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
348 PyObject_ReleaseBuffer(other, &vo);
349 Py_INCREF(self);
350 return (PyObject *)self;
351}
352
353static PyObject *
354bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
355{
356 PyByteArrayObject *result;
357 Py_ssize_t mysize;
358 Py_ssize_t size;
359
360 if (count < 0)
361 count = 0;
362 mysize = Py_SIZE(self);
363 size = mysize * count;
364 if (count != 0 && size / count != mysize)
365 return PyErr_NoMemory();
366 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
367 if (result != NULL && size != 0) {
368 if (mysize == 1)
369 memset(result->ob_bytes, self->ob_bytes[0], size);
370 else {
371 Py_ssize_t i;
372 for (i = 0; i < count; i++)
373 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
374 }
375 }
376 return (PyObject *)result;
377}
378
379static PyObject *
380bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
381{
382 Py_ssize_t mysize;
383 Py_ssize_t size;
384
385 if (count < 0)
386 count = 0;
387 mysize = Py_SIZE(self);
388 size = mysize * count;
389 if (count != 0 && size / count != mysize)
390 return PyErr_NoMemory();
391 if (size < self->ob_alloc) {
392 Py_SIZE(self) = size;
393 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
394 }
395 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
396 return NULL;
397
398 if (mysize == 1)
399 memset(self->ob_bytes, self->ob_bytes[0], size);
400 else {
401 Py_ssize_t i;
402 for (i = 1; i < count; i++)
403 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
404 }
405
406 Py_INCREF(self);
407 return (PyObject *)self;
408}
409
410static PyObject *
411bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
412{
413 if (i < 0)
414 i += Py_SIZE(self);
415 if (i < 0 || i >= Py_SIZE(self)) {
416 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
417 return NULL;
418 }
419 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
420}
421
422static PyObject *
Georg Brandl3e483f62008-07-16 22:57:41 +0000423bytes_subscript(PyByteArrayObject *self, PyObject *index)
Christian Heimes44720832008-05-26 13:01:01 +0000424{
Georg Brandl3e483f62008-07-16 22:57:41 +0000425 if (PyIndex_Check(index)) {
426 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000427
428 if (i == -1 && PyErr_Occurred())
429 return NULL;
430
431 if (i < 0)
432 i += PyByteArray_GET_SIZE(self);
433
434 if (i < 0 || i >= Py_SIZE(self)) {
435 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
436 return NULL;
437 }
438 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
439 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000440 else if (PySlice_Check(index)) {
Christian Heimes44720832008-05-26 13:01:01 +0000441 Py_ssize_t start, stop, step, slicelength, cur, i;
Georg Brandl3e483f62008-07-16 22:57:41 +0000442 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000443 PyByteArray_GET_SIZE(self),
444 &start, &stop, &step, &slicelength) < 0) {
445 return NULL;
446 }
447
448 if (slicelength <= 0)
449 return PyByteArray_FromStringAndSize("", 0);
450 else if (step == 1) {
451 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
452 slicelength);
453 }
454 else {
455 char *source_buf = PyByteArray_AS_STRING(self);
456 char *result_buf = (char *)PyMem_Malloc(slicelength);
457 PyObject *result;
458
459 if (result_buf == NULL)
460 return PyErr_NoMemory();
461
462 for (cur = start, i = 0; i < slicelength;
463 cur += step, i++) {
464 result_buf[i] = source_buf[cur];
465 }
466 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
467 PyMem_Free(result_buf);
468 return result;
469 }
470 }
471 else {
472 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
473 return NULL;
474 }
475}
476
477static int
478bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
479 PyObject *values)
480{
481 Py_ssize_t avail, needed;
482 void *bytes;
483 Py_buffer vbytes;
484 int res = 0;
485
486 vbytes.len = -1;
487 if (values == (PyObject *)self) {
488 /* Make a copy and call this function recursively */
489 int err;
490 values = PyByteArray_FromObject(values);
491 if (values == NULL)
492 return -1;
493 err = bytes_setslice(self, lo, hi, values);
494 Py_DECREF(values);
495 return err;
496 }
497 if (values == NULL) {
498 /* del b[lo:hi] */
499 bytes = NULL;
500 needed = 0;
501 }
502 else {
503 if (_getbuffer(values, &vbytes) < 0) {
504 PyErr_Format(PyExc_TypeError,
505 "can't set bytes slice from %.100s",
506 Py_TYPE(values)->tp_name);
507 return -1;
508 }
509 needed = vbytes.len;
510 bytes = vbytes.buf;
511 }
512
513 if (lo < 0)
514 lo = 0;
515 if (hi < lo)
516 hi = lo;
517 if (hi > Py_SIZE(self))
518 hi = Py_SIZE(self);
519
520 avail = hi - lo;
521 if (avail < 0)
522 lo = hi = avail = 0;
523
524 if (avail != needed) {
525 if (avail > needed) {
526 /*
527 0 lo hi old_size
528 | |<----avail----->|<-----tomove------>|
529 | |<-needed->|<-----tomove------>|
530 0 lo new_hi new_size
531 */
532 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
533 Py_SIZE(self) - hi);
534 }
535 /* XXX(nnorwitz): need to verify this can't overflow! */
536 if (PyByteArray_Resize((PyObject *)self,
537 Py_SIZE(self) + needed - avail) < 0) {
538 res = -1;
539 goto finish;
540 }
541 if (avail < needed) {
542 /*
543 0 lo hi old_size
544 | |<-avail->|<-----tomove------>|
545 | |<----needed---->|<-----tomove------>|
546 0 lo new_hi new_size
547 */
548 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
549 Py_SIZE(self) - lo - needed);
550 }
551 }
552
553 if (needed > 0)
554 memcpy(self->ob_bytes + lo, bytes, needed);
555
556
557 finish:
558 if (vbytes.len != -1)
559 PyObject_ReleaseBuffer(values, &vbytes);
560 return res;
561}
562
563static int
564bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
565{
566 int ival;
567
568 if (i < 0)
569 i += Py_SIZE(self);
570
571 if (i < 0 || i >= Py_SIZE(self)) {
572 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
573 return -1;
574 }
575
576 if (value == NULL)
577 return bytes_setslice(self, i, i+1, NULL);
578
579 if (!_getbytevalue(value, &ival))
580 return -1;
581
582 self->ob_bytes[i] = ival;
583 return 0;
584}
585
586static int
Georg Brandl3e483f62008-07-16 22:57:41 +0000587bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
Christian Heimes44720832008-05-26 13:01:01 +0000588{
589 Py_ssize_t start, stop, step, slicelen, needed;
590 char *bytes;
591
Georg Brandl3e483f62008-07-16 22:57:41 +0000592 if (PyIndex_Check(index)) {
593 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000594
595 if (i == -1 && PyErr_Occurred())
596 return -1;
597
598 if (i < 0)
599 i += PyByteArray_GET_SIZE(self);
600
601 if (i < 0 || i >= Py_SIZE(self)) {
602 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
603 return -1;
604 }
605
606 if (values == NULL) {
607 /* Fall through to slice assignment */
608 start = i;
609 stop = i + 1;
610 step = 1;
611 slicelen = 1;
612 }
613 else {
Georg Brandl3e483f62008-07-16 22:57:41 +0000614 int ival;
615 if (!_getbytevalue(values, &ival))
Christian Heimes44720832008-05-26 13:01:01 +0000616 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000617 self->ob_bytes[i] = (char)ival;
618 return 0;
619 }
620 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000621 else if (PySlice_Check(index)) {
622 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000623 PyByteArray_GET_SIZE(self),
624 &start, &stop, &step, &slicelen) < 0) {
625 return -1;
626 }
627 }
628 else {
629 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
630 return -1;
631 }
632
633 if (values == NULL) {
634 bytes = NULL;
635 needed = 0;
636 }
637 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
638 /* Make a copy an call this function recursively */
639 int err;
640 values = PyByteArray_FromObject(values);
641 if (values == NULL)
642 return -1;
Georg Brandl3e483f62008-07-16 22:57:41 +0000643 err = bytes_ass_subscript(self, index, values);
Christian Heimes44720832008-05-26 13:01:01 +0000644 Py_DECREF(values);
645 return err;
646 }
647 else {
648 assert(PyByteArray_Check(values));
649 bytes = ((PyByteArrayObject *)values)->ob_bytes;
650 needed = Py_SIZE(values);
651 }
652 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
653 if ((step < 0 && start < stop) ||
654 (step > 0 && start > stop))
655 stop = start;
656 if (step == 1) {
657 if (slicelen != needed) {
658 if (slicelen > needed) {
659 /*
660 0 start stop old_size
661 | |<---slicelen--->|<-----tomove------>|
662 | |<-needed->|<-----tomove------>|
663 0 lo new_hi new_size
664 */
665 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
666 Py_SIZE(self) - stop);
667 }
668 if (PyByteArray_Resize((PyObject *)self,
669 Py_SIZE(self) + needed - slicelen) < 0)
670 return -1;
671 if (slicelen < needed) {
672 /*
673 0 lo hi old_size
674 | |<-avail->|<-----tomove------>|
675 | |<----needed---->|<-----tomove------>|
676 0 lo new_hi new_size
677 */
678 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
679 Py_SIZE(self) - start - needed);
680 }
681 }
682
683 if (needed > 0)
684 memcpy(self->ob_bytes + start, bytes, needed);
685
686 return 0;
687 }
688 else {
689 if (needed == 0) {
690 /* Delete slice */
691 Py_ssize_t cur, i;
692
693 if (step < 0) {
694 stop = start + 1;
695 start = stop + step * (slicelen - 1) - 1;
696 step = -step;
697 }
698 for (cur = start, i = 0;
699 i < slicelen; cur += step, i++) {
700 Py_ssize_t lim = step - 1;
701
702 if (cur + step >= PyByteArray_GET_SIZE(self))
703 lim = PyByteArray_GET_SIZE(self) - cur - 1;
704
705 memmove(self->ob_bytes + cur - i,
706 self->ob_bytes + cur + 1, lim);
707 }
708 /* Move the tail of the bytes, in one chunk */
709 cur = start + slicelen*step;
710 if (cur < PyByteArray_GET_SIZE(self)) {
711 memmove(self->ob_bytes + cur - slicelen,
712 self->ob_bytes + cur,
713 PyByteArray_GET_SIZE(self) - cur);
714 }
715 if (PyByteArray_Resize((PyObject *)self,
716 PyByteArray_GET_SIZE(self) - slicelen) < 0)
717 return -1;
718
719 return 0;
720 }
721 else {
722 /* Assign slice */
723 Py_ssize_t cur, i;
724
725 if (needed != slicelen) {
726 PyErr_Format(PyExc_ValueError,
727 "attempt to assign bytes of size %zd "
728 "to extended slice of size %zd",
729 needed, slicelen);
730 return -1;
731 }
732 for (cur = start, i = 0; i < slicelen; cur += step, i++)
733 self->ob_bytes[cur] = bytes[i];
734 return 0;
735 }
736 }
737}
738
739static int
740bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
741{
742 static char *kwlist[] = {"source", "encoding", "errors", 0};
743 PyObject *arg = NULL;
744 const char *encoding = NULL;
745 const char *errors = NULL;
746 Py_ssize_t count;
747 PyObject *it;
748 PyObject *(*iternext)(PyObject *);
749
750 if (Py_SIZE(self) != 0) {
751 /* Empty previous contents (yes, do this first of all!) */
752 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
753 return -1;
754 }
755
756 /* Parse arguments */
757 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
758 &arg, &encoding, &errors))
759 return -1;
760
761 /* Make a quick exit if no first argument */
762 if (arg == NULL) {
763 if (encoding != NULL || errors != NULL) {
764 PyErr_SetString(PyExc_TypeError,
765 "encoding or errors without sequence argument");
766 return -1;
767 }
768 return 0;
769 }
770
771 if (PyBytes_Check(arg)) {
772 PyObject *new, *encoded;
773 if (encoding != NULL) {
774 encoded = PyCodec_Encode(arg, encoding, errors);
775 if (encoded == NULL)
776 return -1;
777 assert(PyBytes_Check(encoded));
778 }
779 else {
780 encoded = arg;
781 Py_INCREF(arg);
782 }
783 new = bytes_iconcat(self, arg);
784 Py_DECREF(encoded);
785 if (new == NULL)
786 return -1;
787 Py_DECREF(new);
788 return 0;
789 }
790
791 if (PyUnicode_Check(arg)) {
792 /* Encode via the codec registry */
793 PyObject *encoded, *new;
794 if (encoding == NULL) {
795 PyErr_SetString(PyExc_TypeError,
796 "unicode argument without an encoding");
797 return -1;
798 }
799 encoded = PyCodec_Encode(arg, encoding, errors);
800 if (encoded == NULL)
801 return -1;
802 assert(PyBytes_Check(encoded));
803 new = bytes_iconcat(self, encoded);
804 Py_DECREF(encoded);
805 if (new == NULL)
806 return -1;
807 Py_DECREF(new);
808 return 0;
809 }
810
811 /* If it's not unicode, there can't be encoding or errors */
812 if (encoding != NULL || errors != NULL) {
813 PyErr_SetString(PyExc_TypeError,
814 "encoding or errors without a string argument");
815 return -1;
816 }
817
818 /* Is it an int? */
819 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
820 if (count == -1 && PyErr_Occurred())
821 PyErr_Clear();
822 else {
823 if (count < 0) {
824 PyErr_SetString(PyExc_ValueError, "negative count");
825 return -1;
826 }
827 if (count > 0) {
828 if (PyByteArray_Resize((PyObject *)self, count))
829 return -1;
830 memset(self->ob_bytes, 0, count);
831 }
832 return 0;
833 }
834
835 /* Use the buffer API */
836 if (PyObject_CheckBuffer(arg)) {
837 Py_ssize_t size;
838 Py_buffer view;
839 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
840 return -1;
841 size = view.len;
842 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
843 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
844 goto fail;
845 PyObject_ReleaseBuffer(arg, &view);
846 return 0;
847 fail:
848 PyObject_ReleaseBuffer(arg, &view);
849 return -1;
850 }
851
852 /* XXX Optimize this if the arguments is a list, tuple */
853
854 /* Get the iterator */
855 it = PyObject_GetIter(arg);
856 if (it == NULL)
857 return -1;
858 iternext = *Py_TYPE(it)->tp_iternext;
859
860 /* Run the iterator to exhaustion */
861 for (;;) {
862 PyObject *item;
Georg Brandl3e758462008-07-16 23:10:05 +0000863 int rc, value;
Christian Heimes44720832008-05-26 13:01:01 +0000864
865 /* Get the next item */
866 item = iternext(it);
867 if (item == NULL) {
868 if (PyErr_Occurred()) {
869 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
870 goto error;
871 PyErr_Clear();
872 }
873 break;
874 }
875
876 /* Interpret it as an int (__index__) */
Georg Brandl3e758462008-07-16 23:10:05 +0000877 rc = _getbytevalue(item, &value);
Christian Heimes44720832008-05-26 13:01:01 +0000878 Py_DECREF(item);
Georg Brandl3e758462008-07-16 23:10:05 +0000879 if (!rc)
Christian Heimes44720832008-05-26 13:01:01 +0000880 goto error;
881
Christian Heimes44720832008-05-26 13:01:01 +0000882 /* Append the byte */
883 if (Py_SIZE(self) < self->ob_alloc)
884 Py_SIZE(self)++;
885 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
886 goto error;
887 self->ob_bytes[Py_SIZE(self)-1] = value;
888 }
889
890 /* Clean up and return success */
891 Py_DECREF(it);
892 return 0;
893
894 error:
895 /* Error handling when it != NULL */
896 Py_DECREF(it);
897 return -1;
898}
899
900/* Mostly copied from string_repr, but without the
901 "smart quote" functionality. */
902static PyObject *
903bytes_repr(PyByteArrayObject *self)
904{
905 static const char *hexdigits = "0123456789abcdef";
906 const char *quote_prefix = "bytearray(b";
907 const char *quote_postfix = ")";
908 Py_ssize_t length = Py_SIZE(self);
909 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
910 size_t newsize = 14 + 4 * length;
911 PyObject *v;
912 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
913 PyErr_SetString(PyExc_OverflowError,
914 "bytearray object is too large to make repr");
915 return NULL;
916 }
917 v = PyUnicode_FromUnicode(NULL, newsize);
918 if (v == NULL) {
919 return NULL;
920 }
921 else {
922 register Py_ssize_t i;
923 register Py_UNICODE c;
924 register Py_UNICODE *p;
925 int quote;
926
927 /* Figure out which quote to use; single is preferred */
928 quote = '\'';
929 {
930 char *test, *start;
931 start = PyByteArray_AS_STRING(self);
932 for (test = start; test < start+length; ++test) {
933 if (*test == '"') {
934 quote = '\''; /* back to single */
935 goto decided;
936 }
937 else if (*test == '\'')
938 quote = '"';
939 }
940 decided:
941 ;
942 }
943
944 p = PyUnicode_AS_UNICODE(v);
945 while (*quote_prefix)
946 *p++ = *quote_prefix++;
947 *p++ = quote;
948
949 for (i = 0; i < length; i++) {
950 /* There's at least enough room for a hex escape
951 and a closing quote. */
952 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
953 c = self->ob_bytes[i];
954 if (c == '\'' || c == '\\')
955 *p++ = '\\', *p++ = c;
956 else if (c == '\t')
957 *p++ = '\\', *p++ = 't';
958 else if (c == '\n')
959 *p++ = '\\', *p++ = 'n';
960 else if (c == '\r')
961 *p++ = '\\', *p++ = 'r';
962 else if (c == 0)
963 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
964 else if (c < ' ' || c >= 0x7f) {
965 *p++ = '\\';
966 *p++ = 'x';
967 *p++ = hexdigits[(c & 0xf0) >> 4];
968 *p++ = hexdigits[c & 0xf];
969 }
970 else
971 *p++ = c;
972 }
973 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
974 *p++ = quote;
975 while (*quote_postfix) {
976 *p++ = *quote_postfix++;
977 }
978 *p = '\0';
979 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
980 Py_DECREF(v);
981 return NULL;
982 }
983 return v;
984 }
985}
986
987static PyObject *
988bytes_str(PyObject *op)
989{
990#if 0
991 if (Py_BytesWarningFlag) {
992 if (PyErr_WarnEx(PyExc_BytesWarning,
993 "str() on a bytearray instance", 1))
994 return NULL;
995 }
996 return bytes_repr((PyByteArrayObject*)op);
997#endif
998 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
999}
1000
1001static PyObject *
1002bytes_richcompare(PyObject *self, PyObject *other, int op)
1003{
1004 Py_ssize_t self_size, other_size;
1005 Py_buffer self_bytes, other_bytes;
1006 PyObject *res;
1007 Py_ssize_t minsize;
1008 int cmp;
1009
1010 /* Bytes can be compared to anything that supports the (binary)
1011 buffer API. Except that a comparison with Unicode is always an
1012 error, even if the comparison is for equality. */
1013 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1014 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1015 if (Py_BytesWarningFlag && op == Py_EQ) {
1016 if (PyErr_WarnEx(PyExc_BytesWarning,
1017 "Comparsion between bytearray and string", 1))
1018 return NULL;
1019 }
1020
1021 Py_INCREF(Py_NotImplemented);
1022 return Py_NotImplemented;
1023 }
1024
1025 self_size = _getbuffer(self, &self_bytes);
1026 if (self_size < 0) {
1027 PyErr_Clear();
1028 Py_INCREF(Py_NotImplemented);
1029 return Py_NotImplemented;
1030 }
1031
1032 other_size = _getbuffer(other, &other_bytes);
1033 if (other_size < 0) {
1034 PyErr_Clear();
1035 PyObject_ReleaseBuffer(self, &self_bytes);
1036 Py_INCREF(Py_NotImplemented);
1037 return Py_NotImplemented;
1038 }
1039
1040 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1041 /* Shortcut: if the lengths differ, the objects differ */
1042 cmp = (op == Py_NE);
1043 }
1044 else {
1045 minsize = self_size;
1046 if (other_size < minsize)
1047 minsize = other_size;
1048
1049 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1050 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1051
1052 if (cmp == 0) {
1053 if (self_size < other_size)
1054 cmp = -1;
1055 else if (self_size > other_size)
1056 cmp = 1;
1057 }
1058
1059 switch (op) {
1060 case Py_LT: cmp = cmp < 0; break;
1061 case Py_LE: cmp = cmp <= 0; break;
1062 case Py_EQ: cmp = cmp == 0; break;
1063 case Py_NE: cmp = cmp != 0; break;
1064 case Py_GT: cmp = cmp > 0; break;
1065 case Py_GE: cmp = cmp >= 0; break;
1066 }
1067 }
1068
1069 res = cmp ? Py_True : Py_False;
1070 PyObject_ReleaseBuffer(self, &self_bytes);
1071 PyObject_ReleaseBuffer(other, &other_bytes);
1072 Py_INCREF(res);
1073 return res;
1074}
1075
1076static void
1077bytes_dealloc(PyByteArrayObject *self)
1078{
1079 if (self->ob_bytes != 0) {
1080 PyMem_Free(self->ob_bytes);
1081 }
1082 Py_TYPE(self)->tp_free((PyObject *)self);
1083}
1084
1085
1086/* -------------------------------------------------------------------- */
1087/* Methods */
1088
1089#define STRINGLIB_CHAR char
1090#define STRINGLIB_CMP memcmp
1091#define STRINGLIB_LEN PyByteArray_GET_SIZE
1092#define STRINGLIB_STR PyByteArray_AS_STRING
1093#define STRINGLIB_NEW PyByteArray_FromStringAndSize
1094#define STRINGLIB_EMPTY nullbytes
1095#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1096#define STRINGLIB_MUTABLE 1
1097
1098#include "stringlib/fastsearch.h"
1099#include "stringlib/count.h"
1100#include "stringlib/find.h"
1101#include "stringlib/partition.h"
1102#include "stringlib/ctype.h"
1103#include "stringlib/transmogrify.h"
1104
1105
1106/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1107were copied from the old char* style string object. */
1108
1109Py_LOCAL_INLINE(void)
1110_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1111{
1112 if (*end > len)
1113 *end = len;
1114 else if (*end < 0)
1115 *end += len;
1116 if (*end < 0)
1117 *end = 0;
1118 if (*start < 0)
1119 *start += len;
1120 if (*start < 0)
1121 *start = 0;
1122}
1123
1124
1125Py_LOCAL_INLINE(Py_ssize_t)
1126bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1127{
1128 PyObject *subobj;
1129 Py_buffer subbuf;
1130 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1131 Py_ssize_t res;
1132
1133 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1134 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1135 return -2;
1136 if (_getbuffer(subobj, &subbuf) < 0)
1137 return -2;
1138 if (dir > 0)
1139 res = stringlib_find_slice(
1140 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1141 subbuf.buf, subbuf.len, start, end);
1142 else
1143 res = stringlib_rfind_slice(
1144 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1145 subbuf.buf, subbuf.len, start, end);
1146 PyObject_ReleaseBuffer(subobj, &subbuf);
1147 return res;
1148}
1149
1150PyDoc_STRVAR(find__doc__,
1151"B.find(sub [,start [,end]]) -> int\n\
1152\n\
1153Return the lowest index in B where subsection sub is found,\n\
1154such that sub is contained within s[start,end]. Optional\n\
1155arguments start and end are interpreted as in slice notation.\n\
1156\n\
1157Return -1 on failure.");
1158
1159static PyObject *
1160bytes_find(PyByteArrayObject *self, PyObject *args)
1161{
1162 Py_ssize_t result = bytes_find_internal(self, args, +1);
1163 if (result == -2)
1164 return NULL;
1165 return PyInt_FromSsize_t(result);
1166}
1167
1168PyDoc_STRVAR(count__doc__,
1169"B.count(sub [,start [,end]]) -> int\n\
1170\n\
1171Return the number of non-overlapping occurrences of subsection sub in\n\
1172bytes B[start:end]. Optional arguments start and end are interpreted\n\
1173as in slice notation.");
1174
1175static PyObject *
1176bytes_count(PyByteArrayObject *self, PyObject *args)
1177{
1178 PyObject *sub_obj;
1179 const char *str = PyByteArray_AS_STRING(self);
1180 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1181 Py_buffer vsub;
1182 PyObject *count_obj;
1183
1184 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1185 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1186 return NULL;
1187
1188 if (_getbuffer(sub_obj, &vsub) < 0)
1189 return NULL;
1190
1191 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1192
1193 count_obj = PyInt_FromSsize_t(
1194 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1195 );
1196 PyObject_ReleaseBuffer(sub_obj, &vsub);
1197 return count_obj;
1198}
1199
1200
1201PyDoc_STRVAR(index__doc__,
1202"B.index(sub [,start [,end]]) -> int\n\
1203\n\
1204Like B.find() but raise ValueError when the subsection is not found.");
1205
1206static PyObject *
1207bytes_index(PyByteArrayObject *self, PyObject *args)
1208{
1209 Py_ssize_t result = bytes_find_internal(self, args, +1);
1210 if (result == -2)
1211 return NULL;
1212 if (result == -1) {
1213 PyErr_SetString(PyExc_ValueError,
1214 "subsection not found");
1215 return NULL;
1216 }
1217 return PyInt_FromSsize_t(result);
1218}
1219
1220
1221PyDoc_STRVAR(rfind__doc__,
1222"B.rfind(sub [,start [,end]]) -> int\n\
1223\n\
1224Return the highest index in B where subsection sub is found,\n\
1225such that sub is contained within s[start,end]. Optional\n\
1226arguments start and end are interpreted as in slice notation.\n\
1227\n\
1228Return -1 on failure.");
1229
1230static PyObject *
1231bytes_rfind(PyByteArrayObject *self, PyObject *args)
1232{
1233 Py_ssize_t result = bytes_find_internal(self, args, -1);
1234 if (result == -2)
1235 return NULL;
1236 return PyInt_FromSsize_t(result);
1237}
1238
1239
1240PyDoc_STRVAR(rindex__doc__,
1241"B.rindex(sub [,start [,end]]) -> int\n\
1242\n\
1243Like B.rfind() but raise ValueError when the subsection is not found.");
1244
1245static PyObject *
1246bytes_rindex(PyByteArrayObject *self, PyObject *args)
1247{
1248 Py_ssize_t result = bytes_find_internal(self, args, -1);
1249 if (result == -2)
1250 return NULL;
1251 if (result == -1) {
1252 PyErr_SetString(PyExc_ValueError,
1253 "subsection not found");
1254 return NULL;
1255 }
1256 return PyInt_FromSsize_t(result);
1257}
1258
1259
1260static int
1261bytes_contains(PyObject *self, PyObject *arg)
1262{
1263 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1264 if (ival == -1 && PyErr_Occurred()) {
1265 Py_buffer varg;
1266 int pos;
1267 PyErr_Clear();
1268 if (_getbuffer(arg, &varg) < 0)
1269 return -1;
1270 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1271 varg.buf, varg.len, 0);
1272 PyObject_ReleaseBuffer(arg, &varg);
1273 return pos >= 0;
1274 }
1275 if (ival < 0 || ival >= 256) {
1276 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1277 return -1;
1278 }
1279
1280 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1281}
1282
1283
1284/* Matches the end (direction >= 0) or start (direction < 0) of self
1285 * against substr, using the start and end arguments. Returns
1286 * -1 on error, 0 if not found and 1 if found.
1287 */
1288Py_LOCAL(int)
1289_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1290 Py_ssize_t end, int direction)
1291{
1292 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1293 const char* str;
1294 Py_buffer vsubstr;
1295 int rv = 0;
1296
1297 str = PyByteArray_AS_STRING(self);
1298
1299 if (_getbuffer(substr, &vsubstr) < 0)
1300 return -1;
1301
1302 _adjust_indices(&start, &end, len);
1303
1304 if (direction < 0) {
1305 /* startswith */
1306 if (start+vsubstr.len > len) {
1307 goto done;
1308 }
1309 } else {
1310 /* endswith */
1311 if (end-start < vsubstr.len || start > len) {
1312 goto done;
1313 }
1314
1315 if (end-vsubstr.len > start)
1316 start = end - vsubstr.len;
1317 }
1318 if (end-start >= vsubstr.len)
1319 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1320
1321done:
1322 PyObject_ReleaseBuffer(substr, &vsubstr);
1323 return rv;
1324}
1325
1326
1327PyDoc_STRVAR(startswith__doc__,
1328"B.startswith(prefix [,start [,end]]) -> bool\n\
1329\n\
1330Return True if B starts with the specified prefix, False otherwise.\n\
1331With optional start, test B beginning at that position.\n\
1332With optional end, stop comparing B at that position.\n\
1333prefix can also be a tuple of strings to try.");
1334
1335static PyObject *
1336bytes_startswith(PyByteArrayObject *self, PyObject *args)
1337{
1338 Py_ssize_t start = 0;
1339 Py_ssize_t end = PY_SSIZE_T_MAX;
1340 PyObject *subobj;
1341 int result;
1342
1343 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1344 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1345 return NULL;
1346 if (PyTuple_Check(subobj)) {
1347 Py_ssize_t i;
1348 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1349 result = _bytes_tailmatch(self,
1350 PyTuple_GET_ITEM(subobj, i),
1351 start, end, -1);
1352 if (result == -1)
1353 return NULL;
1354 else if (result) {
1355 Py_RETURN_TRUE;
1356 }
1357 }
1358 Py_RETURN_FALSE;
1359 }
1360 result = _bytes_tailmatch(self, subobj, start, end, -1);
1361 if (result == -1)
1362 return NULL;
1363 else
1364 return PyBool_FromLong(result);
1365}
1366
1367PyDoc_STRVAR(endswith__doc__,
1368"B.endswith(suffix [,start [,end]]) -> bool\n\
1369\n\
1370Return True if B ends with the specified suffix, False otherwise.\n\
1371With optional start, test B beginning at that position.\n\
1372With optional end, stop comparing B at that position.\n\
1373suffix can also be a tuple of strings to try.");
1374
1375static PyObject *
1376bytes_endswith(PyByteArrayObject *self, PyObject *args)
1377{
1378 Py_ssize_t start = 0;
1379 Py_ssize_t end = PY_SSIZE_T_MAX;
1380 PyObject *subobj;
1381 int result;
1382
1383 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1384 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1385 return NULL;
1386 if (PyTuple_Check(subobj)) {
1387 Py_ssize_t i;
1388 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1389 result = _bytes_tailmatch(self,
1390 PyTuple_GET_ITEM(subobj, i),
1391 start, end, +1);
1392 if (result == -1)
1393 return NULL;
1394 else if (result) {
1395 Py_RETURN_TRUE;
1396 }
1397 }
1398 Py_RETURN_FALSE;
1399 }
1400 result = _bytes_tailmatch(self, subobj, start, end, +1);
1401 if (result == -1)
1402 return NULL;
1403 else
1404 return PyBool_FromLong(result);
1405}
1406
1407
1408PyDoc_STRVAR(translate__doc__,
1409"B.translate(table[, deletechars]) -> bytearray\n\
1410\n\
1411Return a copy of B, where all characters occurring in the\n\
1412optional argument deletechars are removed, and the remaining\n\
1413characters have been mapped through the given translation\n\
1414table, which must be a bytes object of length 256.");
1415
1416static PyObject *
1417bytes_translate(PyByteArrayObject *self, PyObject *args)
1418{
1419 register char *input, *output;
1420 register const char *table;
1421 register Py_ssize_t i, c, changed = 0;
1422 PyObject *input_obj = (PyObject*)self;
1423 const char *output_start;
1424 Py_ssize_t inlen;
1425 PyObject *result;
1426 int trans_table[256];
1427 PyObject *tableobj, *delobj = NULL;
1428 Py_buffer vtable, vdel;
1429
1430 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1431 &tableobj, &delobj))
1432 return NULL;
1433
1434 if (_getbuffer(tableobj, &vtable) < 0)
1435 return NULL;
1436
1437 if (vtable.len != 256) {
1438 PyErr_SetString(PyExc_ValueError,
1439 "translation table must be 256 characters long");
1440 result = NULL;
1441 goto done;
1442 }
1443
1444 if (delobj != NULL) {
1445 if (_getbuffer(delobj, &vdel) < 0) {
1446 result = NULL;
1447 goto done;
1448 }
1449 }
1450 else {
1451 vdel.buf = NULL;
1452 vdel.len = 0;
1453 }
1454
1455 table = (const char *)vtable.buf;
1456 inlen = PyByteArray_GET_SIZE(input_obj);
1457 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1458 if (result == NULL)
1459 goto done;
1460 output_start = output = PyByteArray_AsString(result);
1461 input = PyByteArray_AS_STRING(input_obj);
1462
1463 if (vdel.len == 0) {
1464 /* If no deletions are required, use faster code */
1465 for (i = inlen; --i >= 0; ) {
1466 c = Py_CHARMASK(*input++);
1467 if (Py_CHARMASK((*output++ = table[c])) != c)
1468 changed = 1;
1469 }
1470 if (changed || !PyByteArray_CheckExact(input_obj))
1471 goto done;
1472 Py_DECREF(result);
1473 Py_INCREF(input_obj);
1474 result = input_obj;
1475 goto done;
1476 }
1477
1478 for (i = 0; i < 256; i++)
1479 trans_table[i] = Py_CHARMASK(table[i]);
1480
1481 for (i = 0; i < vdel.len; i++)
1482 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1483
1484 for (i = inlen; --i >= 0; ) {
1485 c = Py_CHARMASK(*input++);
1486 if (trans_table[c] != -1)
1487 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1488 continue;
1489 changed = 1;
1490 }
1491 if (!changed && PyByteArray_CheckExact(input_obj)) {
1492 Py_DECREF(result);
1493 Py_INCREF(input_obj);
1494 result = input_obj;
1495 goto done;
1496 }
1497 /* Fix the size of the resulting string */
1498 if (inlen > 0)
1499 PyByteArray_Resize(result, output - output_start);
1500
1501done:
1502 PyObject_ReleaseBuffer(tableobj, &vtable);
1503 if (delobj != NULL)
1504 PyObject_ReleaseBuffer(delobj, &vdel);
1505 return result;
1506}
1507
1508
/* Search directions: a positive value scans from the start, a negative
   value from the end. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* First occurrence of byte c in target[0:target_len], or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* Cheap first-byte/last-byte comparison before the memcmp() of the middle.
   Arguments are parenthesized so that expressions such as `p + 1` can be
   passed safely. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1522
1523
1524/* Bytes ops must return a string. */
1525/* If the object is subclass of bytes, create a copy */
1526Py_LOCAL(PyByteArrayObject *)
1527return_self(PyByteArrayObject *self)
1528{
1529 if (PyByteArray_CheckExact(self)) {
1530 Py_INCREF(self);
1531 return (PyByteArrayObject *)self;
1532 }
1533 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1534 PyByteArray_AS_STRING(self),
1535 PyByteArray_GET_SIZE(self));
1536}
1537
1538Py_LOCAL_INLINE(Py_ssize_t)
1539countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1540{
1541 Py_ssize_t count=0;
1542 const char *start=target;
1543 const char *end=target+target_len;
1544
1545 while ( (start=findchar(start, end-start, c)) != NULL ) {
1546 count++;
1547 if (count >= maxcount)
1548 break;
1549 start += 1;
1550 }
1551 return count;
1552}
1553
1554Py_LOCAL(Py_ssize_t)
1555findstring(const char *target, Py_ssize_t target_len,
1556 const char *pattern, Py_ssize_t pattern_len,
1557 Py_ssize_t start,
1558 Py_ssize_t end,
1559 int direction)
1560{
1561 if (start < 0) {
1562 start += target_len;
1563 if (start < 0)
1564 start = 0;
1565 }
1566 if (end > target_len) {
1567 end = target_len;
1568 } else if (end < 0) {
1569 end += target_len;
1570 if (end < 0)
1571 end = 0;
1572 }
1573
1574 /* zero-length substrings always match at the first attempt */
1575 if (pattern_len == 0)
1576 return (direction > 0) ? start : end;
1577
1578 end -= pattern_len;
1579
1580 if (direction < 0) {
1581 for (; end >= start; end--)
1582 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1583 return end;
1584 } else {
1585 for (; start <= end; start++)
1586 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1587 return start;
1588 }
1589 return -1;
1590}
1591
1592Py_LOCAL_INLINE(Py_ssize_t)
1593countstring(const char *target, Py_ssize_t target_len,
1594 const char *pattern, Py_ssize_t pattern_len,
1595 Py_ssize_t start,
1596 Py_ssize_t end,
1597 int direction, Py_ssize_t maxcount)
1598{
1599 Py_ssize_t count=0;
1600
1601 if (start < 0) {
1602 start += target_len;
1603 if (start < 0)
1604 start = 0;
1605 }
1606 if (end > target_len) {
1607 end = target_len;
1608 } else if (end < 0) {
1609 end += target_len;
1610 if (end < 0)
1611 end = 0;
1612 }
1613
1614 /* zero-length substrings match everywhere */
1615 if (pattern_len == 0 || maxcount == 0) {
1616 if (target_len+1 < maxcount)
1617 return target_len+1;
1618 return maxcount;
1619 }
1620
1621 end -= pattern_len;
1622 if (direction < 0) {
1623 for (; (end >= start); end--)
1624 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1625 count++;
1626 if (--maxcount <= 0) break;
1627 end -= pattern_len-1;
1628 }
1629 } else {
1630 for (; (start <= end); start++)
1631 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1632 count++;
1633 if (--maxcount <= 0)
1634 break;
1635 start += pattern_len-1;
1636 }
1637 }
1638 return count;
1639}
1640
1641
1642/* Algorithms for different cases of string replacement */
1643
1644/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1645Py_LOCAL(PyByteArrayObject *)
1646replace_interleave(PyByteArrayObject *self,
1647 const char *to_s, Py_ssize_t to_len,
1648 Py_ssize_t maxcount)
1649{
1650 char *self_s, *result_s;
1651 Py_ssize_t self_len, result_len;
1652 Py_ssize_t count, i, product;
1653 PyByteArrayObject *result;
1654
1655 self_len = PyByteArray_GET_SIZE(self);
1656
1657 /* 1 at the end plus 1 after every character */
1658 count = self_len+1;
1659 if (maxcount < count)
1660 count = maxcount;
1661
1662 /* Check for overflow */
1663 /* result_len = count * to_len + self_len; */
1664 product = count * to_len;
1665 if (product / to_len != count) {
1666 PyErr_SetString(PyExc_OverflowError,
1667 "replace string is too long");
1668 return NULL;
1669 }
1670 result_len = product + self_len;
1671 if (result_len < 0) {
1672 PyErr_SetString(PyExc_OverflowError,
1673 "replace string is too long");
1674 return NULL;
1675 }
1676
1677 if (! (result = (PyByteArrayObject *)
1678 PyByteArray_FromStringAndSize(NULL, result_len)) )
1679 return NULL;
1680
1681 self_s = PyByteArray_AS_STRING(self);
1682 result_s = PyByteArray_AS_STRING(result);
1683
1684 /* TODO: special case single character, which doesn't need memcpy */
1685
1686 /* Lay the first one down (guaranteed this will occur) */
1687 Py_MEMCPY(result_s, to_s, to_len);
1688 result_s += to_len;
1689 count -= 1;
1690
1691 for (i=0; i<count; i++) {
1692 *result_s++ = *self_s++;
1693 Py_MEMCPY(result_s, to_s, to_len);
1694 result_s += to_len;
1695 }
1696
1697 /* Copy the rest of the original string */
1698 Py_MEMCPY(result_s, self_s, self_len-i);
1699
1700 return result;
1701}
1702
1703/* Special case for deleting a single character */
1704/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1705Py_LOCAL(PyByteArrayObject *)
1706replace_delete_single_character(PyByteArrayObject *self,
1707 char from_c, Py_ssize_t maxcount)
1708{
1709 char *self_s, *result_s;
1710 char *start, *next, *end;
1711 Py_ssize_t self_len, result_len;
1712 Py_ssize_t count;
1713 PyByteArrayObject *result;
1714
1715 self_len = PyByteArray_GET_SIZE(self);
1716 self_s = PyByteArray_AS_STRING(self);
1717
1718 count = countchar(self_s, self_len, from_c, maxcount);
1719 if (count == 0) {
1720 return return_self(self);
1721 }
1722
1723 result_len = self_len - count; /* from_len == 1 */
1724 assert(result_len>=0);
1725
1726 if ( (result = (PyByteArrayObject *)
1727 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1728 return NULL;
1729 result_s = PyByteArray_AS_STRING(result);
1730
1731 start = self_s;
1732 end = self_s + self_len;
1733 while (count-- > 0) {
1734 next = findchar(start, end-start, from_c);
1735 if (next == NULL)
1736 break;
1737 Py_MEMCPY(result_s, start, next-start);
1738 result_s += (next-start);
1739 start = next+1;
1740 }
1741 Py_MEMCPY(result_s, start, end-start);
1742
1743 return result;
1744}
1745
1746/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1747
1748Py_LOCAL(PyByteArrayObject *)
1749replace_delete_substring(PyByteArrayObject *self,
1750 const char *from_s, Py_ssize_t from_len,
1751 Py_ssize_t maxcount)
1752{
1753 char *self_s, *result_s;
1754 char *start, *next, *end;
1755 Py_ssize_t self_len, result_len;
1756 Py_ssize_t count, offset;
1757 PyByteArrayObject *result;
1758
1759 self_len = PyByteArray_GET_SIZE(self);
1760 self_s = PyByteArray_AS_STRING(self);
1761
1762 count = countstring(self_s, self_len,
1763 from_s, from_len,
1764 0, self_len, 1,
1765 maxcount);
1766
1767 if (count == 0) {
1768 /* no matches */
1769 return return_self(self);
1770 }
1771
1772 result_len = self_len - (count * from_len);
1773 assert (result_len>=0);
1774
1775 if ( (result = (PyByteArrayObject *)
1776 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1777 return NULL;
1778
1779 result_s = PyByteArray_AS_STRING(result);
1780
1781 start = self_s;
1782 end = self_s + self_len;
1783 while (count-- > 0) {
1784 offset = findstring(start, end-start,
1785 from_s, from_len,
1786 0, end-start, FORWARD);
1787 if (offset == -1)
1788 break;
1789 next = start + offset;
1790
1791 Py_MEMCPY(result_s, start, next-start);
1792
1793 result_s += (next-start);
1794 start = next+from_len;
1795 }
1796 Py_MEMCPY(result_s, start, end-start);
1797 return result;
1798}
1799
1800/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1801Py_LOCAL(PyByteArrayObject *)
1802replace_single_character_in_place(PyByteArrayObject *self,
1803 char from_c, char to_c,
1804 Py_ssize_t maxcount)
1805{
1806 char *self_s, *result_s, *start, *end, *next;
1807 Py_ssize_t self_len;
1808 PyByteArrayObject *result;
1809
1810 /* The result string will be the same size */
1811 self_s = PyByteArray_AS_STRING(self);
1812 self_len = PyByteArray_GET_SIZE(self);
1813
1814 next = findchar(self_s, self_len, from_c);
1815
1816 if (next == NULL) {
1817 /* No matches; return the original bytes */
1818 return return_self(self);
1819 }
1820
1821 /* Need to make a new bytes */
1822 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1823 if (result == NULL)
1824 return NULL;
1825 result_s = PyByteArray_AS_STRING(result);
1826 Py_MEMCPY(result_s, self_s, self_len);
1827
1828 /* change everything in-place, starting with this one */
1829 start = result_s + (next-self_s);
1830 *start = to_c;
1831 start++;
1832 end = result_s + self_len;
1833
1834 while (--maxcount > 0) {
1835 next = findchar(start, end-start, from_c);
1836 if (next == NULL)
1837 break;
1838 *next = to_c;
1839 start = next+1;
1840 }
1841
1842 return result;
1843}
1844
1845/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1846Py_LOCAL(PyByteArrayObject *)
1847replace_substring_in_place(PyByteArrayObject *self,
1848 const char *from_s, Py_ssize_t from_len,
1849 const char *to_s, Py_ssize_t to_len,
1850 Py_ssize_t maxcount)
1851{
1852 char *result_s, *start, *end;
1853 char *self_s;
1854 Py_ssize_t self_len, offset;
1855 PyByteArrayObject *result;
1856
1857 /* The result bytes will be the same size */
1858
1859 self_s = PyByteArray_AS_STRING(self);
1860 self_len = PyByteArray_GET_SIZE(self);
1861
1862 offset = findstring(self_s, self_len,
1863 from_s, from_len,
1864 0, self_len, FORWARD);
1865 if (offset == -1) {
1866 /* No matches; return the original bytes */
1867 return return_self(self);
1868 }
1869
1870 /* Need to make a new bytes */
1871 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1872 if (result == NULL)
1873 return NULL;
1874 result_s = PyByteArray_AS_STRING(result);
1875 Py_MEMCPY(result_s, self_s, self_len);
1876
1877 /* change everything in-place, starting with this one */
1878 start = result_s + offset;
1879 Py_MEMCPY(start, to_s, from_len);
1880 start += from_len;
1881 end = result_s + self_len;
1882
1883 while ( --maxcount > 0) {
1884 offset = findstring(start, end-start,
1885 from_s, from_len,
1886 0, end-start, FORWARD);
1887 if (offset==-1)
1888 break;
1889 Py_MEMCPY(start+offset, to_s, from_len);
1890 start += offset+from_len;
1891 }
1892
1893 return result;
1894}
1895
1896/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1897Py_LOCAL(PyByteArrayObject *)
1898replace_single_character(PyByteArrayObject *self,
1899 char from_c,
1900 const char *to_s, Py_ssize_t to_len,
1901 Py_ssize_t maxcount)
1902{
1903 char *self_s, *result_s;
1904 char *start, *next, *end;
1905 Py_ssize_t self_len, result_len;
1906 Py_ssize_t count, product;
1907 PyByteArrayObject *result;
1908
1909 self_s = PyByteArray_AS_STRING(self);
1910 self_len = PyByteArray_GET_SIZE(self);
1911
1912 count = countchar(self_s, self_len, from_c, maxcount);
1913 if (count == 0) {
1914 /* no matches, return unchanged */
1915 return return_self(self);
1916 }
1917
1918 /* use the difference between current and new, hence the "-1" */
1919 /* result_len = self_len + count * (to_len-1) */
1920 product = count * (to_len-1);
1921 if (product / (to_len-1) != count) {
1922 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1923 return NULL;
1924 }
1925 result_len = self_len + product;
1926 if (result_len < 0) {
1927 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1928 return NULL;
1929 }
1930
1931 if ( (result = (PyByteArrayObject *)
1932 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1933 return NULL;
1934 result_s = PyByteArray_AS_STRING(result);
1935
1936 start = self_s;
1937 end = self_s + self_len;
1938 while (count-- > 0) {
1939 next = findchar(start, end-start, from_c);
1940 if (next == NULL)
1941 break;
1942
1943 if (next == start) {
1944 /* replace with the 'to' */
1945 Py_MEMCPY(result_s, to_s, to_len);
1946 result_s += to_len;
1947 start += 1;
1948 } else {
1949 /* copy the unchanged old then the 'to' */
1950 Py_MEMCPY(result_s, start, next-start);
1951 result_s += (next-start);
1952 Py_MEMCPY(result_s, to_s, to_len);
1953 result_s += to_len;
1954 start = next+1;
1955 }
1956 }
1957 /* Copy the remainder of the remaining bytes */
1958 Py_MEMCPY(result_s, start, end-start);
1959
1960 return result;
1961}
1962
1963/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1964Py_LOCAL(PyByteArrayObject *)
1965replace_substring(PyByteArrayObject *self,
1966 const char *from_s, Py_ssize_t from_len,
1967 const char *to_s, Py_ssize_t to_len,
1968 Py_ssize_t maxcount)
1969{
1970 char *self_s, *result_s;
1971 char *start, *next, *end;
1972 Py_ssize_t self_len, result_len;
1973 Py_ssize_t count, offset, product;
1974 PyByteArrayObject *result;
1975
1976 self_s = PyByteArray_AS_STRING(self);
1977 self_len = PyByteArray_GET_SIZE(self);
1978
1979 count = countstring(self_s, self_len,
1980 from_s, from_len,
1981 0, self_len, FORWARD, maxcount);
1982 if (count == 0) {
1983 /* no matches, return unchanged */
1984 return return_self(self);
1985 }
1986
1987 /* Check for overflow */
1988 /* result_len = self_len + count * (to_len-from_len) */
1989 product = count * (to_len-from_len);
1990 if (product / (to_len-from_len) != count) {
1991 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1992 return NULL;
1993 }
1994 result_len = self_len + product;
1995 if (result_len < 0) {
1996 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1997 return NULL;
1998 }
1999
2000 if ( (result = (PyByteArrayObject *)
2001 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2002 return NULL;
2003 result_s = PyByteArray_AS_STRING(result);
2004
2005 start = self_s;
2006 end = self_s + self_len;
2007 while (count-- > 0) {
2008 offset = findstring(start, end-start,
2009 from_s, from_len,
2010 0, end-start, FORWARD);
2011 if (offset == -1)
2012 break;
2013 next = start+offset;
2014 if (next == start) {
2015 /* replace with the 'to' */
2016 Py_MEMCPY(result_s, to_s, to_len);
2017 result_s += to_len;
2018 start += from_len;
2019 } else {
2020 /* copy the unchanged old then the 'to' */
2021 Py_MEMCPY(result_s, start, next-start);
2022 result_s += (next-start);
2023 Py_MEMCPY(result_s, to_s, to_len);
2024 result_s += to_len;
2025 start = next+from_len;
2026 }
2027 }
2028 /* Copy the remainder of the remaining bytes */
2029 Py_MEMCPY(result_s, start, end-start);
2030
2031 return result;
2032}
2033
2034
2035Py_LOCAL(PyByteArrayObject *)
2036replace(PyByteArrayObject *self,
2037 const char *from_s, Py_ssize_t from_len,
2038 const char *to_s, Py_ssize_t to_len,
2039 Py_ssize_t maxcount)
2040{
2041 if (maxcount < 0) {
2042 maxcount = PY_SSIZE_T_MAX;
2043 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2044 /* nothing to do; return the original bytes */
2045 return return_self(self);
2046 }
2047
2048 if (maxcount == 0 ||
2049 (from_len == 0 && to_len == 0)) {
2050 /* nothing to do; return the original bytes */
2051 return return_self(self);
2052 }
2053
2054 /* Handle zero-length special cases */
2055
2056 if (from_len == 0) {
2057 /* insert the 'to' bytes everywhere. */
2058 /* >>> "Python".replace("", ".") */
2059 /* '.P.y.t.h.o.n.' */
2060 return replace_interleave(self, to_s, to_len, maxcount);
2061 }
2062
2063 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2064 /* point for an empty self bytes to generate a non-empty bytes */
2065 /* Special case so the remaining code always gets a non-empty bytes */
2066 if (PyByteArray_GET_SIZE(self) == 0) {
2067 return return_self(self);
2068 }
2069
2070 if (to_len == 0) {
2071 /* delete all occurances of 'from' bytes */
2072 if (from_len == 1) {
2073 return replace_delete_single_character(
2074 self, from_s[0], maxcount);
2075 } else {
2076 return replace_delete_substring(self, from_s, from_len, maxcount);
2077 }
2078 }
2079
2080 /* Handle special case where both bytes have the same length */
2081
2082 if (from_len == to_len) {
2083 if (from_len == 1) {
2084 return replace_single_character_in_place(
2085 self,
2086 from_s[0],
2087 to_s[0],
2088 maxcount);
2089 } else {
2090 return replace_substring_in_place(
2091 self, from_s, from_len, to_s, to_len, maxcount);
2092 }
2093 }
2094
2095 /* Otherwise use the more generic algorithms */
2096 if (from_len == 1) {
2097 return replace_single_character(self, from_s[0],
2098 to_s, to_len, maxcount);
2099 } else {
2100 /* len('from')>=2, len('to')>=1 */
2101 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2102 }
2103}
2104
2105
2106PyDoc_STRVAR(replace__doc__,
2107"B.replace(old, new[, count]) -> bytes\n\
2108\n\
2109Return a copy of B with all occurrences of subsection\n\
2110old replaced by new. If the optional argument count is\n\
2111given, only the first count occurrences are replaced.");
2112
2113static PyObject *
2114bytes_replace(PyByteArrayObject *self, PyObject *args)
2115{
2116 Py_ssize_t count = -1;
2117 PyObject *from, *to, *res;
2118 Py_buffer vfrom, vto;
2119
2120 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2121 return NULL;
2122
2123 if (_getbuffer(from, &vfrom) < 0)
2124 return NULL;
2125 if (_getbuffer(to, &vto) < 0) {
2126 PyObject_ReleaseBuffer(from, &vfrom);
2127 return NULL;
2128 }
2129
2130 res = (PyObject *)replace((PyByteArrayObject *) self,
2131 vfrom.buf, vfrom.len,
2132 vto.buf, vto.len, count);
2133
2134 PyObject_ReleaseBuffer(from, &vfrom);
2135 PyObject_ReleaseBuffer(to, &vto);
2136 return res;
2137}
2138
2139
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that an expression can be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Build a bytearray from data[left:right] and append it to `list`.
   Relies on locals named `str` and `list` and on an `onError` label in the
   function that uses it. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but the first MAX_PREALLOC items are stored directly
   into the preallocated slots of `list`; later items are appended.  Also
   relies on a local `count`, which it increments. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2184
2185
2186Py_LOCAL_INLINE(PyObject *)
2187split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2188{
2189 register Py_ssize_t i, j, count = 0;
2190 PyObject *str;
2191 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2192
2193 if (list == NULL)
2194 return NULL;
2195
2196 i = j = 0;
2197 while ((j < len) && (maxcount-- > 0)) {
2198 for(; j < len; j++) {
2199 /* I found that using memchr makes no difference */
2200 if (s[j] == ch) {
2201 SPLIT_ADD(s, i, j);
2202 i = j = j + 1;
2203 break;
2204 }
2205 }
2206 }
2207 if (i <= len) {
2208 SPLIT_ADD(s, i, len);
2209 }
2210 FIX_PREALLOC_SIZE(list);
2211 return list;
2212
2213 onError:
2214 Py_DECREF(list);
2215 return NULL;
2216}
2217
2218
2219Py_LOCAL_INLINE(PyObject *)
2220split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2221{
2222 register Py_ssize_t i, j, count = 0;
2223 PyObject *str;
2224 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2225
2226 if (list == NULL)
2227 return NULL;
2228
2229 for (i = j = 0; i < len; ) {
2230 /* find a token */
2231 while (i < len && ISSPACE(s[i]))
2232 i++;
2233 j = i;
2234 while (i < len && !ISSPACE(s[i]))
2235 i++;
2236 if (j < i) {
2237 if (maxcount-- <= 0)
2238 break;
2239 SPLIT_ADD(s, j, i);
2240 while (i < len && ISSPACE(s[i]))
2241 i++;
2242 j = i;
2243 }
2244 }
2245 if (j < len) {
2246 SPLIT_ADD(s, j, len);
2247 }
2248 FIX_PREALLOC_SIZE(list);
2249 return list;
2250
2251 onError:
2252 Py_DECREF(list);
2253 return NULL;
2254}
2255
2256PyDoc_STRVAR(split__doc__,
2257"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2258\n\
2259Return a list of the sections in B, using sep as the delimiter.\n\
2260If sep is not given, B is split on ASCII whitespace characters\n\
2261(space, tab, return, newline, formfeed, vertical tab).\n\
2262If maxsplit is given, at most maxsplit splits are done.");
2263
2264static PyObject *
2265bytes_split(PyByteArrayObject *self, PyObject *args)
2266{
2267 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2268 Py_ssize_t maxsplit = -1, count = 0;
2269 const char *s = PyByteArray_AS_STRING(self), *sub;
2270 PyObject *list, *str, *subobj = Py_None;
2271 Py_buffer vsub;
2272#ifdef USE_FAST
2273 Py_ssize_t pos;
2274#endif
2275
2276 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2277 return NULL;
2278 if (maxsplit < 0)
2279 maxsplit = PY_SSIZE_T_MAX;
2280
2281 if (subobj == Py_None)
2282 return split_whitespace(s, len, maxsplit);
2283
2284 if (_getbuffer(subobj, &vsub) < 0)
2285 return NULL;
2286 sub = vsub.buf;
2287 n = vsub.len;
2288
2289 if (n == 0) {
2290 PyErr_SetString(PyExc_ValueError, "empty separator");
2291 PyObject_ReleaseBuffer(subobj, &vsub);
2292 return NULL;
2293 }
2294 if (n == 1)
2295 return split_char(s, len, sub[0], maxsplit);
2296
2297 list = PyList_New(PREALLOC_SIZE(maxsplit));
2298 if (list == NULL) {
2299 PyObject_ReleaseBuffer(subobj, &vsub);
2300 return NULL;
2301 }
2302
2303#ifdef USE_FAST
2304 i = j = 0;
2305 while (maxsplit-- > 0) {
2306 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2307 if (pos < 0)
2308 break;
2309 j = i+pos;
2310 SPLIT_ADD(s, i, j);
2311 i = j + n;
2312 }
2313#else
2314 i = j = 0;
2315 while ((j+n <= len) && (maxsplit-- > 0)) {
2316 for (; j+n <= len; j++) {
2317 if (Py_STRING_MATCH(s, j, sub, n)) {
2318 SPLIT_ADD(s, i, j);
2319 i = j = j + n;
2320 break;
2321 }
2322 }
2323 }
2324#endif
2325 SPLIT_ADD(s, i, len);
2326 FIX_PREALLOC_SIZE(list);
2327 PyObject_ReleaseBuffer(subobj, &vsub);
2328 return list;
2329
2330 onError:
2331 Py_DECREF(list);
2332 PyObject_ReleaseBuffer(subobj, &vsub);
2333 return NULL;
2334}
2335
2336/* stringlib's partition shares nullbytes in some cases.
2337 undo this, we don't want the nullbytes to be shared. */
2338static PyObject *
2339make_nullbytes_unique(PyObject *result)
2340{
2341 if (result != NULL) {
2342 int i;
2343 assert(PyTuple_Check(result));
2344 assert(PyTuple_GET_SIZE(result) == 3);
2345 for (i = 0; i < 3; i++) {
2346 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2347 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2348 if (new == NULL) {
2349 Py_DECREF(result);
2350 result = NULL;
2351 break;
2352 }
2353 Py_DECREF(nullbytes);
2354 PyTuple_SET_ITEM(result, i, new);
2355 }
2356 }
2357 }
2358 return result;
2359}
2360
2361PyDoc_STRVAR(partition__doc__,
2362"B.partition(sep) -> (head, sep, tail)\n\
2363\n\
2364Searches for the separator sep in B, and returns the part before it,\n\
2365the separator itself, and the part after it. If the separator is not\n\
2366found, returns B and two empty bytearray objects.");
2367
2368static PyObject *
2369bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2370{
2371 PyObject *bytesep, *result;
2372
2373 bytesep = PyByteArray_FromObject(sep_obj);
2374 if (! bytesep)
2375 return NULL;
2376
2377 result = stringlib_partition(
2378 (PyObject*) self,
2379 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2380 bytesep,
2381 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2382 );
2383
2384 Py_DECREF(bytesep);
2385 return make_nullbytes_unique(result);
2386}
2387
2388PyDoc_STRVAR(rpartition__doc__,
2389"B.rpartition(sep) -> (tail, sep, head)\n\
2390\n\
2391Searches for the separator sep in B, starting at the end of B,\n\
2392and returns the part before it, the separator itself, and the\n\
2393part after it. If the separator is not found, returns two empty\n\
2394bytearray objects and B.");
2395
2396static PyObject *
2397bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2398{
2399 PyObject *bytesep, *result;
2400
2401 bytesep = PyByteArray_FromObject(sep_obj);
2402 if (! bytesep)
2403 return NULL;
2404
2405 result = stringlib_rpartition(
2406 (PyObject*) self,
2407 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2408 bytesep,
2409 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2410 );
2411
2412 Py_DECREF(bytesep);
2413 return make_nullbytes_unique(result);
2414}
2415
/* Split s[0:len] on the single byte `ch`, scanning from the right.  At most
   `maxcount` separators are consumed.  Returns a new list in left-to-right
   order, or NULL on error.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are defined
   outside this view.  `str` and `count` are never used directly below and
   nothing before PyList_Reverse jumps to onError explicitly, so the macros
   presumably rely on the names `str`, `list`, `count` and the `onError`
   label -- confirm before renaming any of them. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    /* i is the scan position; j is the last index of the current piece. */
    i = j = len - 1;
    while ((i >= 0) && (maxcount-- > 0)) {
        for (; i >= 0; i--) {
            if (s[i] == ch) {
                SPLIT_ADD(s, i + 1, j + 1);
                /* Step over the separator byte itself. */
                j = i = i - 1;
                break;
            }
        }
    }
    /* j may be -1 here (separator at index 0, or empty input); the range
       passed below is then [0, 0). */
    if (j >= -1) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* Ranges were added right-to-left; put them in source order. */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2449
/* Split s[0:len] on runs of bytes for which ISSPACE() is true, scanning from
   the right.  At most `maxcount` tokens are split off; whatever remains to
   the left (if anything) is added as one final range.  Returns a new list in
   left-to-right order, or NULL on error.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are defined
   outside this view and presumably rely on the local names `str`, `list`,
   `count` and the `onError` label -- confirm before renaming any of them. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    for (i = j = len - 1; i >= 0; ) {
        /* find a token */
        while (i >= 0 && ISSPACE(s[i]))
            i--;
        /* j marks the last byte of the token, i walks to just before it. */
        j = i;
        while (i >= 0 && !ISSPACE(s[i]))
            i--;
        if (j > i) {
            /* Split budget exhausted: leave j pointing at the end of the
               unsplit remainder, which is handled after the loop. */
            if (maxcount-- <= 0)
                break;
            SPLIT_ADD(s, i + 1, j + 1);
            /* Skip the whitespace to the left of the token just added. */
            while (i >= 0 && ISSPACE(s[i]))
                i--;
            j = i;
        }
    }
    /* Anything left of the last split (j >= 0) becomes one more range. */
    if (j >= 0) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* Ranges were added right-to-left; put them in source order. */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2489
2490PyDoc_STRVAR(rsplit__doc__,
2491"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2492\n\
2493Return a list of the sections in B, using sep as the delimiter,\n\
2494starting at the end of B and working to the front.\n\
2495If sep is not given, B is split on ASCII whitespace characters\n\
2496(space, tab, return, newline, formfeed, vertical tab).\n\
2497If maxsplit is given, at most maxsplit splits are done.");
2498
2499static PyObject *
2500bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2501{
2502 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2503 Py_ssize_t maxsplit = -1, count = 0;
2504 const char *s = PyByteArray_AS_STRING(self), *sub;
2505 PyObject *list, *str, *subobj = Py_None;
2506 Py_buffer vsub;
2507
2508 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2509 return NULL;
2510 if (maxsplit < 0)
2511 maxsplit = PY_SSIZE_T_MAX;
2512
2513 if (subobj == Py_None)
2514 return rsplit_whitespace(s, len, maxsplit);
2515
2516 if (_getbuffer(subobj, &vsub) < 0)
2517 return NULL;
2518 sub = vsub.buf;
2519 n = vsub.len;
2520
2521 if (n == 0) {
2522 PyErr_SetString(PyExc_ValueError, "empty separator");
2523 PyObject_ReleaseBuffer(subobj, &vsub);
2524 return NULL;
2525 }
2526 else if (n == 1)
2527 return rsplit_char(s, len, sub[0], maxsplit);
2528
2529 list = PyList_New(PREALLOC_SIZE(maxsplit));
2530 if (list == NULL) {
2531 PyObject_ReleaseBuffer(subobj, &vsub);
2532 return NULL;
2533 }
2534
2535 j = len;
2536 i = j - n;
2537
2538 while ( (i >= 0) && (maxsplit-- > 0) ) {
2539 for (; i>=0; i--) {
2540 if (Py_STRING_MATCH(s, i, sub, n)) {
2541 SPLIT_ADD(s, i + n, j);
2542 j = i;
2543 i -= n;
2544 break;
2545 }
2546 }
2547 }
2548 SPLIT_ADD(s, 0, j);
2549 FIX_PREALLOC_SIZE(list);
2550 if (PyList_Reverse(list) < 0)
2551 goto onError;
2552 PyObject_ReleaseBuffer(subobj, &vsub);
2553 return list;
2554
2555onError:
2556 Py_DECREF(list);
2557 PyObject_ReleaseBuffer(subobj, &vsub);
2558 return NULL;
2559}
2560
2561PyDoc_STRVAR(reverse__doc__,
2562"B.reverse() -> None\n\
2563\n\
2564Reverse the order of the values in B in place.");
2565static PyObject *
2566bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2567{
2568 char swap, *head, *tail;
2569 Py_ssize_t i, j, n = Py_SIZE(self);
2570
2571 j = n / 2;
2572 head = self->ob_bytes;
2573 tail = head + n - 1;
2574 for (i = 0; i < j; i++) {
2575 swap = *head;
2576 *head++ = *tail;
2577 *tail-- = swap;
2578 }
2579
2580 Py_RETURN_NONE;
2581}
2582
2583PyDoc_STRVAR(insert__doc__,
2584"B.insert(index, int) -> None\n\
2585\n\
2586Insert a single item into the bytearray before the given index.");
2587static PyObject *
2588bytes_insert(PyByteArrayObject *self, PyObject *args)
2589{
Georg Brandl3e483f62008-07-16 22:57:41 +00002590 PyObject *value;
2591 int ival;
Christian Heimes44720832008-05-26 13:01:01 +00002592 Py_ssize_t where, n = Py_SIZE(self);
2593
Georg Brandl3e483f62008-07-16 22:57:41 +00002594 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
Christian Heimes44720832008-05-26 13:01:01 +00002595 return NULL;
2596
2597 if (n == PY_SSIZE_T_MAX) {
2598 PyErr_SetString(PyExc_OverflowError,
2599 "cannot add more objects to bytes");
2600 return NULL;
2601 }
Georg Brandl3e483f62008-07-16 22:57:41 +00002602 if (!_getbytevalue(value, &ival))
Christian Heimes44720832008-05-26 13:01:01 +00002603 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002604 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2605 return NULL;
2606
2607 if (where < 0) {
2608 where += n;
2609 if (where < 0)
2610 where = 0;
2611 }
2612 if (where > n)
2613 where = n;
2614 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Georg Brandl3e483f62008-07-16 22:57:41 +00002615 self->ob_bytes[where] = ival;
Christian Heimes44720832008-05-26 13:01:01 +00002616
2617 Py_RETURN_NONE;
2618}
2619
2620PyDoc_STRVAR(append__doc__,
2621"B.append(int) -> None\n\
2622\n\
2623Append a single item to the end of B.");
2624static PyObject *
2625bytes_append(PyByteArrayObject *self, PyObject *arg)
2626{
2627 int value;
2628 Py_ssize_t n = Py_SIZE(self);
2629
2630 if (! _getbytevalue(arg, &value))
2631 return NULL;
2632 if (n == PY_SSIZE_T_MAX) {
2633 PyErr_SetString(PyExc_OverflowError,
2634 "cannot add more objects to bytes");
2635 return NULL;
2636 }
2637 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2638 return NULL;
2639
2640 self->ob_bytes[n] = value;
2641
2642 Py_RETURN_NONE;
2643}
2644
2645PyDoc_STRVAR(extend__doc__,
2646"B.extend(iterable int) -> None\n\
2647\n\
2648Append all the elements from the iterator or sequence to the\n\
2649end of B.");
2650static PyObject *
2651bytes_extend(PyByteArrayObject *self, PyObject *arg)
2652{
2653 PyObject *it, *item, *bytes_obj;
2654 Py_ssize_t buf_size = 0, len = 0;
2655 int value;
2656 char *buf;
2657
2658 /* bytes_setslice code only accepts something supporting PEP 3118. */
2659 if (PyObject_CheckBuffer(arg)) {
2660 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2661 return NULL;
2662
2663 Py_RETURN_NONE;
2664 }
2665
2666 it = PyObject_GetIter(arg);
2667 if (it == NULL)
2668 return NULL;
2669
2670 /* Try to determine the length of the argument. 32 is abitrary. */
2671 buf_size = _PyObject_LengthHint(arg, 32);
2672
2673 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2674 if (bytes_obj == NULL)
2675 return NULL;
2676 buf = PyByteArray_AS_STRING(bytes_obj);
2677
2678 while ((item = PyIter_Next(it)) != NULL) {
2679 if (! _getbytevalue(item, &value)) {
2680 Py_DECREF(item);
2681 Py_DECREF(it);
2682 Py_DECREF(bytes_obj);
2683 return NULL;
2684 }
2685 buf[len++] = value;
2686 Py_DECREF(item);
2687
2688 if (len >= buf_size) {
2689 buf_size = len + (len >> 1) + 1;
2690 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2691 Py_DECREF(it);
2692 Py_DECREF(bytes_obj);
2693 return NULL;
2694 }
2695 /* Recompute the `buf' pointer, since the resizing operation may
2696 have invalidated it. */
2697 buf = PyByteArray_AS_STRING(bytes_obj);
2698 }
2699 }
2700 Py_DECREF(it);
2701
2702 /* Resize down to exact size. */
2703 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2704 Py_DECREF(bytes_obj);
2705 return NULL;
2706 }
2707
2708 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2709 return NULL;
2710 Py_DECREF(bytes_obj);
2711
2712 Py_RETURN_NONE;
2713}
2714
2715PyDoc_STRVAR(pop__doc__,
2716"B.pop([index]) -> int\n\
2717\n\
2718Remove and return a single item from B. If no index\n\
Andrew M. Kuchlingd8972642008-06-21 13:29:12 +00002719argument is given, will pop the last value.");
Christian Heimes44720832008-05-26 13:01:01 +00002720static PyObject *
2721bytes_pop(PyByteArrayObject *self, PyObject *args)
2722{
2723 int value;
2724 Py_ssize_t where = -1, n = Py_SIZE(self);
2725
2726 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2727 return NULL;
2728
2729 if (n == 0) {
2730 PyErr_SetString(PyExc_OverflowError,
2731 "cannot pop an empty bytes");
2732 return NULL;
2733 }
2734 if (where < 0)
2735 where += Py_SIZE(self);
2736 if (where < 0 || where >= Py_SIZE(self)) {
2737 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2738 return NULL;
2739 }
2740
2741 value = self->ob_bytes[where];
2742 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2743 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2744 return NULL;
2745
2746 return PyInt_FromLong(value);
2747}
2748
2749PyDoc_STRVAR(remove__doc__,
2750"B.remove(int) -> None\n\
2751\n\
2752Remove the first occurance of a value in B.");
2753static PyObject *
2754bytes_remove(PyByteArrayObject *self, PyObject *arg)
2755{
2756 int value;
2757 Py_ssize_t where, n = Py_SIZE(self);
2758
2759 if (! _getbytevalue(arg, &value))
2760 return NULL;
2761
2762 for (where = 0; where < n; where++) {
2763 if (self->ob_bytes[where] == value)
2764 break;
2765 }
2766 if (where == n) {
2767 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2768 return NULL;
2769 }
2770
2771 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2772 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2773 return NULL;
2774
2775 Py_RETURN_NONE;
2776}
2777
2778/* XXX These two helpers could be optimized if argsize == 1 */
2779
2780static Py_ssize_t
2781lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2782 void *argptr, Py_ssize_t argsize)
2783{
2784 Py_ssize_t i = 0;
2785 while (i < mysize && memchr(argptr, myptr[i], argsize))
2786 i++;
2787 return i;
2788}
2789
2790static Py_ssize_t
2791rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2792 void *argptr, Py_ssize_t argsize)
2793{
2794 Py_ssize_t i = mysize - 1;
2795 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2796 i--;
2797 return i + 1;
2798}
2799
2800PyDoc_STRVAR(strip__doc__,
2801"B.strip([bytes]) -> bytearray\n\
2802\n\
2803Strip leading and trailing bytes contained in the argument.\n\
2804If the argument is omitted, strip ASCII whitespace.");
2805static PyObject *
2806bytes_strip(PyByteArrayObject *self, PyObject *args)
2807{
2808 Py_ssize_t left, right, mysize, argsize;
2809 void *myptr, *argptr;
2810 PyObject *arg = Py_None;
2811 Py_buffer varg;
2812 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2813 return NULL;
2814 if (arg == Py_None) {
2815 argptr = "\t\n\r\f\v ";
2816 argsize = 6;
2817 }
2818 else {
2819 if (_getbuffer(arg, &varg) < 0)
2820 return NULL;
2821 argptr = varg.buf;
2822 argsize = varg.len;
2823 }
2824 myptr = self->ob_bytes;
2825 mysize = Py_SIZE(self);
2826 left = lstrip_helper(myptr, mysize, argptr, argsize);
2827 if (left == mysize)
2828 right = left;
2829 else
2830 right = rstrip_helper(myptr, mysize, argptr, argsize);
2831 if (arg != Py_None)
2832 PyObject_ReleaseBuffer(arg, &varg);
2833 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2834}
2835
2836PyDoc_STRVAR(lstrip__doc__,
2837"B.lstrip([bytes]) -> bytearray\n\
2838\n\
2839Strip leading bytes contained in the argument.\n\
2840If the argument is omitted, strip leading ASCII whitespace.");
2841static PyObject *
2842bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2843{
2844 Py_ssize_t left, right, mysize, argsize;
2845 void *myptr, *argptr;
2846 PyObject *arg = Py_None;
2847 Py_buffer varg;
2848 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2849 return NULL;
2850 if (arg == Py_None) {
2851 argptr = "\t\n\r\f\v ";
2852 argsize = 6;
2853 }
2854 else {
2855 if (_getbuffer(arg, &varg) < 0)
2856 return NULL;
2857 argptr = varg.buf;
2858 argsize = varg.len;
2859 }
2860 myptr = self->ob_bytes;
2861 mysize = Py_SIZE(self);
2862 left = lstrip_helper(myptr, mysize, argptr, argsize);
2863 right = mysize;
2864 if (arg != Py_None)
2865 PyObject_ReleaseBuffer(arg, &varg);
2866 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2867}
2868
2869PyDoc_STRVAR(rstrip__doc__,
2870"B.rstrip([bytes]) -> bytearray\n\
2871\n\
2872Strip trailing bytes contained in the argument.\n\
2873If the argument is omitted, strip trailing ASCII whitespace.");
2874static PyObject *
2875bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2876{
2877 Py_ssize_t left, right, mysize, argsize;
2878 void *myptr, *argptr;
2879 PyObject *arg = Py_None;
2880 Py_buffer varg;
2881 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2882 return NULL;
2883 if (arg == Py_None) {
2884 argptr = "\t\n\r\f\v ";
2885 argsize = 6;
2886 }
2887 else {
2888 if (_getbuffer(arg, &varg) < 0)
2889 return NULL;
2890 argptr = varg.buf;
2891 argsize = varg.len;
2892 }
2893 myptr = self->ob_bytes;
2894 mysize = Py_SIZE(self);
2895 left = 0;
2896 right = rstrip_helper(myptr, mysize, argptr, argsize);
2897 if (arg != Py_None)
2898 PyObject_ReleaseBuffer(arg, &varg);
2899 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2900}
2901
2902PyDoc_STRVAR(decode_doc,
2903"B.decode([encoding[, errors]]) -> unicode object.\n\
2904\n\
2905Decodes B using the codec registered for encoding. encoding defaults\n\
2906to the default encoding. errors may be given to set a different error\n\
2907handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2908a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2909as well as any other name registered with codecs.register_error that is\n\
2910able to handle UnicodeDecodeErrors.");
2911
2912static PyObject *
2913bytes_decode(PyObject *self, PyObject *args)
2914{
2915 const char *encoding = NULL;
2916 const char *errors = NULL;
2917
2918 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2919 return NULL;
2920 if (encoding == NULL)
2921 encoding = PyUnicode_GetDefaultEncoding();
2922 return PyCodec_Decode(self, encoding, errors);
2923}
2924
2925PyDoc_STRVAR(alloc_doc,
2926"B.__alloc__() -> int\n\
2927\n\
2928Returns the number of bytes actually allocated.");
2929
2930static PyObject *
2931bytes_alloc(PyByteArrayObject *self)
2932{
2933 return PyInt_FromSsize_t(self->ob_alloc);
2934}
2935
2936PyDoc_STRVAR(join_doc,
2937"B.join(iterable_of_bytes) -> bytes\n\
2938\n\
2939Concatenates any number of bytearray objects, with B in between each pair.");
2940
2941static PyObject *
2942bytes_join(PyByteArrayObject *self, PyObject *it)
2943{
2944 PyObject *seq;
2945 Py_ssize_t mysize = Py_SIZE(self);
2946 Py_ssize_t i;
2947 Py_ssize_t n;
2948 PyObject **items;
2949 Py_ssize_t totalsize = 0;
2950 PyObject *result;
2951 char *dest;
2952
2953 seq = PySequence_Fast(it, "can only join an iterable");
2954 if (seq == NULL)
2955 return NULL;
2956 n = PySequence_Fast_GET_SIZE(seq);
2957 items = PySequence_Fast_ITEMS(seq);
2958
2959 /* Compute the total size, and check that they are all bytes */
2960 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2961 for (i = 0; i < n; i++) {
2962 PyObject *obj = items[i];
2963 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2964 PyErr_Format(PyExc_TypeError,
2965 "can only join an iterable of bytes "
2966 "(item %ld has type '%.100s')",
2967 /* XXX %ld isn't right on Win64 */
2968 (long)i, Py_TYPE(obj)->tp_name);
2969 goto error;
2970 }
2971 if (i > 0)
2972 totalsize += mysize;
2973 totalsize += Py_SIZE(obj);
2974 if (totalsize < 0) {
2975 PyErr_NoMemory();
2976 goto error;
2977 }
2978 }
2979
2980 /* Allocate the result, and copy the bytes */
2981 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2982 if (result == NULL)
2983 goto error;
2984 dest = PyByteArray_AS_STRING(result);
2985 for (i = 0; i < n; i++) {
2986 PyObject *obj = items[i];
2987 Py_ssize_t size = Py_SIZE(obj);
2988 char *buf;
2989 if (PyByteArray_Check(obj))
2990 buf = PyByteArray_AS_STRING(obj);
2991 else
2992 buf = PyBytes_AS_STRING(obj);
2993 if (i) {
2994 memcpy(dest, self->ob_bytes, mysize);
2995 dest += mysize;
2996 }
2997 memcpy(dest, buf, size);
2998 dest += size;
2999 }
3000
3001 /* Done */
3002 Py_DECREF(seq);
3003 return result;
3004
3005 /* Error handling */
3006 error:
3007 Py_DECREF(seq);
3008 return NULL;
3009}
3010
/* Docstring for the bytearray.fromhex() class method (bytes_fromhex). */
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\
\n\
Create a bytearray object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3017
3018static int
3019hex_digit_to_int(Py_UNICODE c)
3020{
3021 if (c >= 128)
3022 return -1;
3023 if (ISDIGIT(c))
3024 return c - '0';
3025 else {
3026 if (ISUPPER(c))
3027 c = TOLOWER(c);
3028 if (c >= 'a' && c <= 'f')
3029 return c - 'a' + 10;
3030 }
3031 return -1;
3032}
3033
3034static PyObject *
3035bytes_fromhex(PyObject *cls, PyObject *args)
3036{
3037 PyObject *newbytes, *hexobj;
3038 char *buf;
3039 Py_UNICODE *hex;
3040 Py_ssize_t hexlen, byteslen, i, j;
3041 int top, bot;
3042
3043 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3044 return NULL;
3045 assert(PyUnicode_Check(hexobj));
3046 hexlen = PyUnicode_GET_SIZE(hexobj);
3047 hex = PyUnicode_AS_UNICODE(hexobj);
3048 byteslen = hexlen/2; /* This overestimates if there are spaces */
3049 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3050 if (!newbytes)
3051 return NULL;
3052 buf = PyByteArray_AS_STRING(newbytes);
3053 for (i = j = 0; i < hexlen; i += 2) {
3054 /* skip over spaces in the input */
3055 while (hex[i] == ' ')
3056 i++;
3057 if (i >= hexlen)
3058 break;
3059 top = hex_digit_to_int(hex[i]);
3060 bot = hex_digit_to_int(hex[i+1]);
3061 if (top == -1 || bot == -1) {
3062 PyErr_Format(PyExc_ValueError,
3063 "non-hexadecimal number found in "
3064 "fromhex() arg at position %zd", i);
3065 goto error;
3066 }
3067 buf[j++] = (top << 4) + bot;
3068 }
3069 if (PyByteArray_Resize(newbytes, j) < 0)
3070 goto error;
3071 return newbytes;
3072
3073 error:
3074 Py_DECREF(newbytes);
3075 return NULL;
3076}
3077
3078PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3079
3080static PyObject *
3081bytes_reduce(PyByteArrayObject *self)
3082{
3083 PyObject *latin1, *dict;
3084 if (self->ob_bytes)
3085 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3086 Py_SIZE(self), NULL);
3087 else
3088 latin1 = PyUnicode_FromString("");
3089
3090 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3091 if (dict == NULL) {
3092 PyErr_Clear();
3093 dict = Py_None;
3094 Py_INCREF(dict);
3095 }
3096
3097 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3098}
3099
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003100PyDoc_STRVAR(sizeof_doc,
3101"B.__sizeof__() -> int\n\
3102 \n\
3103Returns the size of B in memory, in bytes");
3104static PyObject *
3105bytes_sizeof(PyByteArrayObject *self)
3106{
3107 Py_ssize_t res;
3108
3109 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3110 return PyInt_FromSsize_t(res);
3111}
3112
/* Sequence protocol slots for bytearray.  The two slice slots are left
   empty here; the mapping table below supplies subscript handlers. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,     /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
3125
/* Mapping protocol slots for bytearray. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,                  /* mp_length */
    (binaryfunc)bytes_subscript,            /* mp_subscript */
    (objobjargproc)bytes_ass_subscript,     /* mp_ass_subscript */
};
3131
/* Buffer protocol slots for bytearray: the four old-style handlers followed
   by the new-style getbuffer/releasebuffer pair. */
static PyBufferProcs bytes_as_buffer = {
    (readbufferproc)bytes_buffer_getreadbuf,        /* bf_getreadbuffer */
    (writebufferproc)bytes_buffer_getwritebuf,      /* bf_getwritebuffer */
    (segcountproc)bytes_buffer_getsegcount,         /* bf_getsegcount */
    (charbufferproc)bytes_buffer_getcharbuf,        /* bf_getcharbuffer */
    (getbufferproc)bytes_getbuffer,                 /* bf_getbuffer */
    (releasebufferproc)bytes_releasebuffer,         /* bf_releasebuffer */
};
3140
/* Method table for bytearray (installed as tp_methods of PyByteArray_Type).
   Entries are listed alphabetically after the double-underscore methods and
   the table ends with a {NULL} sentinel.  The stringlib_* functions and the
   _Py_*__doc__ strings are not defined in this part of the file --
   presumably they come from the stringlib / bytes_methods includes. */
static PyMethodDef
bytes_methods[] = {
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* The only class method in the table. */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
3205
/* Docstring of the bytearray type itself (used as tp_doc below); it lists
   the constructor forms accepted by the type. */
PyDoc_STRVAR(bytes_doc,
"bytearray(iterable_of_ints) -> bytearray.\n\
bytearray(string, encoding[, errors]) -> bytearray.\n\
bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
bytearray(memory_view) -> bytearray.\n\
\n\
Construct an mutable bytearray object from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - a bytes or a bytearray object\n\
 - any object implementing the buffer API.\n\
\n\
bytearray(int) -> bytearray.\n\
\n\
Construct a zero-initialized bytearray of the given length.");
3221
3222
/* Forward declaration: bytes_iter is defined after the iterator type at the
   end of the file but is needed for the tp_iter slot below. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytearray.  It can be subclassed (Py_TPFLAGS_BASETYPE),
   advertises the new-style buffer interface (Py_TPFLAGS_HAVE_NEWBUFFER),
   and leaves tp_hash empty. */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytes_str,                          /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytes_iter,                         /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
3267
3268/*********************** Bytes Iterator ****************************/
3269
/* State of a bytearray iterator. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* index of the next byte to yield */
    PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
3275
/* Destructor for the bytearray iterator: untrack it from the GC, drop the
   reference to the underlying bytearray (which may already be NULL once
   the iterator is exhausted), then free the iterator. */
static void
bytesiter_dealloc(bytesiterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3283
/* GC traversal for the bytearray iterator: the only object reference it
   holds is the bytearray being iterated. */
static int
bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3290
3291static PyObject *
3292bytesiter_next(bytesiterobject *it)
3293{
3294 PyByteArrayObject *seq;
3295 PyObject *item;
3296
3297 assert(it != NULL);
3298 seq = it->it_seq;
3299 if (seq == NULL)
3300 return NULL;
3301 assert(PyByteArray_Check(seq));
3302
3303 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3304 item = PyInt_FromLong(
3305 (unsigned char)seq->ob_bytes[it->it_index]);
3306 if (item != NULL)
3307 ++it->it_index;
3308 return item;
3309 }
3310
3311 Py_DECREF(seq);
3312 it->it_seq = NULL;
3313 return NULL;
3314}
3315
3316static PyObject *
3317bytesiter_length_hint(bytesiterobject *it)
3318{
3319 Py_ssize_t len = 0;
3320 if (it->it_seq)
3321 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3322 return PyInt_FromSsize_t(len);
3323}
3324
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");

/* Method table of the bytearray iterator; __length_hint__ is its only
   method. */
static PyMethodDef bytesiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3333
/* Type object for the bytearray iterator.  It participates in cyclic GC
   (Py_TPFLAGS_HAVE_GC with bytesiter_traverse) and is its own iterator
   (tp_iter is PyObject_SelfIter). */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytesiter_dealloc,     /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytesiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytesiter_next,      /* tp_iternext */
    bytesiter_methods,                 /* tp_methods */
    0,                                 /* tp_members */
};
3366
3367static PyObject *
3368bytes_iter(PyObject *seq)
3369{
3370 bytesiterobject *it;
3371
3372 if (!PyByteArray_Check(seq)) {
3373 PyErr_BadInternalCall();
3374 return NULL;
3375 }
3376 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3377 if (it == NULL)
3378 return NULL;
3379 it->it_index = 0;
3380 Py_INCREF(seq);
3381 it->it_seq = (PyByteArrayObject *)seq;
3382 _PyObject_GC_TRACK(it);
3383 return (PyObject *)it;
3384}