blob: bc02106e786a7d41b091d3889c47921a52733283 [file] [log] [blame]
Christian Heimes44720832008-05-26 13:01:01 +00001/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyByteArrayObject *nullbytes = NULL;
9
10void
11PyByteArray_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyByteArray_Init(void)
18{
19 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
23 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
24 nullbytes->ob_exports = 0;
25 return 1;
26}
27
28/* end nullbytes support */
29
30/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
34{
35 long face_value;
36
Georg Brandl3e483f62008-07-16 22:57:41 +000037 if (PyBytes_CheckExact(arg)) {
Christian Heimes44720832008-05-26 13:01:01 +000038 if (Py_SIZE(arg) != 1) {
39 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
40 return 0;
41 }
Georg Brandl3e483f62008-07-16 22:57:41 +000042 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
43 return 1;
44 }
45 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
46 face_value = PyLong_AsLong(arg);
Christian Heimes44720832008-05-26 13:01:01 +000047 }
48 else {
Georg Brandl3e483f62008-07-16 22:57:41 +000049 PyObject *index = PyNumber_Index(arg);
50 if (index == NULL) {
51 PyErr_Format(PyExc_TypeError,
52 "an integer or string of size 1 is required");
53 return 0;
54 }
55 face_value = PyLong_AsLong(index);
56 Py_DECREF(index);
57 }
Georg Brandl3e483f62008-07-16 22:57:41 +000058
59 if (face_value < 0 || face_value >= 256) {
Georg Brandl3238a3e2008-07-16 23:17:46 +000060 /* this includes the OverflowError in case the long is too large */
Georg Brandl3e483f62008-07-16 22:57:41 +000061 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
Christian Heimes44720832008-05-26 13:01:01 +000062 return 0;
63 }
64
65 *value = face_value;
66 return 1;
67}
68
69static Py_ssize_t
70bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
71{
72 if ( index != 0 ) {
73 PyErr_SetString(PyExc_SystemError,
74 "accessing non-existent bytes segment");
75 return -1;
76 }
77 *ptr = (void *)self->ob_bytes;
78 return Py_SIZE(self);
79}
80
81static Py_ssize_t
82bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
83{
84 if ( index != 0 ) {
85 PyErr_SetString(PyExc_SystemError,
86 "accessing non-existent bytes segment");
87 return -1;
88 }
89 *ptr = (void *)self->ob_bytes;
90 return Py_SIZE(self);
91}
92
93static Py_ssize_t
94bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
95{
96 if ( lenp )
97 *lenp = Py_SIZE(self);
98 return 1;
99}
100
101static Py_ssize_t
102bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
103{
104 if ( index != 0 ) {
105 PyErr_SetString(PyExc_SystemError,
106 "accessing non-existent bytes segment");
107 return -1;
108 }
109 *ptr = self->ob_bytes;
110 return Py_SIZE(self);
111}
112
113static int
114bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
115{
116 int ret;
117 void *ptr;
118 if (view == NULL) {
119 obj->ob_exports++;
120 return 0;
121 }
122 if (obj->ob_bytes == NULL)
123 ptr = "";
124 else
125 ptr = obj->ob_bytes;
126 ret = PyBuffer_FillInfo(view, ptr, Py_SIZE(obj), 0, flags);
127 if (ret >= 0) {
128 obj->ob_exports++;
129 }
130 return ret;
131}
132
133static void
134bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
135{
136 obj->ob_exports--;
137}
138
139static Py_ssize_t
140_getbuffer(PyObject *obj, Py_buffer *view)
141{
142 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
143
144 if (buffer == NULL || buffer->bf_getbuffer == NULL)
145 {
146 PyErr_Format(PyExc_TypeError,
147 "Type %.100s doesn't support the buffer API",
148 Py_TYPE(obj)->tp_name);
149 return -1;
150 }
151
152 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
153 return -1;
154 return view->len;
155}
156
157/* Direct API functions */
158
159PyObject *
160PyByteArray_FromObject(PyObject *input)
161{
162 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
163 input, NULL);
164}
165
166PyObject *
167PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
168{
169 PyByteArrayObject *new;
170 Py_ssize_t alloc;
171
172 if (size < 0) {
173 PyErr_SetString(PyExc_SystemError,
174 "Negative size passed to PyByteArray_FromStringAndSize");
175 return NULL;
176 }
177
178 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
179 if (new == NULL)
180 return NULL;
181
182 if (size == 0) {
183 new->ob_bytes = NULL;
184 alloc = 0;
185 }
186 else {
187 alloc = size + 1;
188 new->ob_bytes = PyMem_Malloc(alloc);
189 if (new->ob_bytes == NULL) {
190 Py_DECREF(new);
191 return PyErr_NoMemory();
192 }
193 if (bytes != NULL)
194 memcpy(new->ob_bytes, bytes, size);
195 new->ob_bytes[size] = '\0'; /* Trailing null byte */
196 }
197 Py_SIZE(new) = size;
198 new->ob_alloc = alloc;
199 new->ob_exports = 0;
200
201 return (PyObject *)new;
202}
203
204Py_ssize_t
205PyByteArray_Size(PyObject *self)
206{
207 assert(self != NULL);
208 assert(PyByteArray_Check(self));
209
210 return PyByteArray_GET_SIZE(self);
211}
212
213char *
214PyByteArray_AsString(PyObject *self)
215{
216 assert(self != NULL);
217 assert(PyByteArray_Check(self));
218
219 return PyByteArray_AS_STRING(self);
220}
221
222int
223PyByteArray_Resize(PyObject *self, Py_ssize_t size)
224{
225 void *sval;
226 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
227
228 assert(self != NULL);
229 assert(PyByteArray_Check(self));
230 assert(size >= 0);
231
232 if (size < alloc / 2) {
233 /* Major downsize; resize down to exact size */
234 alloc = size + 1;
235 }
236 else if (size < alloc) {
237 /* Within allocated size; quick exit */
238 Py_SIZE(self) = size;
239 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
240 return 0;
241 }
242 else if (size <= alloc * 1.125) {
243 /* Moderate upsize; overallocate similar to list_resize() */
244 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
245 }
246 else {
247 /* Major upsize; resize up to exact size */
248 alloc = size + 1;
249 }
250
251 if (((PyByteArrayObject *)self)->ob_exports > 0) {
252 /*
253 fprintf(stderr, "%d: %s", ((PyByteArrayObject *)self)->ob_exports,
254 ((PyByteArrayObject *)self)->ob_bytes);
255 */
256 PyErr_SetString(PyExc_BufferError,
257 "Existing exports of data: object cannot be re-sized");
258 return -1;
259 }
260
261 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
262 if (sval == NULL) {
263 PyErr_NoMemory();
264 return -1;
265 }
266
267 ((PyByteArrayObject *)self)->ob_bytes = sval;
268 Py_SIZE(self) = size;
269 ((PyByteArrayObject *)self)->ob_alloc = alloc;
270 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
271
272 return 0;
273}
274
275PyObject *
276PyByteArray_Concat(PyObject *a, PyObject *b)
277{
278 Py_ssize_t size;
279 Py_buffer va, vb;
280 PyByteArrayObject *result = NULL;
281
282 va.len = -1;
283 vb.len = -1;
284 if (_getbuffer(a, &va) < 0 ||
285 _getbuffer(b, &vb) < 0) {
286 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
287 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
288 goto done;
289 }
290
291 size = va.len + vb.len;
292 if (size < 0) {
293 return PyErr_NoMemory();
294 goto done;
295 }
296
297 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
298 if (result != NULL) {
299 memcpy(result->ob_bytes, va.buf, va.len);
300 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
301 }
302
303 done:
304 if (va.len != -1)
305 PyObject_ReleaseBuffer(a, &va);
306 if (vb.len != -1)
307 PyObject_ReleaseBuffer(b, &vb);
308 return (PyObject *)result;
309}
310
311/* Functions stuffed into the type object */
312
313static Py_ssize_t
314bytes_length(PyByteArrayObject *self)
315{
316 return Py_SIZE(self);
317}
318
319static PyObject *
320bytes_iconcat(PyByteArrayObject *self, PyObject *other)
321{
322 Py_ssize_t mysize;
323 Py_ssize_t size;
324 Py_buffer vo;
325
326 if (_getbuffer(other, &vo) < 0) {
327 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
328 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
329 return NULL;
330 }
331
332 mysize = Py_SIZE(self);
333 size = mysize + vo.len;
334 if (size < 0) {
335 PyObject_ReleaseBuffer(other, &vo);
336 return PyErr_NoMemory();
337 }
338 if (size < self->ob_alloc) {
339 Py_SIZE(self) = size;
340 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
341 }
342 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
343 PyObject_ReleaseBuffer(other, &vo);
344 return NULL;
345 }
346 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
347 PyObject_ReleaseBuffer(other, &vo);
348 Py_INCREF(self);
349 return (PyObject *)self;
350}
351
352static PyObject *
353bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
354{
355 PyByteArrayObject *result;
356 Py_ssize_t mysize;
357 Py_ssize_t size;
358
359 if (count < 0)
360 count = 0;
361 mysize = Py_SIZE(self);
362 size = mysize * count;
363 if (count != 0 && size / count != mysize)
364 return PyErr_NoMemory();
365 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
366 if (result != NULL && size != 0) {
367 if (mysize == 1)
368 memset(result->ob_bytes, self->ob_bytes[0], size);
369 else {
370 Py_ssize_t i;
371 for (i = 0; i < count; i++)
372 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
373 }
374 }
375 return (PyObject *)result;
376}
377
378static PyObject *
379bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
380{
381 Py_ssize_t mysize;
382 Py_ssize_t size;
383
384 if (count < 0)
385 count = 0;
386 mysize = Py_SIZE(self);
387 size = mysize * count;
388 if (count != 0 && size / count != mysize)
389 return PyErr_NoMemory();
390 if (size < self->ob_alloc) {
391 Py_SIZE(self) = size;
392 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
393 }
394 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
395 return NULL;
396
397 if (mysize == 1)
398 memset(self->ob_bytes, self->ob_bytes[0], size);
399 else {
400 Py_ssize_t i;
401 for (i = 1; i < count; i++)
402 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
403 }
404
405 Py_INCREF(self);
406 return (PyObject *)self;
407}
408
409static PyObject *
410bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
411{
412 if (i < 0)
413 i += Py_SIZE(self);
414 if (i < 0 || i >= Py_SIZE(self)) {
415 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
416 return NULL;
417 }
418 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
419}
420
421static PyObject *
Georg Brandl3e483f62008-07-16 22:57:41 +0000422bytes_subscript(PyByteArrayObject *self, PyObject *index)
Christian Heimes44720832008-05-26 13:01:01 +0000423{
Georg Brandl3e483f62008-07-16 22:57:41 +0000424 if (PyIndex_Check(index)) {
425 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000426
427 if (i == -1 && PyErr_Occurred())
428 return NULL;
429
430 if (i < 0)
431 i += PyByteArray_GET_SIZE(self);
432
433 if (i < 0 || i >= Py_SIZE(self)) {
434 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
435 return NULL;
436 }
437 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
438 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000439 else if (PySlice_Check(index)) {
Christian Heimes44720832008-05-26 13:01:01 +0000440 Py_ssize_t start, stop, step, slicelength, cur, i;
Georg Brandl3e483f62008-07-16 22:57:41 +0000441 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000442 PyByteArray_GET_SIZE(self),
443 &start, &stop, &step, &slicelength) < 0) {
444 return NULL;
445 }
446
447 if (slicelength <= 0)
448 return PyByteArray_FromStringAndSize("", 0);
449 else if (step == 1) {
450 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
451 slicelength);
452 }
453 else {
454 char *source_buf = PyByteArray_AS_STRING(self);
455 char *result_buf = (char *)PyMem_Malloc(slicelength);
456 PyObject *result;
457
458 if (result_buf == NULL)
459 return PyErr_NoMemory();
460
461 for (cur = start, i = 0; i < slicelength;
462 cur += step, i++) {
463 result_buf[i] = source_buf[cur];
464 }
465 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
466 PyMem_Free(result_buf);
467 return result;
468 }
469 }
470 else {
471 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
472 return NULL;
473 }
474}
475
476static int
477bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
478 PyObject *values)
479{
480 Py_ssize_t avail, needed;
481 void *bytes;
482 Py_buffer vbytes;
483 int res = 0;
484
485 vbytes.len = -1;
486 if (values == (PyObject *)self) {
487 /* Make a copy and call this function recursively */
488 int err;
489 values = PyByteArray_FromObject(values);
490 if (values == NULL)
491 return -1;
492 err = bytes_setslice(self, lo, hi, values);
493 Py_DECREF(values);
494 return err;
495 }
496 if (values == NULL) {
497 /* del b[lo:hi] */
498 bytes = NULL;
499 needed = 0;
500 }
501 else {
502 if (_getbuffer(values, &vbytes) < 0) {
503 PyErr_Format(PyExc_TypeError,
Neal Norwitzc86b54c2008-07-20 19:35:23 +0000504 "can't set bytearray slice from %.100s",
Christian Heimes44720832008-05-26 13:01:01 +0000505 Py_TYPE(values)->tp_name);
506 return -1;
507 }
508 needed = vbytes.len;
509 bytes = vbytes.buf;
510 }
511
512 if (lo < 0)
513 lo = 0;
514 if (hi < lo)
515 hi = lo;
516 if (hi > Py_SIZE(self))
517 hi = Py_SIZE(self);
518
519 avail = hi - lo;
520 if (avail < 0)
521 lo = hi = avail = 0;
522
523 if (avail != needed) {
524 if (avail > needed) {
525 /*
526 0 lo hi old_size
527 | |<----avail----->|<-----tomove------>|
528 | |<-needed->|<-----tomove------>|
529 0 lo new_hi new_size
530 */
531 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
532 Py_SIZE(self) - hi);
533 }
534 /* XXX(nnorwitz): need to verify this can't overflow! */
535 if (PyByteArray_Resize((PyObject *)self,
536 Py_SIZE(self) + needed - avail) < 0) {
537 res = -1;
538 goto finish;
539 }
540 if (avail < needed) {
541 /*
542 0 lo hi old_size
543 | |<-avail->|<-----tomove------>|
544 | |<----needed---->|<-----tomove------>|
545 0 lo new_hi new_size
546 */
547 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
548 Py_SIZE(self) - lo - needed);
549 }
550 }
551
552 if (needed > 0)
553 memcpy(self->ob_bytes + lo, bytes, needed);
554
555
556 finish:
557 if (vbytes.len != -1)
558 PyObject_ReleaseBuffer(values, &vbytes);
559 return res;
560}
561
562static int
563bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
564{
565 int ival;
566
567 if (i < 0)
568 i += Py_SIZE(self);
569
570 if (i < 0 || i >= Py_SIZE(self)) {
571 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
572 return -1;
573 }
574
575 if (value == NULL)
576 return bytes_setslice(self, i, i+1, NULL);
577
578 if (!_getbytevalue(value, &ival))
579 return -1;
580
581 self->ob_bytes[i] = ival;
582 return 0;
583}
584
585static int
Georg Brandl3e483f62008-07-16 22:57:41 +0000586bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
Christian Heimes44720832008-05-26 13:01:01 +0000587{
588 Py_ssize_t start, stop, step, slicelen, needed;
589 char *bytes;
590
Georg Brandl3e483f62008-07-16 22:57:41 +0000591 if (PyIndex_Check(index)) {
592 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000593
594 if (i == -1 && PyErr_Occurred())
595 return -1;
596
597 if (i < 0)
598 i += PyByteArray_GET_SIZE(self);
599
600 if (i < 0 || i >= Py_SIZE(self)) {
601 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
602 return -1;
603 }
604
605 if (values == NULL) {
606 /* Fall through to slice assignment */
607 start = i;
608 stop = i + 1;
609 step = 1;
610 slicelen = 1;
611 }
612 else {
Georg Brandl3e483f62008-07-16 22:57:41 +0000613 int ival;
614 if (!_getbytevalue(values, &ival))
Christian Heimes44720832008-05-26 13:01:01 +0000615 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000616 self->ob_bytes[i] = (char)ival;
617 return 0;
618 }
619 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000620 else if (PySlice_Check(index)) {
621 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000622 PyByteArray_GET_SIZE(self),
623 &start, &stop, &step, &slicelen) < 0) {
624 return -1;
625 }
626 }
627 else {
628 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
629 return -1;
630 }
631
632 if (values == NULL) {
633 bytes = NULL;
634 needed = 0;
635 }
636 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
637 /* Make a copy an call this function recursively */
638 int err;
639 values = PyByteArray_FromObject(values);
640 if (values == NULL)
641 return -1;
Georg Brandl3e483f62008-07-16 22:57:41 +0000642 err = bytes_ass_subscript(self, index, values);
Christian Heimes44720832008-05-26 13:01:01 +0000643 Py_DECREF(values);
644 return err;
645 }
646 else {
647 assert(PyByteArray_Check(values));
648 bytes = ((PyByteArrayObject *)values)->ob_bytes;
649 needed = Py_SIZE(values);
650 }
651 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
652 if ((step < 0 && start < stop) ||
653 (step > 0 && start > stop))
654 stop = start;
655 if (step == 1) {
656 if (slicelen != needed) {
657 if (slicelen > needed) {
658 /*
659 0 start stop old_size
660 | |<---slicelen--->|<-----tomove------>|
661 | |<-needed->|<-----tomove------>|
662 0 lo new_hi new_size
663 */
664 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
665 Py_SIZE(self) - stop);
666 }
667 if (PyByteArray_Resize((PyObject *)self,
668 Py_SIZE(self) + needed - slicelen) < 0)
669 return -1;
670 if (slicelen < needed) {
671 /*
672 0 lo hi old_size
673 | |<-avail->|<-----tomove------>|
674 | |<----needed---->|<-----tomove------>|
675 0 lo new_hi new_size
676 */
677 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
678 Py_SIZE(self) - start - needed);
679 }
680 }
681
682 if (needed > 0)
683 memcpy(self->ob_bytes + start, bytes, needed);
684
685 return 0;
686 }
687 else {
688 if (needed == 0) {
689 /* Delete slice */
690 Py_ssize_t cur, i;
691
692 if (step < 0) {
693 stop = start + 1;
694 start = stop + step * (slicelen - 1) - 1;
695 step = -step;
696 }
697 for (cur = start, i = 0;
698 i < slicelen; cur += step, i++) {
699 Py_ssize_t lim = step - 1;
700
701 if (cur + step >= PyByteArray_GET_SIZE(self))
702 lim = PyByteArray_GET_SIZE(self) - cur - 1;
703
704 memmove(self->ob_bytes + cur - i,
705 self->ob_bytes + cur + 1, lim);
706 }
707 /* Move the tail of the bytes, in one chunk */
708 cur = start + slicelen*step;
709 if (cur < PyByteArray_GET_SIZE(self)) {
710 memmove(self->ob_bytes + cur - slicelen,
711 self->ob_bytes + cur,
712 PyByteArray_GET_SIZE(self) - cur);
713 }
714 if (PyByteArray_Resize((PyObject *)self,
715 PyByteArray_GET_SIZE(self) - slicelen) < 0)
716 return -1;
717
718 return 0;
719 }
720 else {
721 /* Assign slice */
722 Py_ssize_t cur, i;
723
724 if (needed != slicelen) {
725 PyErr_Format(PyExc_ValueError,
726 "attempt to assign bytes of size %zd "
727 "to extended slice of size %zd",
728 needed, slicelen);
729 return -1;
730 }
731 for (cur = start, i = 0; i < slicelen; cur += step, i++)
732 self->ob_bytes[cur] = bytes[i];
733 return 0;
734 }
735 }
736}
737
738static int
739bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
740{
741 static char *kwlist[] = {"source", "encoding", "errors", 0};
742 PyObject *arg = NULL;
743 const char *encoding = NULL;
744 const char *errors = NULL;
745 Py_ssize_t count;
746 PyObject *it;
747 PyObject *(*iternext)(PyObject *);
748
749 if (Py_SIZE(self) != 0) {
750 /* Empty previous contents (yes, do this first of all!) */
751 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
752 return -1;
753 }
754
755 /* Parse arguments */
Neal Norwitzc86b54c2008-07-20 19:35:23 +0000756 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
Christian Heimes44720832008-05-26 13:01:01 +0000757 &arg, &encoding, &errors))
758 return -1;
759
760 /* Make a quick exit if no first argument */
761 if (arg == NULL) {
762 if (encoding != NULL || errors != NULL) {
763 PyErr_SetString(PyExc_TypeError,
764 "encoding or errors without sequence argument");
765 return -1;
766 }
767 return 0;
768 }
769
770 if (PyBytes_Check(arg)) {
771 PyObject *new, *encoded;
772 if (encoding != NULL) {
773 encoded = PyCodec_Encode(arg, encoding, errors);
774 if (encoded == NULL)
775 return -1;
776 assert(PyBytes_Check(encoded));
777 }
778 else {
779 encoded = arg;
780 Py_INCREF(arg);
781 }
782 new = bytes_iconcat(self, arg);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
788 }
789
790 if (PyUnicode_Check(arg)) {
791 /* Encode via the codec registry */
792 PyObject *encoded, *new;
793 if (encoding == NULL) {
794 PyErr_SetString(PyExc_TypeError,
795 "unicode argument without an encoding");
796 return -1;
797 }
798 encoded = PyCodec_Encode(arg, encoding, errors);
799 if (encoded == NULL)
800 return -1;
801 assert(PyBytes_Check(encoded));
802 new = bytes_iconcat(self, encoded);
803 Py_DECREF(encoded);
804 if (new == NULL)
805 return -1;
806 Py_DECREF(new);
807 return 0;
808 }
809
810 /* If it's not unicode, there can't be encoding or errors */
811 if (encoding != NULL || errors != NULL) {
812 PyErr_SetString(PyExc_TypeError,
813 "encoding or errors without a string argument");
814 return -1;
815 }
816
817 /* Is it an int? */
818 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
819 if (count == -1 && PyErr_Occurred())
820 PyErr_Clear();
821 else {
822 if (count < 0) {
823 PyErr_SetString(PyExc_ValueError, "negative count");
824 return -1;
825 }
826 if (count > 0) {
827 if (PyByteArray_Resize((PyObject *)self, count))
828 return -1;
829 memset(self->ob_bytes, 0, count);
830 }
831 return 0;
832 }
833
834 /* Use the buffer API */
835 if (PyObject_CheckBuffer(arg)) {
836 Py_ssize_t size;
837 Py_buffer view;
838 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
839 return -1;
840 size = view.len;
841 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
842 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
843 goto fail;
844 PyObject_ReleaseBuffer(arg, &view);
845 return 0;
846 fail:
847 PyObject_ReleaseBuffer(arg, &view);
848 return -1;
849 }
850
851 /* XXX Optimize this if the arguments is a list, tuple */
852
853 /* Get the iterator */
854 it = PyObject_GetIter(arg);
855 if (it == NULL)
856 return -1;
857 iternext = *Py_TYPE(it)->tp_iternext;
858
859 /* Run the iterator to exhaustion */
860 for (;;) {
861 PyObject *item;
Georg Brandl3e758462008-07-16 23:10:05 +0000862 int rc, value;
Christian Heimes44720832008-05-26 13:01:01 +0000863
864 /* Get the next item */
865 item = iternext(it);
866 if (item == NULL) {
867 if (PyErr_Occurred()) {
868 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
869 goto error;
870 PyErr_Clear();
871 }
872 break;
873 }
874
875 /* Interpret it as an int (__index__) */
Georg Brandl3e758462008-07-16 23:10:05 +0000876 rc = _getbytevalue(item, &value);
Christian Heimes44720832008-05-26 13:01:01 +0000877 Py_DECREF(item);
Georg Brandl3e758462008-07-16 23:10:05 +0000878 if (!rc)
Christian Heimes44720832008-05-26 13:01:01 +0000879 goto error;
880
Christian Heimes44720832008-05-26 13:01:01 +0000881 /* Append the byte */
882 if (Py_SIZE(self) < self->ob_alloc)
883 Py_SIZE(self)++;
884 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
885 goto error;
886 self->ob_bytes[Py_SIZE(self)-1] = value;
887 }
888
889 /* Clean up and return success */
890 Py_DECREF(it);
891 return 0;
892
893 error:
894 /* Error handling when it != NULL */
895 Py_DECREF(it);
896 return -1;
897}
898
899/* Mostly copied from string_repr, but without the
900 "smart quote" functionality. */
901static PyObject *
902bytes_repr(PyByteArrayObject *self)
903{
904 static const char *hexdigits = "0123456789abcdef";
905 const char *quote_prefix = "bytearray(b";
906 const char *quote_postfix = ")";
907 Py_ssize_t length = Py_SIZE(self);
908 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
909 size_t newsize = 14 + 4 * length;
910 PyObject *v;
911 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
912 PyErr_SetString(PyExc_OverflowError,
913 "bytearray object is too large to make repr");
914 return NULL;
915 }
916 v = PyUnicode_FromUnicode(NULL, newsize);
917 if (v == NULL) {
918 return NULL;
919 }
920 else {
921 register Py_ssize_t i;
922 register Py_UNICODE c;
923 register Py_UNICODE *p;
924 int quote;
925
926 /* Figure out which quote to use; single is preferred */
927 quote = '\'';
928 {
929 char *test, *start;
930 start = PyByteArray_AS_STRING(self);
931 for (test = start; test < start+length; ++test) {
932 if (*test == '"') {
933 quote = '\''; /* back to single */
934 goto decided;
935 }
936 else if (*test == '\'')
937 quote = '"';
938 }
939 decided:
940 ;
941 }
942
943 p = PyUnicode_AS_UNICODE(v);
944 while (*quote_prefix)
945 *p++ = *quote_prefix++;
946 *p++ = quote;
947
948 for (i = 0; i < length; i++) {
949 /* There's at least enough room for a hex escape
950 and a closing quote. */
951 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
952 c = self->ob_bytes[i];
953 if (c == '\'' || c == '\\')
954 *p++ = '\\', *p++ = c;
955 else if (c == '\t')
956 *p++ = '\\', *p++ = 't';
957 else if (c == '\n')
958 *p++ = '\\', *p++ = 'n';
959 else if (c == '\r')
960 *p++ = '\\', *p++ = 'r';
961 else if (c == 0)
962 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
963 else if (c < ' ' || c >= 0x7f) {
964 *p++ = '\\';
965 *p++ = 'x';
966 *p++ = hexdigits[(c & 0xf0) >> 4];
967 *p++ = hexdigits[c & 0xf];
968 }
969 else
970 *p++ = c;
971 }
972 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
973 *p++ = quote;
974 while (*quote_postfix) {
975 *p++ = *quote_postfix++;
976 }
977 *p = '\0';
978 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
979 Py_DECREF(v);
980 return NULL;
981 }
982 return v;
983 }
984}
985
986static PyObject *
987bytes_str(PyObject *op)
988{
989#if 0
990 if (Py_BytesWarningFlag) {
991 if (PyErr_WarnEx(PyExc_BytesWarning,
992 "str() on a bytearray instance", 1))
993 return NULL;
994 }
995 return bytes_repr((PyByteArrayObject*)op);
996#endif
997 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
998}
999
1000static PyObject *
1001bytes_richcompare(PyObject *self, PyObject *other, int op)
1002{
1003 Py_ssize_t self_size, other_size;
1004 Py_buffer self_bytes, other_bytes;
1005 PyObject *res;
1006 Py_ssize_t minsize;
1007 int cmp;
1008
1009 /* Bytes can be compared to anything that supports the (binary)
1010 buffer API. Except that a comparison with Unicode is always an
1011 error, even if the comparison is for equality. */
1012 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1013 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1014 if (Py_BytesWarningFlag && op == Py_EQ) {
1015 if (PyErr_WarnEx(PyExc_BytesWarning,
1016 "Comparsion between bytearray and string", 1))
1017 return NULL;
1018 }
1019
1020 Py_INCREF(Py_NotImplemented);
1021 return Py_NotImplemented;
1022 }
1023
1024 self_size = _getbuffer(self, &self_bytes);
1025 if (self_size < 0) {
1026 PyErr_Clear();
1027 Py_INCREF(Py_NotImplemented);
1028 return Py_NotImplemented;
1029 }
1030
1031 other_size = _getbuffer(other, &other_bytes);
1032 if (other_size < 0) {
1033 PyErr_Clear();
1034 PyObject_ReleaseBuffer(self, &self_bytes);
1035 Py_INCREF(Py_NotImplemented);
1036 return Py_NotImplemented;
1037 }
1038
1039 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1040 /* Shortcut: if the lengths differ, the objects differ */
1041 cmp = (op == Py_NE);
1042 }
1043 else {
1044 minsize = self_size;
1045 if (other_size < minsize)
1046 minsize = other_size;
1047
1048 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1049 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1050
1051 if (cmp == 0) {
1052 if (self_size < other_size)
1053 cmp = -1;
1054 else if (self_size > other_size)
1055 cmp = 1;
1056 }
1057
1058 switch (op) {
1059 case Py_LT: cmp = cmp < 0; break;
1060 case Py_LE: cmp = cmp <= 0; break;
1061 case Py_EQ: cmp = cmp == 0; break;
1062 case Py_NE: cmp = cmp != 0; break;
1063 case Py_GT: cmp = cmp > 0; break;
1064 case Py_GE: cmp = cmp >= 0; break;
1065 }
1066 }
1067
1068 res = cmp ? Py_True : Py_False;
1069 PyObject_ReleaseBuffer(self, &self_bytes);
1070 PyObject_ReleaseBuffer(other, &other_bytes);
1071 Py_INCREF(res);
1072 return res;
1073}
1074
1075static void
1076bytes_dealloc(PyByteArrayObject *self)
1077{
1078 if (self->ob_bytes != 0) {
1079 PyMem_Free(self->ob_bytes);
1080 }
1081 Py_TYPE(self)->tp_free((PyObject *)self);
1082}
1083
1084
1085/* -------------------------------------------------------------------- */
1086/* Methods */
1087
1088#define STRINGLIB_CHAR char
1089#define STRINGLIB_CMP memcmp
1090#define STRINGLIB_LEN PyByteArray_GET_SIZE
1091#define STRINGLIB_STR PyByteArray_AS_STRING
1092#define STRINGLIB_NEW PyByteArray_FromStringAndSize
1093#define STRINGLIB_EMPTY nullbytes
1094#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1095#define STRINGLIB_MUTABLE 1
1096
1097#include "stringlib/fastsearch.h"
1098#include "stringlib/count.h"
1099#include "stringlib/find.h"
1100#include "stringlib/partition.h"
1101#include "stringlib/ctype.h"
1102#include "stringlib/transmogrify.h"
1103
1104
1105/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1106were copied from the old char* style string object. */
1107
1108Py_LOCAL_INLINE(void)
1109_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1110{
1111 if (*end > len)
1112 *end = len;
1113 else if (*end < 0)
1114 *end += len;
1115 if (*end < 0)
1116 *end = 0;
1117 if (*start < 0)
1118 *start += len;
1119 if (*start < 0)
1120 *start = 0;
1121}
1122
1123
1124Py_LOCAL_INLINE(Py_ssize_t)
1125bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1126{
1127 PyObject *subobj;
1128 Py_buffer subbuf;
1129 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1130 Py_ssize_t res;
1131
1132 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1133 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1134 return -2;
1135 if (_getbuffer(subobj, &subbuf) < 0)
1136 return -2;
1137 if (dir > 0)
1138 res = stringlib_find_slice(
1139 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1140 subbuf.buf, subbuf.len, start, end);
1141 else
1142 res = stringlib_rfind_slice(
1143 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1144 subbuf.buf, subbuf.len, start, end);
1145 PyObject_ReleaseBuffer(subobj, &subbuf);
1146 return res;
1147}
1148
1149PyDoc_STRVAR(find__doc__,
1150"B.find(sub [,start [,end]]) -> int\n\
1151\n\
1152Return the lowest index in B where subsection sub is found,\n\
1153such that sub is contained within s[start,end]. Optional\n\
1154arguments start and end are interpreted as in slice notation.\n\
1155\n\
1156Return -1 on failure.");
1157
1158static PyObject *
1159bytes_find(PyByteArrayObject *self, PyObject *args)
1160{
1161 Py_ssize_t result = bytes_find_internal(self, args, +1);
1162 if (result == -2)
1163 return NULL;
1164 return PyInt_FromSsize_t(result);
1165}
1166
1167PyDoc_STRVAR(count__doc__,
1168"B.count(sub [,start [,end]]) -> int\n\
1169\n\
1170Return the number of non-overlapping occurrences of subsection sub in\n\
1171bytes B[start:end]. Optional arguments start and end are interpreted\n\
1172as in slice notation.");
1173
1174static PyObject *
1175bytes_count(PyByteArrayObject *self, PyObject *args)
1176{
1177 PyObject *sub_obj;
1178 const char *str = PyByteArray_AS_STRING(self);
1179 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1180 Py_buffer vsub;
1181 PyObject *count_obj;
1182
1183 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1184 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1185 return NULL;
1186
1187 if (_getbuffer(sub_obj, &vsub) < 0)
1188 return NULL;
1189
1190 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1191
1192 count_obj = PyInt_FromSsize_t(
1193 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1194 );
1195 PyObject_ReleaseBuffer(sub_obj, &vsub);
1196 return count_obj;
1197}
1198
1199
1200PyDoc_STRVAR(index__doc__,
1201"B.index(sub [,start [,end]]) -> int\n\
1202\n\
1203Like B.find() but raise ValueError when the subsection is not found.");
1204
1205static PyObject *
1206bytes_index(PyByteArrayObject *self, PyObject *args)
1207{
1208 Py_ssize_t result = bytes_find_internal(self, args, +1);
1209 if (result == -2)
1210 return NULL;
1211 if (result == -1) {
1212 PyErr_SetString(PyExc_ValueError,
1213 "subsection not found");
1214 return NULL;
1215 }
1216 return PyInt_FromSsize_t(result);
1217}
1218
1219
1220PyDoc_STRVAR(rfind__doc__,
1221"B.rfind(sub [,start [,end]]) -> int\n\
1222\n\
1223Return the highest index in B where subsection sub is found,\n\
1224such that sub is contained within s[start,end]. Optional\n\
1225arguments start and end are interpreted as in slice notation.\n\
1226\n\
1227Return -1 on failure.");
1228
1229static PyObject *
1230bytes_rfind(PyByteArrayObject *self, PyObject *args)
1231{
1232 Py_ssize_t result = bytes_find_internal(self, args, -1);
1233 if (result == -2)
1234 return NULL;
1235 return PyInt_FromSsize_t(result);
1236}
1237
1238
1239PyDoc_STRVAR(rindex__doc__,
1240"B.rindex(sub [,start [,end]]) -> int\n\
1241\n\
1242Like B.rfind() but raise ValueError when the subsection is not found.");
1243
1244static PyObject *
1245bytes_rindex(PyByteArrayObject *self, PyObject *args)
1246{
1247 Py_ssize_t result = bytes_find_internal(self, args, -1);
1248 if (result == -2)
1249 return NULL;
1250 if (result == -1) {
1251 PyErr_SetString(PyExc_ValueError,
1252 "subsection not found");
1253 return NULL;
1254 }
1255 return PyInt_FromSsize_t(result);
1256}
1257
1258
1259static int
1260bytes_contains(PyObject *self, PyObject *arg)
1261{
1262 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1263 if (ival == -1 && PyErr_Occurred()) {
1264 Py_buffer varg;
1265 int pos;
1266 PyErr_Clear();
1267 if (_getbuffer(arg, &varg) < 0)
1268 return -1;
1269 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1270 varg.buf, varg.len, 0);
1271 PyObject_ReleaseBuffer(arg, &varg);
1272 return pos >= 0;
1273 }
1274 if (ival < 0 || ival >= 256) {
1275 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1276 return -1;
1277 }
1278
1279 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1280}
1281
1282
1283/* Matches the end (direction >= 0) or start (direction < 0) of self
1284 * against substr, using the start and end arguments. Returns
1285 * -1 on error, 0 if not found and 1 if found.
1286 */
1287Py_LOCAL(int)
1288_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1289 Py_ssize_t end, int direction)
1290{
1291 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1292 const char* str;
1293 Py_buffer vsubstr;
1294 int rv = 0;
1295
1296 str = PyByteArray_AS_STRING(self);
1297
1298 if (_getbuffer(substr, &vsubstr) < 0)
1299 return -1;
1300
1301 _adjust_indices(&start, &end, len);
1302
1303 if (direction < 0) {
1304 /* startswith */
1305 if (start+vsubstr.len > len) {
1306 goto done;
1307 }
1308 } else {
1309 /* endswith */
1310 if (end-start < vsubstr.len || start > len) {
1311 goto done;
1312 }
1313
1314 if (end-vsubstr.len > start)
1315 start = end - vsubstr.len;
1316 }
1317 if (end-start >= vsubstr.len)
1318 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1319
1320done:
1321 PyObject_ReleaseBuffer(substr, &vsubstr);
1322 return rv;
1323}
1324
1325
1326PyDoc_STRVAR(startswith__doc__,
1327"B.startswith(prefix [,start [,end]]) -> bool\n\
1328\n\
1329Return True if B starts with the specified prefix, False otherwise.\n\
1330With optional start, test B beginning at that position.\n\
1331With optional end, stop comparing B at that position.\n\
1332prefix can also be a tuple of strings to try.");
1333
1334static PyObject *
1335bytes_startswith(PyByteArrayObject *self, PyObject *args)
1336{
1337 Py_ssize_t start = 0;
1338 Py_ssize_t end = PY_SSIZE_T_MAX;
1339 PyObject *subobj;
1340 int result;
1341
1342 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1343 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1344 return NULL;
1345 if (PyTuple_Check(subobj)) {
1346 Py_ssize_t i;
1347 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1348 result = _bytes_tailmatch(self,
1349 PyTuple_GET_ITEM(subobj, i),
1350 start, end, -1);
1351 if (result == -1)
1352 return NULL;
1353 else if (result) {
1354 Py_RETURN_TRUE;
1355 }
1356 }
1357 Py_RETURN_FALSE;
1358 }
1359 result = _bytes_tailmatch(self, subobj, start, end, -1);
1360 if (result == -1)
1361 return NULL;
1362 else
1363 return PyBool_FromLong(result);
1364}
1365
1366PyDoc_STRVAR(endswith__doc__,
1367"B.endswith(suffix [,start [,end]]) -> bool\n\
1368\n\
1369Return True if B ends with the specified suffix, False otherwise.\n\
1370With optional start, test B beginning at that position.\n\
1371With optional end, stop comparing B at that position.\n\
1372suffix can also be a tuple of strings to try.");
1373
1374static PyObject *
1375bytes_endswith(PyByteArrayObject *self, PyObject *args)
1376{
1377 Py_ssize_t start = 0;
1378 Py_ssize_t end = PY_SSIZE_T_MAX;
1379 PyObject *subobj;
1380 int result;
1381
1382 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1383 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1384 return NULL;
1385 if (PyTuple_Check(subobj)) {
1386 Py_ssize_t i;
1387 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1388 result = _bytes_tailmatch(self,
1389 PyTuple_GET_ITEM(subobj, i),
1390 start, end, +1);
1391 if (result == -1)
1392 return NULL;
1393 else if (result) {
1394 Py_RETURN_TRUE;
1395 }
1396 }
1397 Py_RETURN_FALSE;
1398 }
1399 result = _bytes_tailmatch(self, subobj, start, end, +1);
1400 if (result == -1)
1401 return NULL;
1402 else
1403 return PyBool_FromLong(result);
1404}
1405
1406
1407PyDoc_STRVAR(translate__doc__,
1408"B.translate(table[, deletechars]) -> bytearray\n\
1409\n\
1410Return a copy of B, where all characters occurring in the\n\
1411optional argument deletechars are removed, and the remaining\n\
1412characters have been mapped through the given translation\n\
1413table, which must be a bytes object of length 256.");
1414
1415static PyObject *
1416bytes_translate(PyByteArrayObject *self, PyObject *args)
1417{
1418 register char *input, *output;
1419 register const char *table;
1420 register Py_ssize_t i, c, changed = 0;
1421 PyObject *input_obj = (PyObject*)self;
1422 const char *output_start;
1423 Py_ssize_t inlen;
1424 PyObject *result;
1425 int trans_table[256];
1426 PyObject *tableobj, *delobj = NULL;
1427 Py_buffer vtable, vdel;
1428
1429 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1430 &tableobj, &delobj))
1431 return NULL;
1432
1433 if (_getbuffer(tableobj, &vtable) < 0)
1434 return NULL;
1435
1436 if (vtable.len != 256) {
1437 PyErr_SetString(PyExc_ValueError,
1438 "translation table must be 256 characters long");
1439 result = NULL;
1440 goto done;
1441 }
1442
1443 if (delobj != NULL) {
1444 if (_getbuffer(delobj, &vdel) < 0) {
1445 result = NULL;
1446 goto done;
1447 }
1448 }
1449 else {
1450 vdel.buf = NULL;
1451 vdel.len = 0;
1452 }
1453
1454 table = (const char *)vtable.buf;
1455 inlen = PyByteArray_GET_SIZE(input_obj);
1456 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1457 if (result == NULL)
1458 goto done;
1459 output_start = output = PyByteArray_AsString(result);
1460 input = PyByteArray_AS_STRING(input_obj);
1461
1462 if (vdel.len == 0) {
1463 /* If no deletions are required, use faster code */
1464 for (i = inlen; --i >= 0; ) {
1465 c = Py_CHARMASK(*input++);
1466 if (Py_CHARMASK((*output++ = table[c])) != c)
1467 changed = 1;
1468 }
1469 if (changed || !PyByteArray_CheckExact(input_obj))
1470 goto done;
1471 Py_DECREF(result);
1472 Py_INCREF(input_obj);
1473 result = input_obj;
1474 goto done;
1475 }
1476
1477 for (i = 0; i < 256; i++)
1478 trans_table[i] = Py_CHARMASK(table[i]);
1479
1480 for (i = 0; i < vdel.len; i++)
1481 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1482
1483 for (i = inlen; --i >= 0; ) {
1484 c = Py_CHARMASK(*input++);
1485 if (trans_table[c] != -1)
1486 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1487 continue;
1488 changed = 1;
1489 }
1490 if (!changed && PyByteArray_CheckExact(input_obj)) {
1491 Py_DECREF(result);
1492 Py_INCREF(input_obj);
1493 result = input_obj;
1494 goto done;
1495 }
1496 /* Fix the size of the resulting string */
1497 if (inlen > 0)
1498 PyByteArray_Resize(result, output - output_start);
1499
1500done:
1501 PyObject_ReleaseBuffer(tableobj, &vtable);
1502 if (delobj != NULL)
1503 PyObject_ReleaseBuffer(delobj, &vdel);
1504 return result;
1505}
1506
1507
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c within the first target_len
   bytes of target, or NULL if there is none (thin wrapper over memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the `length` bytes of target starting at offset equal
   pattern.  The first and last bytes are compared before memcmp is
   called on the interior.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1522
1523/* Bytes ops must return a string. */
1524/* If the object is subclass of bytes, create a copy */
1525Py_LOCAL(PyByteArrayObject *)
1526return_self(PyByteArrayObject *self)
1527{
1528 if (PyByteArray_CheckExact(self)) {
1529 Py_INCREF(self);
1530 return (PyByteArrayObject *)self;
1531 }
1532 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1533 PyByteArray_AS_STRING(self),
1534 PyByteArray_GET_SIZE(self));
1535}
1536
1537Py_LOCAL_INLINE(Py_ssize_t)
1538countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1539{
1540 Py_ssize_t count=0;
1541 const char *start=target;
1542 const char *end=target+target_len;
1543
1544 while ( (start=findchar(start, end-start, c)) != NULL ) {
1545 count++;
1546 if (count >= maxcount)
1547 break;
1548 start += 1;
1549 }
1550 return count;
1551}
1552
1553Py_LOCAL(Py_ssize_t)
1554findstring(const char *target, Py_ssize_t target_len,
1555 const char *pattern, Py_ssize_t pattern_len,
1556 Py_ssize_t start,
1557 Py_ssize_t end,
1558 int direction)
1559{
1560 if (start < 0) {
1561 start += target_len;
1562 if (start < 0)
1563 start = 0;
1564 }
1565 if (end > target_len) {
1566 end = target_len;
1567 } else if (end < 0) {
1568 end += target_len;
1569 if (end < 0)
1570 end = 0;
1571 }
1572
1573 /* zero-length substrings always match at the first attempt */
1574 if (pattern_len == 0)
1575 return (direction > 0) ? start : end;
1576
1577 end -= pattern_len;
1578
1579 if (direction < 0) {
1580 for (; end >= start; end--)
1581 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1582 return end;
1583 } else {
1584 for (; start <= end; start++)
1585 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1586 return start;
1587 }
1588 return -1;
1589}
1590
1591Py_LOCAL_INLINE(Py_ssize_t)
1592countstring(const char *target, Py_ssize_t target_len,
1593 const char *pattern, Py_ssize_t pattern_len,
1594 Py_ssize_t start,
1595 Py_ssize_t end,
1596 int direction, Py_ssize_t maxcount)
1597{
1598 Py_ssize_t count=0;
1599
1600 if (start < 0) {
1601 start += target_len;
1602 if (start < 0)
1603 start = 0;
1604 }
1605 if (end > target_len) {
1606 end = target_len;
1607 } else if (end < 0) {
1608 end += target_len;
1609 if (end < 0)
1610 end = 0;
1611 }
1612
1613 /* zero-length substrings match everywhere */
1614 if (pattern_len == 0 || maxcount == 0) {
1615 if (target_len+1 < maxcount)
1616 return target_len+1;
1617 return maxcount;
1618 }
1619
1620 end -= pattern_len;
1621 if (direction < 0) {
1622 for (; (end >= start); end--)
1623 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1624 count++;
1625 if (--maxcount <= 0) break;
1626 end -= pattern_len-1;
1627 }
1628 } else {
1629 for (; (start <= end); start++)
1630 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1631 count++;
1632 if (--maxcount <= 0)
1633 break;
1634 start += pattern_len-1;
1635 }
1636 }
1637 return count;
1638}
1639
1640
1641/* Algorithms for different cases of string replacement */
1642
1643/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1644Py_LOCAL(PyByteArrayObject *)
1645replace_interleave(PyByteArrayObject *self,
1646 const char *to_s, Py_ssize_t to_len,
1647 Py_ssize_t maxcount)
1648{
1649 char *self_s, *result_s;
1650 Py_ssize_t self_len, result_len;
1651 Py_ssize_t count, i, product;
1652 PyByteArrayObject *result;
1653
1654 self_len = PyByteArray_GET_SIZE(self);
1655
1656 /* 1 at the end plus 1 after every character */
1657 count = self_len+1;
1658 if (maxcount < count)
1659 count = maxcount;
1660
1661 /* Check for overflow */
1662 /* result_len = count * to_len + self_len; */
1663 product = count * to_len;
1664 if (product / to_len != count) {
1665 PyErr_SetString(PyExc_OverflowError,
1666 "replace string is too long");
1667 return NULL;
1668 }
1669 result_len = product + self_len;
1670 if (result_len < 0) {
1671 PyErr_SetString(PyExc_OverflowError,
1672 "replace string is too long");
1673 return NULL;
1674 }
1675
1676 if (! (result = (PyByteArrayObject *)
1677 PyByteArray_FromStringAndSize(NULL, result_len)) )
1678 return NULL;
1679
1680 self_s = PyByteArray_AS_STRING(self);
1681 result_s = PyByteArray_AS_STRING(result);
1682
1683 /* TODO: special case single character, which doesn't need memcpy */
1684
1685 /* Lay the first one down (guaranteed this will occur) */
1686 Py_MEMCPY(result_s, to_s, to_len);
1687 result_s += to_len;
1688 count -= 1;
1689
1690 for (i=0; i<count; i++) {
1691 *result_s++ = *self_s++;
1692 Py_MEMCPY(result_s, to_s, to_len);
1693 result_s += to_len;
1694 }
1695
1696 /* Copy the rest of the original string */
1697 Py_MEMCPY(result_s, self_s, self_len-i);
1698
1699 return result;
1700}
1701
1702/* Special case for deleting a single character */
1703/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1704Py_LOCAL(PyByteArrayObject *)
1705replace_delete_single_character(PyByteArrayObject *self,
1706 char from_c, Py_ssize_t maxcount)
1707{
1708 char *self_s, *result_s;
1709 char *start, *next, *end;
1710 Py_ssize_t self_len, result_len;
1711 Py_ssize_t count;
1712 PyByteArrayObject *result;
1713
1714 self_len = PyByteArray_GET_SIZE(self);
1715 self_s = PyByteArray_AS_STRING(self);
1716
1717 count = countchar(self_s, self_len, from_c, maxcount);
1718 if (count == 0) {
1719 return return_self(self);
1720 }
1721
1722 result_len = self_len - count; /* from_len == 1 */
1723 assert(result_len>=0);
1724
1725 if ( (result = (PyByteArrayObject *)
1726 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1727 return NULL;
1728 result_s = PyByteArray_AS_STRING(result);
1729
1730 start = self_s;
1731 end = self_s + self_len;
1732 while (count-- > 0) {
1733 next = findchar(start, end-start, from_c);
1734 if (next == NULL)
1735 break;
1736 Py_MEMCPY(result_s, start, next-start);
1737 result_s += (next-start);
1738 start = next+1;
1739 }
1740 Py_MEMCPY(result_s, start, end-start);
1741
1742 return result;
1743}
1744
1745/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1746
1747Py_LOCAL(PyByteArrayObject *)
1748replace_delete_substring(PyByteArrayObject *self,
1749 const char *from_s, Py_ssize_t from_len,
1750 Py_ssize_t maxcount)
1751{
1752 char *self_s, *result_s;
1753 char *start, *next, *end;
1754 Py_ssize_t self_len, result_len;
1755 Py_ssize_t count, offset;
1756 PyByteArrayObject *result;
1757
1758 self_len = PyByteArray_GET_SIZE(self);
1759 self_s = PyByteArray_AS_STRING(self);
1760
1761 count = countstring(self_s, self_len,
1762 from_s, from_len,
1763 0, self_len, 1,
1764 maxcount);
1765
1766 if (count == 0) {
1767 /* no matches */
1768 return return_self(self);
1769 }
1770
1771 result_len = self_len - (count * from_len);
1772 assert (result_len>=0);
1773
1774 if ( (result = (PyByteArrayObject *)
1775 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1776 return NULL;
1777
1778 result_s = PyByteArray_AS_STRING(result);
1779
1780 start = self_s;
1781 end = self_s + self_len;
1782 while (count-- > 0) {
1783 offset = findstring(start, end-start,
1784 from_s, from_len,
1785 0, end-start, FORWARD);
1786 if (offset == -1)
1787 break;
1788 next = start + offset;
1789
1790 Py_MEMCPY(result_s, start, next-start);
1791
1792 result_s += (next-start);
1793 start = next+from_len;
1794 }
1795 Py_MEMCPY(result_s, start, end-start);
1796 return result;
1797}
1798
1799/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1800Py_LOCAL(PyByteArrayObject *)
1801replace_single_character_in_place(PyByteArrayObject *self,
1802 char from_c, char to_c,
1803 Py_ssize_t maxcount)
1804{
1805 char *self_s, *result_s, *start, *end, *next;
1806 Py_ssize_t self_len;
1807 PyByteArrayObject *result;
1808
1809 /* The result string will be the same size */
1810 self_s = PyByteArray_AS_STRING(self);
1811 self_len = PyByteArray_GET_SIZE(self);
1812
1813 next = findchar(self_s, self_len, from_c);
1814
1815 if (next == NULL) {
1816 /* No matches; return the original bytes */
1817 return return_self(self);
1818 }
1819
1820 /* Need to make a new bytes */
1821 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1822 if (result == NULL)
1823 return NULL;
1824 result_s = PyByteArray_AS_STRING(result);
1825 Py_MEMCPY(result_s, self_s, self_len);
1826
1827 /* change everything in-place, starting with this one */
1828 start = result_s + (next-self_s);
1829 *start = to_c;
1830 start++;
1831 end = result_s + self_len;
1832
1833 while (--maxcount > 0) {
1834 next = findchar(start, end-start, from_c);
1835 if (next == NULL)
1836 break;
1837 *next = to_c;
1838 start = next+1;
1839 }
1840
1841 return result;
1842}
1843
1844/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1845Py_LOCAL(PyByteArrayObject *)
1846replace_substring_in_place(PyByteArrayObject *self,
1847 const char *from_s, Py_ssize_t from_len,
1848 const char *to_s, Py_ssize_t to_len,
1849 Py_ssize_t maxcount)
1850{
1851 char *result_s, *start, *end;
1852 char *self_s;
1853 Py_ssize_t self_len, offset;
1854 PyByteArrayObject *result;
1855
1856 /* The result bytes will be the same size */
1857
1858 self_s = PyByteArray_AS_STRING(self);
1859 self_len = PyByteArray_GET_SIZE(self);
1860
1861 offset = findstring(self_s, self_len,
1862 from_s, from_len,
1863 0, self_len, FORWARD);
1864 if (offset == -1) {
1865 /* No matches; return the original bytes */
1866 return return_self(self);
1867 }
1868
1869 /* Need to make a new bytes */
1870 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1871 if (result == NULL)
1872 return NULL;
1873 result_s = PyByteArray_AS_STRING(result);
1874 Py_MEMCPY(result_s, self_s, self_len);
1875
1876 /* change everything in-place, starting with this one */
1877 start = result_s + offset;
1878 Py_MEMCPY(start, to_s, from_len);
1879 start += from_len;
1880 end = result_s + self_len;
1881
1882 while ( --maxcount > 0) {
1883 offset = findstring(start, end-start,
1884 from_s, from_len,
1885 0, end-start, FORWARD);
1886 if (offset==-1)
1887 break;
1888 Py_MEMCPY(start+offset, to_s, from_len);
1889 start += offset+from_len;
1890 }
1891
1892 return result;
1893}
1894
1895/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1896Py_LOCAL(PyByteArrayObject *)
1897replace_single_character(PyByteArrayObject *self,
1898 char from_c,
1899 const char *to_s, Py_ssize_t to_len,
1900 Py_ssize_t maxcount)
1901{
1902 char *self_s, *result_s;
1903 char *start, *next, *end;
1904 Py_ssize_t self_len, result_len;
1905 Py_ssize_t count, product;
1906 PyByteArrayObject *result;
1907
1908 self_s = PyByteArray_AS_STRING(self);
1909 self_len = PyByteArray_GET_SIZE(self);
1910
1911 count = countchar(self_s, self_len, from_c, maxcount);
1912 if (count == 0) {
1913 /* no matches, return unchanged */
1914 return return_self(self);
1915 }
1916
1917 /* use the difference between current and new, hence the "-1" */
1918 /* result_len = self_len + count * (to_len-1) */
1919 product = count * (to_len-1);
1920 if (product / (to_len-1) != count) {
1921 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1922 return NULL;
1923 }
1924 result_len = self_len + product;
1925 if (result_len < 0) {
1926 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1927 return NULL;
1928 }
1929
1930 if ( (result = (PyByteArrayObject *)
1931 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1932 return NULL;
1933 result_s = PyByteArray_AS_STRING(result);
1934
1935 start = self_s;
1936 end = self_s + self_len;
1937 while (count-- > 0) {
1938 next = findchar(start, end-start, from_c);
1939 if (next == NULL)
1940 break;
1941
1942 if (next == start) {
1943 /* replace with the 'to' */
1944 Py_MEMCPY(result_s, to_s, to_len);
1945 result_s += to_len;
1946 start += 1;
1947 } else {
1948 /* copy the unchanged old then the 'to' */
1949 Py_MEMCPY(result_s, start, next-start);
1950 result_s += (next-start);
1951 Py_MEMCPY(result_s, to_s, to_len);
1952 result_s += to_len;
1953 start = next+1;
1954 }
1955 }
1956 /* Copy the remainder of the remaining bytes */
1957 Py_MEMCPY(result_s, start, end-start);
1958
1959 return result;
1960}
1961
1962/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1963Py_LOCAL(PyByteArrayObject *)
1964replace_substring(PyByteArrayObject *self,
1965 const char *from_s, Py_ssize_t from_len,
1966 const char *to_s, Py_ssize_t to_len,
1967 Py_ssize_t maxcount)
1968{
1969 char *self_s, *result_s;
1970 char *start, *next, *end;
1971 Py_ssize_t self_len, result_len;
1972 Py_ssize_t count, offset, product;
1973 PyByteArrayObject *result;
1974
1975 self_s = PyByteArray_AS_STRING(self);
1976 self_len = PyByteArray_GET_SIZE(self);
1977
1978 count = countstring(self_s, self_len,
1979 from_s, from_len,
1980 0, self_len, FORWARD, maxcount);
1981 if (count == 0) {
1982 /* no matches, return unchanged */
1983 return return_self(self);
1984 }
1985
1986 /* Check for overflow */
1987 /* result_len = self_len + count * (to_len-from_len) */
1988 product = count * (to_len-from_len);
1989 if (product / (to_len-from_len) != count) {
1990 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1991 return NULL;
1992 }
1993 result_len = self_len + product;
1994 if (result_len < 0) {
1995 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1996 return NULL;
1997 }
1998
1999 if ( (result = (PyByteArrayObject *)
2000 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2001 return NULL;
2002 result_s = PyByteArray_AS_STRING(result);
2003
2004 start = self_s;
2005 end = self_s + self_len;
2006 while (count-- > 0) {
2007 offset = findstring(start, end-start,
2008 from_s, from_len,
2009 0, end-start, FORWARD);
2010 if (offset == -1)
2011 break;
2012 next = start+offset;
2013 if (next == start) {
2014 /* replace with the 'to' */
2015 Py_MEMCPY(result_s, to_s, to_len);
2016 result_s += to_len;
2017 start += from_len;
2018 } else {
2019 /* copy the unchanged old then the 'to' */
2020 Py_MEMCPY(result_s, start, next-start);
2021 result_s += (next-start);
2022 Py_MEMCPY(result_s, to_s, to_len);
2023 result_s += to_len;
2024 start = next+from_len;
2025 }
2026 }
2027 /* Copy the remainder of the remaining bytes */
2028 Py_MEMCPY(result_s, start, end-start);
2029
2030 return result;
2031}
2032
2033
2034Py_LOCAL(PyByteArrayObject *)
2035replace(PyByteArrayObject *self,
2036 const char *from_s, Py_ssize_t from_len,
2037 const char *to_s, Py_ssize_t to_len,
2038 Py_ssize_t maxcount)
2039{
2040 if (maxcount < 0) {
2041 maxcount = PY_SSIZE_T_MAX;
2042 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2043 /* nothing to do; return the original bytes */
2044 return return_self(self);
2045 }
2046
2047 if (maxcount == 0 ||
2048 (from_len == 0 && to_len == 0)) {
2049 /* nothing to do; return the original bytes */
2050 return return_self(self);
2051 }
2052
2053 /* Handle zero-length special cases */
2054
2055 if (from_len == 0) {
2056 /* insert the 'to' bytes everywhere. */
2057 /* >>> "Python".replace("", ".") */
2058 /* '.P.y.t.h.o.n.' */
2059 return replace_interleave(self, to_s, to_len, maxcount);
2060 }
2061
2062 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2063 /* point for an empty self bytes to generate a non-empty bytes */
2064 /* Special case so the remaining code always gets a non-empty bytes */
2065 if (PyByteArray_GET_SIZE(self) == 0) {
2066 return return_self(self);
2067 }
2068
2069 if (to_len == 0) {
2070 /* delete all occurances of 'from' bytes */
2071 if (from_len == 1) {
2072 return replace_delete_single_character(
2073 self, from_s[0], maxcount);
2074 } else {
2075 return replace_delete_substring(self, from_s, from_len, maxcount);
2076 }
2077 }
2078
2079 /* Handle special case where both bytes have the same length */
2080
2081 if (from_len == to_len) {
2082 if (from_len == 1) {
2083 return replace_single_character_in_place(
2084 self,
2085 from_s[0],
2086 to_s[0],
2087 maxcount);
2088 } else {
2089 return replace_substring_in_place(
2090 self, from_s, from_len, to_s, to_len, maxcount);
2091 }
2092 }
2093
2094 /* Otherwise use the more generic algorithms */
2095 if (from_len == 1) {
2096 return replace_single_character(self, from_s[0],
2097 to_s, to_len, maxcount);
2098 } else {
2099 /* len('from')>=2, len('to')>=1 */
2100 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2101 }
2102}
2103
2104
2105PyDoc_STRVAR(replace__doc__,
2106"B.replace(old, new[, count]) -> bytes\n\
2107\n\
2108Return a copy of B with all occurrences of subsection\n\
2109old replaced by new. If the optional argument count is\n\
2110given, only the first count occurrences are replaced.");
2111
2112static PyObject *
2113bytes_replace(PyByteArrayObject *self, PyObject *args)
2114{
2115 Py_ssize_t count = -1;
2116 PyObject *from, *to, *res;
2117 Py_buffer vfrom, vto;
2118
2119 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2120 return NULL;
2121
2122 if (_getbuffer(from, &vfrom) < 0)
2123 return NULL;
2124 if (_getbuffer(to, &vto) < 0) {
2125 PyObject_ReleaseBuffer(from, &vfrom);
2126 return NULL;
2127 }
2128
2129 res = (PyObject *)replace((PyByteArrayObject *) self,
2130 vfrom.buf, vfrom.len,
2131 vto.buf, vto.len, count);
2132
2133 PyObject_ReleaseBuffer(from, &vfrom);
2134 PyObject_ReleaseBuffer(to, &vto);
2135 return res;
2136}
2137
2138
2139/* Overallocate the initial list to reduce the number of reallocs for small
2140 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2141 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2142 text (roughly 11 words per line) and field delimited data (usually 1-10
2143 fields). For large strings the split algorithms are bandwidth limited
2144 so increasing the preallocation likely will not improve things.*/
2145
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* Number of slots to preallocate for a split limited to maxsplit cuts:
   maxsplit+1 pieces, capped at MAX_PREALLOC. */
/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
2151
/* Create a bytearray from data[left:right] and append it to `list`.
   Expands to several statements and relies on names from the enclosing
   function: the variables `str` and `list`, and the label `onError`,
   which is jumped to when the bytearray cannot be created or the append
   fails.  The temporary reference in `str` is dropped in both cases.
   Not wrapped in braces, so it must not be used as the sole body of an
   unbraced if/else. */
#define SPLIT_APPEND(data, left, right)                                 \
    str = PyByteArray_FromStringAndSize((data) + (left),                \
                                         (right) - (left));             \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    if (PyList_Append(list, str)) {                                     \
        Py_DECREF(str);                                                 \
        goto onError;                                                   \
    }                                                                   \
    else                                                                \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but aware of preallocation: while `count` is below
   MAX_PREALLOC the new bytearray is stored directly into slot `count`
   (PyList_SET_ITEM takes over the reference); after that it is appended.
   Additionally relies on the enclosing function's `count` variable and
   increments it once per piece. */
#define SPLIT_ADD(data, left, right) {                                  \
    str = PyByteArray_FromStringAndSize((data) + (left),                \
                                         (right) - (left));             \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    if (count < MAX_PREALLOC) {                                         \
        PyList_SET_ITEM(list, count, str);                              \
    } else {                                                            \
        if (PyList_Append(list, str)) {                                 \
            Py_DECREF(str);                                             \
            goto onError;                                               \
        }                                                               \
        else                                                            \
            Py_DECREF(str);                                             \
    }                                                                   \
    count++; }

/* Always force the list to the expected size. */
/* Overwrites the list's size field with the enclosing function's
   `count`, i.e. the number of pieces actually stored. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2183
2184
2185Py_LOCAL_INLINE(PyObject *)
2186split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2187{
2188 register Py_ssize_t i, j, count = 0;
2189 PyObject *str;
2190 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2191
2192 if (list == NULL)
2193 return NULL;
2194
2195 i = j = 0;
2196 while ((j < len) && (maxcount-- > 0)) {
2197 for(; j < len; j++) {
2198 /* I found that using memchr makes no difference */
2199 if (s[j] == ch) {
2200 SPLIT_ADD(s, i, j);
2201 i = j = j + 1;
2202 break;
2203 }
2204 }
2205 }
2206 if (i <= len) {
2207 SPLIT_ADD(s, i, len);
2208 }
2209 FIX_PREALLOC_SIZE(list);
2210 return list;
2211
2212 onError:
2213 Py_DECREF(list);
2214 return NULL;
2215}
2216
2217
2218Py_LOCAL_INLINE(PyObject *)
2219split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2220{
2221 register Py_ssize_t i, j, count = 0;
2222 PyObject *str;
2223 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2224
2225 if (list == NULL)
2226 return NULL;
2227
2228 for (i = j = 0; i < len; ) {
2229 /* find a token */
2230 while (i < len && ISSPACE(s[i]))
2231 i++;
2232 j = i;
2233 while (i < len && !ISSPACE(s[i]))
2234 i++;
2235 if (j < i) {
2236 if (maxcount-- <= 0)
2237 break;
2238 SPLIT_ADD(s, j, i);
2239 while (i < len && ISSPACE(s[i]))
2240 i++;
2241 j = i;
2242 }
2243 }
2244 if (j < len) {
2245 SPLIT_ADD(s, j, len);
2246 }
2247 FIX_PREALLOC_SIZE(list);
2248 return list;
2249
2250 onError:
2251 Py_DECREF(list);
2252 return NULL;
2253}
2254
2255PyDoc_STRVAR(split__doc__,
2256"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2257\n\
2258Return a list of the sections in B, using sep as the delimiter.\n\
2259If sep is not given, B is split on ASCII whitespace characters\n\
2260(space, tab, return, newline, formfeed, vertical tab).\n\
2261If maxsplit is given, at most maxsplit splits are done.");
2262
2263static PyObject *
2264bytes_split(PyByteArrayObject *self, PyObject *args)
2265{
2266 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2267 Py_ssize_t maxsplit = -1, count = 0;
2268 const char *s = PyByteArray_AS_STRING(self), *sub;
2269 PyObject *list, *str, *subobj = Py_None;
2270 Py_buffer vsub;
2271#ifdef USE_FAST
2272 Py_ssize_t pos;
2273#endif
2274
2275 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2276 return NULL;
2277 if (maxsplit < 0)
2278 maxsplit = PY_SSIZE_T_MAX;
2279
2280 if (subobj == Py_None)
2281 return split_whitespace(s, len, maxsplit);
2282
2283 if (_getbuffer(subobj, &vsub) < 0)
2284 return NULL;
2285 sub = vsub.buf;
2286 n = vsub.len;
2287
2288 if (n == 0) {
2289 PyErr_SetString(PyExc_ValueError, "empty separator");
2290 PyObject_ReleaseBuffer(subobj, &vsub);
2291 return NULL;
2292 }
2293 if (n == 1)
2294 return split_char(s, len, sub[0], maxsplit);
2295
2296 list = PyList_New(PREALLOC_SIZE(maxsplit));
2297 if (list == NULL) {
2298 PyObject_ReleaseBuffer(subobj, &vsub);
2299 return NULL;
2300 }
2301
2302#ifdef USE_FAST
2303 i = j = 0;
2304 while (maxsplit-- > 0) {
2305 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2306 if (pos < 0)
2307 break;
2308 j = i+pos;
2309 SPLIT_ADD(s, i, j);
2310 i = j + n;
2311 }
2312#else
2313 i = j = 0;
2314 while ((j+n <= len) && (maxsplit-- > 0)) {
2315 for (; j+n <= len; j++) {
2316 if (Py_STRING_MATCH(s, j, sub, n)) {
2317 SPLIT_ADD(s, i, j);
2318 i = j = j + n;
2319 break;
2320 }
2321 }
2322 }
2323#endif
2324 SPLIT_ADD(s, i, len);
2325 FIX_PREALLOC_SIZE(list);
2326 PyObject_ReleaseBuffer(subobj, &vsub);
2327 return list;
2328
2329 onError:
2330 Py_DECREF(list);
2331 PyObject_ReleaseBuffer(subobj, &vsub);
2332 return NULL;
2333}
2334
2335/* stringlib's partition shares nullbytes in some cases.
2336 undo this, we don't want the nullbytes to be shared. */
2337static PyObject *
2338make_nullbytes_unique(PyObject *result)
2339{
2340 if (result != NULL) {
2341 int i;
2342 assert(PyTuple_Check(result));
2343 assert(PyTuple_GET_SIZE(result) == 3);
2344 for (i = 0; i < 3; i++) {
2345 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2346 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2347 if (new == NULL) {
2348 Py_DECREF(result);
2349 result = NULL;
2350 break;
2351 }
2352 Py_DECREF(nullbytes);
2353 PyTuple_SET_ITEM(result, i, new);
2354 }
2355 }
2356 }
2357 return result;
2358}
2359
2360PyDoc_STRVAR(partition__doc__,
2361"B.partition(sep) -> (head, sep, tail)\n\
2362\n\
2363Searches for the separator sep in B, and returns the part before it,\n\
2364the separator itself, and the part after it. If the separator is not\n\
2365found, returns B and two empty bytearray objects.");
2366
2367static PyObject *
2368bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2369{
2370 PyObject *bytesep, *result;
2371
2372 bytesep = PyByteArray_FromObject(sep_obj);
2373 if (! bytesep)
2374 return NULL;
2375
2376 result = stringlib_partition(
2377 (PyObject*) self,
2378 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2379 bytesep,
2380 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2381 );
2382
2383 Py_DECREF(bytesep);
2384 return make_nullbytes_unique(result);
2385}
2386
2387PyDoc_STRVAR(rpartition__doc__,
2388"B.rpartition(sep) -> (tail, sep, head)\n\
2389\n\
2390Searches for the separator sep in B, starting at the end of B,\n\
2391and returns the part before it, the separator itself, and the\n\
2392part after it. If the separator is not found, returns two empty\n\
2393bytearray objects and B.");
2394
2395static PyObject *
2396bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2397{
2398 PyObject *bytesep, *result;
2399
2400 bytesep = PyByteArray_FromObject(sep_obj);
2401 if (! bytesep)
2402 return NULL;
2403
2404 result = stringlib_rpartition(
2405 (PyObject*) self,
2406 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2407 bytesep,
2408 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2409 );
2410
2411 Py_DECREF(bytesep);
2412 return make_nullbytes_unique(result);
2413}
2414
/* Split the buffer s[0:len] on the single byte `ch', scanning from the right
   and performing at most `maxcount' splits.  Returns a list (reversed into
   left-to-right order before returning), or NULL on failure.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are macros
   defined outside this part of the file.  They appear to use the locals
   `list', `str' and `count' and the `onError' label, which would explain why
   those names exist here without being referenced directly -- confirm
   against the macro definitions before renaming anything. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count=0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    /* i scans leftwards; j is the index of the last byte of the piece that
       is currently being delimited. */
    i = j = len - 1;
    while ((i >= 0) && (maxcount-- > 0)) {
        for (; i >= 0; i--) {
            if (s[i] == ch) {
                SPLIT_ADD(s, i + 1, j + 1);
                j = i = i - 1;
                break;
            }
        }
    }
    /* Whatever lies to the left of the last separator found (possibly an
       empty range) becomes the final piece. */
    if (j >= -1) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* Pieces were collected right to left. */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2448
/* Split the buffer s[0:len] on runs of bytes for which ISSPACE() is true,
   scanning from the right and performing at most `maxcount' splits.  Returns
   a list (reversed into left-to-right order before returning), or NULL on
   failure.

   NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are macros
   defined outside this part of the file.  They appear to use the locals
   `list', `str' and `count' and the `onError' label -- confirm against the
   macro definitions before renaming anything. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
    register Py_ssize_t i, j, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    for (i = j = len - 1; i >= 0; ) {
        /* find a token */
        while (i >= 0 && ISSPACE(s[i]))
            i--;
        /* j: last byte of the token; i then moves to just before its
           first byte. */
        j = i;
        while (i >= 0 && !ISSPACE(s[i]))
            i--;
        if (j > i) {
            /* Split budget exhausted: leave the loop with j still marking
               the end of the unsplit remainder. */
            if (maxcount-- <= 0)
                break;
            SPLIT_ADD(s, i + 1, j + 1);
            /* Skip the whitespace preceding the token just added. */
            while (i >= 0 && ISSPACE(s[i]))
                i--;
            j = i;
        }
    }
    /* Anything left over (s[0:j+1]) is added as a single piece. */
    if (j >= 0) {
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    /* Pieces were collected right to left. */
    if (PyList_Reverse(list) < 0)
        goto onError;

    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
2488
2489PyDoc_STRVAR(rsplit__doc__,
2490"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2491\n\
2492Return a list of the sections in B, using sep as the delimiter,\n\
2493starting at the end of B and working to the front.\n\
2494If sep is not given, B is split on ASCII whitespace characters\n\
2495(space, tab, return, newline, formfeed, vertical tab).\n\
2496If maxsplit is given, at most maxsplit splits are done.");
2497
2498static PyObject *
2499bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2500{
2501 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2502 Py_ssize_t maxsplit = -1, count = 0;
2503 const char *s = PyByteArray_AS_STRING(self), *sub;
2504 PyObject *list, *str, *subobj = Py_None;
2505 Py_buffer vsub;
2506
2507 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2508 return NULL;
2509 if (maxsplit < 0)
2510 maxsplit = PY_SSIZE_T_MAX;
2511
2512 if (subobj == Py_None)
2513 return rsplit_whitespace(s, len, maxsplit);
2514
2515 if (_getbuffer(subobj, &vsub) < 0)
2516 return NULL;
2517 sub = vsub.buf;
2518 n = vsub.len;
2519
2520 if (n == 0) {
2521 PyErr_SetString(PyExc_ValueError, "empty separator");
2522 PyObject_ReleaseBuffer(subobj, &vsub);
2523 return NULL;
2524 }
2525 else if (n == 1)
2526 return rsplit_char(s, len, sub[0], maxsplit);
2527
2528 list = PyList_New(PREALLOC_SIZE(maxsplit));
2529 if (list == NULL) {
2530 PyObject_ReleaseBuffer(subobj, &vsub);
2531 return NULL;
2532 }
2533
2534 j = len;
2535 i = j - n;
2536
2537 while ( (i >= 0) && (maxsplit-- > 0) ) {
2538 for (; i>=0; i--) {
2539 if (Py_STRING_MATCH(s, i, sub, n)) {
2540 SPLIT_ADD(s, i + n, j);
2541 j = i;
2542 i -= n;
2543 break;
2544 }
2545 }
2546 }
2547 SPLIT_ADD(s, 0, j);
2548 FIX_PREALLOC_SIZE(list);
2549 if (PyList_Reverse(list) < 0)
2550 goto onError;
2551 PyObject_ReleaseBuffer(subobj, &vsub);
2552 return list;
2553
2554onError:
2555 Py_DECREF(list);
2556 PyObject_ReleaseBuffer(subobj, &vsub);
2557 return NULL;
2558}
2559
2560PyDoc_STRVAR(reverse__doc__,
2561"B.reverse() -> None\n\
2562\n\
2563Reverse the order of the values in B in place.");
2564static PyObject *
2565bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2566{
2567 char swap, *head, *tail;
2568 Py_ssize_t i, j, n = Py_SIZE(self);
2569
2570 j = n / 2;
2571 head = self->ob_bytes;
2572 tail = head + n - 1;
2573 for (i = 0; i < j; i++) {
2574 swap = *head;
2575 *head++ = *tail;
2576 *tail-- = swap;
2577 }
2578
2579 Py_RETURN_NONE;
2580}
2581
2582PyDoc_STRVAR(insert__doc__,
2583"B.insert(index, int) -> None\n\
2584\n\
2585Insert a single item into the bytearray before the given index.");
2586static PyObject *
2587bytes_insert(PyByteArrayObject *self, PyObject *args)
2588{
Georg Brandl3e483f62008-07-16 22:57:41 +00002589 PyObject *value;
2590 int ival;
Christian Heimes44720832008-05-26 13:01:01 +00002591 Py_ssize_t where, n = Py_SIZE(self);
2592
Georg Brandl3e483f62008-07-16 22:57:41 +00002593 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
Christian Heimes44720832008-05-26 13:01:01 +00002594 return NULL;
2595
2596 if (n == PY_SSIZE_T_MAX) {
2597 PyErr_SetString(PyExc_OverflowError,
2598 "cannot add more objects to bytes");
2599 return NULL;
2600 }
Georg Brandl3e483f62008-07-16 22:57:41 +00002601 if (!_getbytevalue(value, &ival))
Christian Heimes44720832008-05-26 13:01:01 +00002602 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002603 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2604 return NULL;
2605
2606 if (where < 0) {
2607 where += n;
2608 if (where < 0)
2609 where = 0;
2610 }
2611 if (where > n)
2612 where = n;
2613 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Georg Brandl3e483f62008-07-16 22:57:41 +00002614 self->ob_bytes[where] = ival;
Christian Heimes44720832008-05-26 13:01:01 +00002615
2616 Py_RETURN_NONE;
2617}
2618
2619PyDoc_STRVAR(append__doc__,
2620"B.append(int) -> None\n\
2621\n\
2622Append a single item to the end of B.");
2623static PyObject *
2624bytes_append(PyByteArrayObject *self, PyObject *arg)
2625{
2626 int value;
2627 Py_ssize_t n = Py_SIZE(self);
2628
2629 if (! _getbytevalue(arg, &value))
2630 return NULL;
2631 if (n == PY_SSIZE_T_MAX) {
2632 PyErr_SetString(PyExc_OverflowError,
2633 "cannot add more objects to bytes");
2634 return NULL;
2635 }
2636 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2637 return NULL;
2638
2639 self->ob_bytes[n] = value;
2640
2641 Py_RETURN_NONE;
2642}
2643
2644PyDoc_STRVAR(extend__doc__,
2645"B.extend(iterable int) -> None\n\
2646\n\
2647Append all the elements from the iterator or sequence to the\n\
2648end of B.");
2649static PyObject *
2650bytes_extend(PyByteArrayObject *self, PyObject *arg)
2651{
2652 PyObject *it, *item, *bytes_obj;
2653 Py_ssize_t buf_size = 0, len = 0;
2654 int value;
2655 char *buf;
2656
2657 /* bytes_setslice code only accepts something supporting PEP 3118. */
2658 if (PyObject_CheckBuffer(arg)) {
2659 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2660 return NULL;
2661
2662 Py_RETURN_NONE;
2663 }
2664
2665 it = PyObject_GetIter(arg);
2666 if (it == NULL)
2667 return NULL;
2668
2669 /* Try to determine the length of the argument. 32 is abitrary. */
2670 buf_size = _PyObject_LengthHint(arg, 32);
2671
2672 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2673 if (bytes_obj == NULL)
2674 return NULL;
2675 buf = PyByteArray_AS_STRING(bytes_obj);
2676
2677 while ((item = PyIter_Next(it)) != NULL) {
2678 if (! _getbytevalue(item, &value)) {
2679 Py_DECREF(item);
2680 Py_DECREF(it);
2681 Py_DECREF(bytes_obj);
2682 return NULL;
2683 }
2684 buf[len++] = value;
2685 Py_DECREF(item);
2686
2687 if (len >= buf_size) {
2688 buf_size = len + (len >> 1) + 1;
2689 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2690 Py_DECREF(it);
2691 Py_DECREF(bytes_obj);
2692 return NULL;
2693 }
2694 /* Recompute the `buf' pointer, since the resizing operation may
2695 have invalidated it. */
2696 buf = PyByteArray_AS_STRING(bytes_obj);
2697 }
2698 }
2699 Py_DECREF(it);
2700
2701 /* Resize down to exact size. */
2702 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2703 Py_DECREF(bytes_obj);
2704 return NULL;
2705 }
2706
2707 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2708 return NULL;
2709 Py_DECREF(bytes_obj);
2710
2711 Py_RETURN_NONE;
2712}
2713
2714PyDoc_STRVAR(pop__doc__,
2715"B.pop([index]) -> int\n\
2716\n\
2717Remove and return a single item from B. If no index\n\
Andrew M. Kuchlingd8972642008-06-21 13:29:12 +00002718argument is given, will pop the last value.");
Christian Heimes44720832008-05-26 13:01:01 +00002719static PyObject *
2720bytes_pop(PyByteArrayObject *self, PyObject *args)
2721{
2722 int value;
2723 Py_ssize_t where = -1, n = Py_SIZE(self);
2724
2725 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2726 return NULL;
2727
2728 if (n == 0) {
2729 PyErr_SetString(PyExc_OverflowError,
2730 "cannot pop an empty bytes");
2731 return NULL;
2732 }
2733 if (where < 0)
2734 where += Py_SIZE(self);
2735 if (where < 0 || where >= Py_SIZE(self)) {
2736 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2737 return NULL;
2738 }
2739
2740 value = self->ob_bytes[where];
2741 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2742 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2743 return NULL;
2744
2745 return PyInt_FromLong(value);
2746}
2747
2748PyDoc_STRVAR(remove__doc__,
2749"B.remove(int) -> None\n\
2750\n\
2751Remove the first occurance of a value in B.");
2752static PyObject *
2753bytes_remove(PyByteArrayObject *self, PyObject *arg)
2754{
2755 int value;
2756 Py_ssize_t where, n = Py_SIZE(self);
2757
2758 if (! _getbytevalue(arg, &value))
2759 return NULL;
2760
2761 for (where = 0; where < n; where++) {
2762 if (self->ob_bytes[where] == value)
2763 break;
2764 }
2765 if (where == n) {
2766 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2767 return NULL;
2768 }
2769
2770 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2771 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2772 return NULL;
2773
2774 Py_RETURN_NONE;
2775}
2776
2777/* XXX These two helpers could be optimized if argsize == 1 */
2778
2779static Py_ssize_t
2780lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2781 void *argptr, Py_ssize_t argsize)
2782{
2783 Py_ssize_t i = 0;
2784 while (i < mysize && memchr(argptr, myptr[i], argsize))
2785 i++;
2786 return i;
2787}
2788
2789static Py_ssize_t
2790rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2791 void *argptr, Py_ssize_t argsize)
2792{
2793 Py_ssize_t i = mysize - 1;
2794 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2795 i--;
2796 return i + 1;
2797}
2798
2799PyDoc_STRVAR(strip__doc__,
2800"B.strip([bytes]) -> bytearray\n\
2801\n\
2802Strip leading and trailing bytes contained in the argument.\n\
2803If the argument is omitted, strip ASCII whitespace.");
2804static PyObject *
2805bytes_strip(PyByteArrayObject *self, PyObject *args)
2806{
2807 Py_ssize_t left, right, mysize, argsize;
2808 void *myptr, *argptr;
2809 PyObject *arg = Py_None;
2810 Py_buffer varg;
2811 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2812 return NULL;
2813 if (arg == Py_None) {
2814 argptr = "\t\n\r\f\v ";
2815 argsize = 6;
2816 }
2817 else {
2818 if (_getbuffer(arg, &varg) < 0)
2819 return NULL;
2820 argptr = varg.buf;
2821 argsize = varg.len;
2822 }
2823 myptr = self->ob_bytes;
2824 mysize = Py_SIZE(self);
2825 left = lstrip_helper(myptr, mysize, argptr, argsize);
2826 if (left == mysize)
2827 right = left;
2828 else
2829 right = rstrip_helper(myptr, mysize, argptr, argsize);
2830 if (arg != Py_None)
2831 PyObject_ReleaseBuffer(arg, &varg);
2832 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2833}
2834
2835PyDoc_STRVAR(lstrip__doc__,
2836"B.lstrip([bytes]) -> bytearray\n\
2837\n\
2838Strip leading bytes contained in the argument.\n\
2839If the argument is omitted, strip leading ASCII whitespace.");
2840static PyObject *
2841bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2842{
2843 Py_ssize_t left, right, mysize, argsize;
2844 void *myptr, *argptr;
2845 PyObject *arg = Py_None;
2846 Py_buffer varg;
2847 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2848 return NULL;
2849 if (arg == Py_None) {
2850 argptr = "\t\n\r\f\v ";
2851 argsize = 6;
2852 }
2853 else {
2854 if (_getbuffer(arg, &varg) < 0)
2855 return NULL;
2856 argptr = varg.buf;
2857 argsize = varg.len;
2858 }
2859 myptr = self->ob_bytes;
2860 mysize = Py_SIZE(self);
2861 left = lstrip_helper(myptr, mysize, argptr, argsize);
2862 right = mysize;
2863 if (arg != Py_None)
2864 PyObject_ReleaseBuffer(arg, &varg);
2865 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2866}
2867
2868PyDoc_STRVAR(rstrip__doc__,
2869"B.rstrip([bytes]) -> bytearray\n\
2870\n\
2871Strip trailing bytes contained in the argument.\n\
2872If the argument is omitted, strip trailing ASCII whitespace.");
2873static PyObject *
2874bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2875{
2876 Py_ssize_t left, right, mysize, argsize;
2877 void *myptr, *argptr;
2878 PyObject *arg = Py_None;
2879 Py_buffer varg;
2880 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2881 return NULL;
2882 if (arg == Py_None) {
2883 argptr = "\t\n\r\f\v ";
2884 argsize = 6;
2885 }
2886 else {
2887 if (_getbuffer(arg, &varg) < 0)
2888 return NULL;
2889 argptr = varg.buf;
2890 argsize = varg.len;
2891 }
2892 myptr = self->ob_bytes;
2893 mysize = Py_SIZE(self);
2894 left = 0;
2895 right = rstrip_helper(myptr, mysize, argptr, argsize);
2896 if (arg != Py_None)
2897 PyObject_ReleaseBuffer(arg, &varg);
2898 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2899}
2900
2901PyDoc_STRVAR(decode_doc,
2902"B.decode([encoding[, errors]]) -> unicode object.\n\
2903\n\
2904Decodes B using the codec registered for encoding. encoding defaults\n\
2905to the default encoding. errors may be given to set a different error\n\
2906handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2907a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2908as well as any other name registered with codecs.register_error that is\n\
2909able to handle UnicodeDecodeErrors.");
2910
2911static PyObject *
2912bytes_decode(PyObject *self, PyObject *args)
2913{
2914 const char *encoding = NULL;
2915 const char *errors = NULL;
2916
2917 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2918 return NULL;
2919 if (encoding == NULL)
2920 encoding = PyUnicode_GetDefaultEncoding();
2921 return PyCodec_Decode(self, encoding, errors);
2922}
2923
2924PyDoc_STRVAR(alloc_doc,
2925"B.__alloc__() -> int\n\
2926\n\
2927Returns the number of bytes actually allocated.");
2928
2929static PyObject *
2930bytes_alloc(PyByteArrayObject *self)
2931{
2932 return PyInt_FromSsize_t(self->ob_alloc);
2933}
2934
2935PyDoc_STRVAR(join_doc,
2936"B.join(iterable_of_bytes) -> bytes\n\
2937\n\
2938Concatenates any number of bytearray objects, with B in between each pair.");
2939
2940static PyObject *
2941bytes_join(PyByteArrayObject *self, PyObject *it)
2942{
2943 PyObject *seq;
2944 Py_ssize_t mysize = Py_SIZE(self);
2945 Py_ssize_t i;
2946 Py_ssize_t n;
2947 PyObject **items;
2948 Py_ssize_t totalsize = 0;
2949 PyObject *result;
2950 char *dest;
2951
2952 seq = PySequence_Fast(it, "can only join an iterable");
2953 if (seq == NULL)
2954 return NULL;
2955 n = PySequence_Fast_GET_SIZE(seq);
2956 items = PySequence_Fast_ITEMS(seq);
2957
2958 /* Compute the total size, and check that they are all bytes */
2959 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2960 for (i = 0; i < n; i++) {
2961 PyObject *obj = items[i];
2962 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2963 PyErr_Format(PyExc_TypeError,
2964 "can only join an iterable of bytes "
2965 "(item %ld has type '%.100s')",
2966 /* XXX %ld isn't right on Win64 */
2967 (long)i, Py_TYPE(obj)->tp_name);
2968 goto error;
2969 }
2970 if (i > 0)
2971 totalsize += mysize;
2972 totalsize += Py_SIZE(obj);
2973 if (totalsize < 0) {
2974 PyErr_NoMemory();
2975 goto error;
2976 }
2977 }
2978
2979 /* Allocate the result, and copy the bytes */
2980 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2981 if (result == NULL)
2982 goto error;
2983 dest = PyByteArray_AS_STRING(result);
2984 for (i = 0; i < n; i++) {
2985 PyObject *obj = items[i];
2986 Py_ssize_t size = Py_SIZE(obj);
2987 char *buf;
2988 if (PyByteArray_Check(obj))
2989 buf = PyByteArray_AS_STRING(obj);
2990 else
2991 buf = PyBytes_AS_STRING(obj);
2992 if (i) {
2993 memcpy(dest, self->ob_bytes, mysize);
2994 dest += mysize;
2995 }
2996 memcpy(dest, buf, size);
2997 dest += size;
2998 }
2999
3000 /* Done */
3001 Py_DECREF(seq);
3002 return result;
3003
3004 /* Error handling */
3005 error:
3006 Py_DECREF(seq);
3007 return NULL;
3008}
3009
/* Docstring for the fromhex() class method (registered in bytes_methods
   and implemented by bytes_fromhex()). */
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\
\n\
Create a bytearray object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3016
3017static int
3018hex_digit_to_int(Py_UNICODE c)
3019{
3020 if (c >= 128)
3021 return -1;
3022 if (ISDIGIT(c))
3023 return c - '0';
3024 else {
3025 if (ISUPPER(c))
3026 c = TOLOWER(c);
3027 if (c >= 'a' && c <= 'f')
3028 return c - 'a' + 10;
3029 }
3030 return -1;
3031}
3032
3033static PyObject *
3034bytes_fromhex(PyObject *cls, PyObject *args)
3035{
3036 PyObject *newbytes, *hexobj;
3037 char *buf;
3038 Py_UNICODE *hex;
3039 Py_ssize_t hexlen, byteslen, i, j;
3040 int top, bot;
3041
3042 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3043 return NULL;
3044 assert(PyUnicode_Check(hexobj));
3045 hexlen = PyUnicode_GET_SIZE(hexobj);
3046 hex = PyUnicode_AS_UNICODE(hexobj);
3047 byteslen = hexlen/2; /* This overestimates if there are spaces */
3048 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3049 if (!newbytes)
3050 return NULL;
3051 buf = PyByteArray_AS_STRING(newbytes);
3052 for (i = j = 0; i < hexlen; i += 2) {
3053 /* skip over spaces in the input */
3054 while (hex[i] == ' ')
3055 i++;
3056 if (i >= hexlen)
3057 break;
3058 top = hex_digit_to_int(hex[i]);
3059 bot = hex_digit_to_int(hex[i+1]);
3060 if (top == -1 || bot == -1) {
3061 PyErr_Format(PyExc_ValueError,
3062 "non-hexadecimal number found in "
3063 "fromhex() arg at position %zd", i);
3064 goto error;
3065 }
3066 buf[j++] = (top << 4) + bot;
3067 }
3068 if (PyByteArray_Resize(newbytes, j) < 0)
3069 goto error;
3070 return newbytes;
3071
3072 error:
3073 Py_DECREF(newbytes);
3074 return NULL;
3075}
3076
3077PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3078
3079static PyObject *
3080bytes_reduce(PyByteArrayObject *self)
3081{
3082 PyObject *latin1, *dict;
3083 if (self->ob_bytes)
3084 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3085 Py_SIZE(self), NULL);
3086 else
3087 latin1 = PyUnicode_FromString("");
3088
3089 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3090 if (dict == NULL) {
3091 PyErr_Clear();
3092 dict = Py_None;
3093 Py_INCREF(dict);
3094 }
3095
3096 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3097}
3098
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003099PyDoc_STRVAR(sizeof_doc,
3100"B.__sizeof__() -> int\n\
3101 \n\
3102Returns the size of B in memory, in bytes");
3103static PyObject *
3104bytes_sizeof(PyByteArrayObject *self)
3105{
3106 Py_ssize_t res;
3107
3108 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3109 return PyInt_FromSsize_t(res);
3110}
3111
/* Sequence protocol slots of bytearray; installed as tp_as_sequence in
   PyByteArray_Type.  Slicing slots are left empty here (0). */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,     /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
3124
/* Mapping protocol slots of bytearray; installed as tp_as_mapping in
   PyByteArray_Type. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,              /* mp_length */
    (binaryfunc)bytes_subscript,        /* mp_subscript */
    (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
};
3130
/* Buffer interface slots of bytearray; installed as tp_as_buffer in
   PyByteArray_Type.  The first four entries are the old-style buffer
   procedures, the last two the new-style get/release pair. */
static PyBufferProcs bytes_as_buffer = {
    (readbufferproc)bytes_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)bytes_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)bytes_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)bytes_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)bytes_getbuffer,             /* bf_getbuffer */
    (releasebufferproc)bytes_releasebuffer,     /* bf_releasebuffer */
};
3139
/* Method table of bytearray; installed as tp_methods in PyByteArray_Type.
   Each entry gives the Python-level name, the C implementation, the calling
   convention flags and the docstring.  The three double-underscore methods
   come first, the rest is kept in alphabetical order, and the {NULL} entry
   ends the table.  Entries named stringlib_* and _Py_*__doc__ refer to
   definitions made outside this part of the file. */
static PyMethodDef
bytes_methods[] = {
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* The only class method in the table. */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
3204
3205PyDoc_STRVAR(bytes_doc,
3206"bytearray(iterable_of_ints) -> bytearray.\n\
3207bytearray(string, encoding[, errors]) -> bytearray.\n\
3208bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3209bytearray(memory_view) -> bytearray.\n\
3210\n\
3211Construct an mutable bytearray object from:\n\
3212 - an iterable yielding integers in range(256)\n\
3213 - a text string encoded using the specified encoding\n\
3214 - a bytes or a bytearray object\n\
3215 - any object implementing the buffer API.\n\
3216\n\
3217bytearray(int) -> bytearray.\n\
3218\n\
3219Construct a zero-initialized bytearray of the given length.");
3220
3221
/* Forward declaration: bytes_iter() is defined after the iterator type
   further down, but is needed for the tp_iter slot below. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytearray.  It wires together the sequence, mapping and
   buffer slot tables and the method table defined above; the type can be
   subclassed (Py_TPFLAGS_BASETYPE) and leaves tp_hash empty. */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytes_str,                          /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytes_iter,                         /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
3266
/*********************** Bytes Iterator ****************************/

/* State of an iterator over a bytearray: the sequence being walked and the
   index of the next byte to hand out. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* index of the next item to return */
    PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
3274
3275static void
3276bytesiter_dealloc(bytesiterobject *it)
3277{
3278 _PyObject_GC_UNTRACK(it);
3279 Py_XDECREF(it->it_seq);
3280 PyObject_GC_Del(it);
3281}
3282
/* GC traversal hook of the bytearray iterator: reports the one object
   reference it holds (it_seq) to the visitor.  Py_VISIT is a macro that
   refers to the parameters `visit' and `arg' by name, so those names must
   not be changed.  Returns 0 unless Py_VISIT returns early. */
static int
bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3289
3290static PyObject *
3291bytesiter_next(bytesiterobject *it)
3292{
3293 PyByteArrayObject *seq;
3294 PyObject *item;
3295
3296 assert(it != NULL);
3297 seq = it->it_seq;
3298 if (seq == NULL)
3299 return NULL;
3300 assert(PyByteArray_Check(seq));
3301
3302 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3303 item = PyInt_FromLong(
3304 (unsigned char)seq->ob_bytes[it->it_index]);
3305 if (item != NULL)
3306 ++it->it_index;
3307 return item;
3308 }
3309
3310 Py_DECREF(seq);
3311 it->it_seq = NULL;
3312 return NULL;
3313}
3314
3315static PyObject *
3316bytesiter_length_hint(bytesiterobject *it)
3317{
3318 Py_ssize_t len = 0;
3319 if (it->it_seq)
3320 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3321 return PyInt_FromSsize_t(len);
3322}
3323
/* Docstring for the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");

/* Method table of the bytearray iterator (tp_methods of
   PyByteArrayIter_Type); it only provides __length_hint__. */
static PyMethodDef bytesiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3332
/* Type object of the iterator returned by bytes_iter().  Instances take
   part in garbage collection (Py_TPFLAGS_HAVE_GC, with bytesiter_traverse
   as the traversal hook) and are their own iterator (PyObject_SelfIter). */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytesiter_dealloc,     /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytesiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytesiter_next,      /* tp_iternext */
    bytesiter_methods,                 /* tp_methods */
    0,                                 /* tp_members */
};
3365
3366static PyObject *
3367bytes_iter(PyObject *seq)
3368{
3369 bytesiterobject *it;
3370
3371 if (!PyByteArray_Check(seq)) {
3372 PyErr_BadInternalCall();
3373 return NULL;
3374 }
3375 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3376 if (it == NULL)
3377 return NULL;
3378 it->it_index = 0;
3379 Py_INCREF(seq);
3380 it->it_seq = (PyByteArrayObject *)seq;
3381 _PyObject_GC_TRACK(it);
3382 return (PyObject *)it;
3383}