blob: cdcfcb42d0b9b071b33694477d3f21ae1907d72a [file] [log] [blame]
Christian Heimes44720832008-05-26 13:01:01 +00001/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyByteArrayObject *nullbytes = NULL;
Antoine Pitroubb667d42010-01-17 12:31:10 +00009char _PyByteArray_empty_string[] = "";
Christian Heimes44720832008-05-26 13:01:01 +000010
11void
12PyByteArray_Fini(void)
13{
14 Py_CLEAR(nullbytes);
15}
16
17int
18PyByteArray_Init(void)
19{
20 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
21 if (nullbytes == NULL)
22 return 0;
23 nullbytes->ob_bytes = NULL;
24 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
25 nullbytes->ob_exports = 0;
26 return 1;
27}
28
29/* end nullbytes support */
30
31/* Helpers */
32
33static int
34_getbytevalue(PyObject* arg, int *value)
35{
36 long face_value;
37
Georg Brandl3e483f62008-07-16 22:57:41 +000038 if (PyBytes_CheckExact(arg)) {
Christian Heimes44720832008-05-26 13:01:01 +000039 if (Py_SIZE(arg) != 1) {
40 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
41 return 0;
42 }
Georg Brandl3e483f62008-07-16 22:57:41 +000043 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
44 return 1;
45 }
46 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
47 face_value = PyLong_AsLong(arg);
Christian Heimes44720832008-05-26 13:01:01 +000048 }
49 else {
Georg Brandl3e483f62008-07-16 22:57:41 +000050 PyObject *index = PyNumber_Index(arg);
51 if (index == NULL) {
52 PyErr_Format(PyExc_TypeError,
53 "an integer or string of size 1 is required");
54 return 0;
55 }
56 face_value = PyLong_AsLong(index);
57 Py_DECREF(index);
58 }
Georg Brandl3e483f62008-07-16 22:57:41 +000059
60 if (face_value < 0 || face_value >= 256) {
Georg Brandl3238a3e2008-07-16 23:17:46 +000061 /* this includes the OverflowError in case the long is too large */
Georg Brandl3e483f62008-07-16 22:57:41 +000062 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
Christian Heimes44720832008-05-26 13:01:01 +000063 return 0;
64 }
65
66 *value = face_value;
67 return 1;
68}
69
70static Py_ssize_t
71bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72{
73 if ( index != 0 ) {
74 PyErr_SetString(PyExc_SystemError,
75 "accessing non-existent bytes segment");
76 return -1;
77 }
Antoine Pitroubb667d42010-01-17 12:31:10 +000078 *ptr = (void *)PyByteArray_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +000079 return Py_SIZE(self);
80}
81
82static Py_ssize_t
83bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84{
85 if ( index != 0 ) {
86 PyErr_SetString(PyExc_SystemError,
87 "accessing non-existent bytes segment");
88 return -1;
89 }
Antoine Pitroubb667d42010-01-17 12:31:10 +000090 *ptr = (void *)PyByteArray_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +000091 return Py_SIZE(self);
92}
93
94static Py_ssize_t
95bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96{
97 if ( lenp )
98 *lenp = Py_SIZE(self);
99 return 1;
100}
101
102static Py_ssize_t
103bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104{
105 if ( index != 0 ) {
106 PyErr_SetString(PyExc_SystemError,
107 "accessing non-existent bytes segment");
108 return -1;
109 }
Antoine Pitroubb667d42010-01-17 12:31:10 +0000110 *ptr = PyByteArray_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +0000111 return Py_SIZE(self);
112}
113
114static int
115bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116{
117 int ret;
118 void *ptr;
119 if (view == NULL) {
120 obj->ob_exports++;
121 return 0;
122 }
Antoine Pitroubb667d42010-01-17 12:31:10 +0000123 ptr = (void *) PyByteArray_AS_STRING(obj);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000124 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
Christian Heimes44720832008-05-26 13:01:01 +0000125 if (ret >= 0) {
126 obj->ob_exports++;
127 }
128 return ret;
129}
130
131static void
132bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
133{
134 obj->ob_exports--;
135}
136
137static Py_ssize_t
138_getbuffer(PyObject *obj, Py_buffer *view)
139{
140 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
141
142 if (buffer == NULL || buffer->bf_getbuffer == NULL)
143 {
144 PyErr_Format(PyExc_TypeError,
145 "Type %.100s doesn't support the buffer API",
146 Py_TYPE(obj)->tp_name);
147 return -1;
148 }
149
150 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
151 return -1;
152 return view->len;
153}
154
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000155static int
156_canresize(PyByteArrayObject *self)
157{
158 if (self->ob_exports > 0) {
159 PyErr_SetString(PyExc_BufferError,
160 "Existing exports of data: object cannot be re-sized");
161 return 0;
162 }
163 return 1;
164}
165
Christian Heimes44720832008-05-26 13:01:01 +0000166/* Direct API functions */
167
168PyObject *
169PyByteArray_FromObject(PyObject *input)
170{
171 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
172 input, NULL);
173}
174
175PyObject *
176PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
177{
178 PyByteArrayObject *new;
179 Py_ssize_t alloc;
180
181 if (size < 0) {
182 PyErr_SetString(PyExc_SystemError,
183 "Negative size passed to PyByteArray_FromStringAndSize");
184 return NULL;
185 }
186
187 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
188 if (new == NULL)
189 return NULL;
190
191 if (size == 0) {
192 new->ob_bytes = NULL;
193 alloc = 0;
194 }
195 else {
196 alloc = size + 1;
197 new->ob_bytes = PyMem_Malloc(alloc);
198 if (new->ob_bytes == NULL) {
199 Py_DECREF(new);
200 return PyErr_NoMemory();
201 }
Antoine Pitroubb667d42010-01-17 12:31:10 +0000202 if (bytes != NULL && size > 0)
Christian Heimes44720832008-05-26 13:01:01 +0000203 memcpy(new->ob_bytes, bytes, size);
204 new->ob_bytes[size] = '\0'; /* Trailing null byte */
205 }
206 Py_SIZE(new) = size;
207 new->ob_alloc = alloc;
208 new->ob_exports = 0;
209
210 return (PyObject *)new;
211}
212
213Py_ssize_t
214PyByteArray_Size(PyObject *self)
215{
216 assert(self != NULL);
217 assert(PyByteArray_Check(self));
218
219 return PyByteArray_GET_SIZE(self);
220}
221
222char *
223PyByteArray_AsString(PyObject *self)
224{
225 assert(self != NULL);
226 assert(PyByteArray_Check(self));
227
228 return PyByteArray_AS_STRING(self);
229}
230
231int
232PyByteArray_Resize(PyObject *self, Py_ssize_t size)
233{
234 void *sval;
235 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
236
237 assert(self != NULL);
238 assert(PyByteArray_Check(self));
239 assert(size >= 0);
240
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000241 if (size == Py_SIZE(self)) {
242 return 0;
243 }
244 if (!_canresize((PyByteArrayObject *)self)) {
245 return -1;
246 }
247
Christian Heimes44720832008-05-26 13:01:01 +0000248 if (size < alloc / 2) {
249 /* Major downsize; resize down to exact size */
250 alloc = size + 1;
251 }
252 else if (size < alloc) {
253 /* Within allocated size; quick exit */
254 Py_SIZE(self) = size;
255 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
256 return 0;
257 }
258 else if (size <= alloc * 1.125) {
259 /* Moderate upsize; overallocate similar to list_resize() */
260 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
261 }
262 else {
263 /* Major upsize; resize up to exact size */
264 alloc = size + 1;
265 }
266
Christian Heimes44720832008-05-26 13:01:01 +0000267 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
268 if (sval == NULL) {
269 PyErr_NoMemory();
270 return -1;
271 }
272
273 ((PyByteArrayObject *)self)->ob_bytes = sval;
274 Py_SIZE(self) = size;
275 ((PyByteArrayObject *)self)->ob_alloc = alloc;
276 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
277
278 return 0;
279}
280
281PyObject *
282PyByteArray_Concat(PyObject *a, PyObject *b)
283{
284 Py_ssize_t size;
285 Py_buffer va, vb;
286 PyByteArrayObject *result = NULL;
287
288 va.len = -1;
289 vb.len = -1;
290 if (_getbuffer(a, &va) < 0 ||
291 _getbuffer(b, &vb) < 0) {
292 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
293 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
294 goto done;
295 }
296
297 size = va.len + vb.len;
298 if (size < 0) {
299 return PyErr_NoMemory();
300 goto done;
301 }
302
303 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
304 if (result != NULL) {
305 memcpy(result->ob_bytes, va.buf, va.len);
306 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
307 }
308
309 done:
310 if (va.len != -1)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000311 PyBuffer_Release(&va);
Christian Heimes44720832008-05-26 13:01:01 +0000312 if (vb.len != -1)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000313 PyBuffer_Release(&vb);
Christian Heimes44720832008-05-26 13:01:01 +0000314 return (PyObject *)result;
315}
316
317/* Functions stuffed into the type object */
318
319static Py_ssize_t
320bytes_length(PyByteArrayObject *self)
321{
322 return Py_SIZE(self);
323}
324
325static PyObject *
326bytes_iconcat(PyByteArrayObject *self, PyObject *other)
327{
328 Py_ssize_t mysize;
329 Py_ssize_t size;
330 Py_buffer vo;
331
332 if (_getbuffer(other, &vo) < 0) {
333 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
334 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
335 return NULL;
336 }
337
338 mysize = Py_SIZE(self);
339 size = mysize + vo.len;
340 if (size < 0) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000341 PyBuffer_Release(&vo);
Christian Heimes44720832008-05-26 13:01:01 +0000342 return PyErr_NoMemory();
343 }
344 if (size < self->ob_alloc) {
345 Py_SIZE(self) = size;
346 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
347 }
348 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000349 PyBuffer_Release(&vo);
Christian Heimes44720832008-05-26 13:01:01 +0000350 return NULL;
351 }
352 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000353 PyBuffer_Release(&vo);
Christian Heimes44720832008-05-26 13:01:01 +0000354 Py_INCREF(self);
355 return (PyObject *)self;
356}
357
358static PyObject *
359bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
360{
361 PyByteArrayObject *result;
362 Py_ssize_t mysize;
363 Py_ssize_t size;
364
365 if (count < 0)
366 count = 0;
367 mysize = Py_SIZE(self);
368 size = mysize * count;
369 if (count != 0 && size / count != mysize)
370 return PyErr_NoMemory();
371 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
372 if (result != NULL && size != 0) {
373 if (mysize == 1)
374 memset(result->ob_bytes, self->ob_bytes[0], size);
375 else {
376 Py_ssize_t i;
377 for (i = 0; i < count; i++)
378 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
379 }
380 }
381 return (PyObject *)result;
382}
383
384static PyObject *
385bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
386{
387 Py_ssize_t mysize;
388 Py_ssize_t size;
389
390 if (count < 0)
391 count = 0;
392 mysize = Py_SIZE(self);
393 size = mysize * count;
394 if (count != 0 && size / count != mysize)
395 return PyErr_NoMemory();
396 if (size < self->ob_alloc) {
397 Py_SIZE(self) = size;
398 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
399 }
400 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
401 return NULL;
402
403 if (mysize == 1)
404 memset(self->ob_bytes, self->ob_bytes[0], size);
405 else {
406 Py_ssize_t i;
407 for (i = 1; i < count; i++)
408 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
409 }
410
411 Py_INCREF(self);
412 return (PyObject *)self;
413}
414
415static PyObject *
416bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
417{
418 if (i < 0)
419 i += Py_SIZE(self);
420 if (i < 0 || i >= Py_SIZE(self)) {
421 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
422 return NULL;
423 }
424 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
425}
426
427static PyObject *
Georg Brandl3e483f62008-07-16 22:57:41 +0000428bytes_subscript(PyByteArrayObject *self, PyObject *index)
Christian Heimes44720832008-05-26 13:01:01 +0000429{
Georg Brandl3e483f62008-07-16 22:57:41 +0000430 if (PyIndex_Check(index)) {
431 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000432
433 if (i == -1 && PyErr_Occurred())
434 return NULL;
435
436 if (i < 0)
437 i += PyByteArray_GET_SIZE(self);
438
439 if (i < 0 || i >= Py_SIZE(self)) {
440 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
441 return NULL;
442 }
443 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
444 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000445 else if (PySlice_Check(index)) {
Christian Heimes44720832008-05-26 13:01:01 +0000446 Py_ssize_t start, stop, step, slicelength, cur, i;
Georg Brandl3e483f62008-07-16 22:57:41 +0000447 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000448 PyByteArray_GET_SIZE(self),
449 &start, &stop, &step, &slicelength) < 0) {
450 return NULL;
451 }
452
453 if (slicelength <= 0)
454 return PyByteArray_FromStringAndSize("", 0);
455 else if (step == 1) {
456 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
457 slicelength);
458 }
459 else {
460 char *source_buf = PyByteArray_AS_STRING(self);
461 char *result_buf = (char *)PyMem_Malloc(slicelength);
462 PyObject *result;
463
464 if (result_buf == NULL)
465 return PyErr_NoMemory();
466
467 for (cur = start, i = 0; i < slicelength;
468 cur += step, i++) {
469 result_buf[i] = source_buf[cur];
470 }
471 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
472 PyMem_Free(result_buf);
473 return result;
474 }
475 }
476 else {
477 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
478 return NULL;
479 }
480}
481
482static int
483bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
484 PyObject *values)
485{
486 Py_ssize_t avail, needed;
487 void *bytes;
488 Py_buffer vbytes;
489 int res = 0;
490
491 vbytes.len = -1;
492 if (values == (PyObject *)self) {
493 /* Make a copy and call this function recursively */
494 int err;
495 values = PyByteArray_FromObject(values);
496 if (values == NULL)
497 return -1;
498 err = bytes_setslice(self, lo, hi, values);
499 Py_DECREF(values);
500 return err;
501 }
502 if (values == NULL) {
503 /* del b[lo:hi] */
504 bytes = NULL;
505 needed = 0;
506 }
507 else {
508 if (_getbuffer(values, &vbytes) < 0) {
509 PyErr_Format(PyExc_TypeError,
Neal Norwitzc86b54c2008-07-20 19:35:23 +0000510 "can't set bytearray slice from %.100s",
Christian Heimes44720832008-05-26 13:01:01 +0000511 Py_TYPE(values)->tp_name);
512 return -1;
513 }
514 needed = vbytes.len;
515 bytes = vbytes.buf;
516 }
517
518 if (lo < 0)
519 lo = 0;
520 if (hi < lo)
521 hi = lo;
522 if (hi > Py_SIZE(self))
523 hi = Py_SIZE(self);
524
525 avail = hi - lo;
526 if (avail < 0)
527 lo = hi = avail = 0;
528
529 if (avail != needed) {
530 if (avail > needed) {
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000531 if (!_canresize(self)) {
532 res = -1;
533 goto finish;
534 }
Christian Heimes44720832008-05-26 13:01:01 +0000535 /*
536 0 lo hi old_size
537 | |<----avail----->|<-----tomove------>|
538 | |<-needed->|<-----tomove------>|
539 0 lo new_hi new_size
540 */
541 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
542 Py_SIZE(self) - hi);
543 }
544 /* XXX(nnorwitz): need to verify this can't overflow! */
545 if (PyByteArray_Resize((PyObject *)self,
546 Py_SIZE(self) + needed - avail) < 0) {
547 res = -1;
548 goto finish;
549 }
550 if (avail < needed) {
551 /*
552 0 lo hi old_size
553 | |<-avail->|<-----tomove------>|
554 | |<----needed---->|<-----tomove------>|
555 0 lo new_hi new_size
556 */
557 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
558 Py_SIZE(self) - lo - needed);
559 }
560 }
561
562 if (needed > 0)
563 memcpy(self->ob_bytes + lo, bytes, needed);
564
565
566 finish:
567 if (vbytes.len != -1)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000568 PyBuffer_Release(&vbytes);
Christian Heimes44720832008-05-26 13:01:01 +0000569 return res;
570}
571
572static int
573bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
574{
575 int ival;
576
577 if (i < 0)
578 i += Py_SIZE(self);
579
580 if (i < 0 || i >= Py_SIZE(self)) {
581 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
582 return -1;
583 }
584
585 if (value == NULL)
586 return bytes_setslice(self, i, i+1, NULL);
587
588 if (!_getbytevalue(value, &ival))
589 return -1;
590
591 self->ob_bytes[i] = ival;
592 return 0;
593}
594
595static int
Georg Brandl3e483f62008-07-16 22:57:41 +0000596bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
Christian Heimes44720832008-05-26 13:01:01 +0000597{
598 Py_ssize_t start, stop, step, slicelen, needed;
599 char *bytes;
600
Georg Brandl3e483f62008-07-16 22:57:41 +0000601 if (PyIndex_Check(index)) {
602 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000603
604 if (i == -1 && PyErr_Occurred())
605 return -1;
606
607 if (i < 0)
608 i += PyByteArray_GET_SIZE(self);
609
610 if (i < 0 || i >= Py_SIZE(self)) {
611 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
612 return -1;
613 }
614
615 if (values == NULL) {
616 /* Fall through to slice assignment */
617 start = i;
618 stop = i + 1;
619 step = 1;
620 slicelen = 1;
621 }
622 else {
Georg Brandl3e483f62008-07-16 22:57:41 +0000623 int ival;
624 if (!_getbytevalue(values, &ival))
Christian Heimes44720832008-05-26 13:01:01 +0000625 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000626 self->ob_bytes[i] = (char)ival;
627 return 0;
628 }
629 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000630 else if (PySlice_Check(index)) {
631 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000632 PyByteArray_GET_SIZE(self),
633 &start, &stop, &step, &slicelen) < 0) {
634 return -1;
635 }
636 }
637 else {
638 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
639 return -1;
640 }
641
642 if (values == NULL) {
643 bytes = NULL;
644 needed = 0;
645 }
646 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
647 /* Make a copy an call this function recursively */
648 int err;
649 values = PyByteArray_FromObject(values);
650 if (values == NULL)
651 return -1;
Georg Brandl3e483f62008-07-16 22:57:41 +0000652 err = bytes_ass_subscript(self, index, values);
Christian Heimes44720832008-05-26 13:01:01 +0000653 Py_DECREF(values);
654 return err;
655 }
656 else {
657 assert(PyByteArray_Check(values));
658 bytes = ((PyByteArrayObject *)values)->ob_bytes;
659 needed = Py_SIZE(values);
660 }
661 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
662 if ((step < 0 && start < stop) ||
663 (step > 0 && start > stop))
664 stop = start;
665 if (step == 1) {
666 if (slicelen != needed) {
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000667 if (!_canresize(self))
668 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000669 if (slicelen > needed) {
670 /*
671 0 start stop old_size
672 | |<---slicelen--->|<-----tomove------>|
673 | |<-needed->|<-----tomove------>|
674 0 lo new_hi new_size
675 */
676 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
677 Py_SIZE(self) - stop);
678 }
679 if (PyByteArray_Resize((PyObject *)self,
680 Py_SIZE(self) + needed - slicelen) < 0)
681 return -1;
682 if (slicelen < needed) {
683 /*
684 0 lo hi old_size
685 | |<-avail->|<-----tomove------>|
686 | |<----needed---->|<-----tomove------>|
687 0 lo new_hi new_size
688 */
689 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
690 Py_SIZE(self) - start - needed);
691 }
692 }
693
694 if (needed > 0)
695 memcpy(self->ob_bytes + start, bytes, needed);
696
697 return 0;
698 }
699 else {
700 if (needed == 0) {
701 /* Delete slice */
Mark Dickinson02733542010-01-29 17:16:18 +0000702 size_t cur;
703 Py_ssize_t i;
Christian Heimes44720832008-05-26 13:01:01 +0000704
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000705 if (!_canresize(self))
706 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000707 if (step < 0) {
708 stop = start + 1;
709 start = stop + step * (slicelen - 1) - 1;
710 step = -step;
711 }
712 for (cur = start, i = 0;
713 i < slicelen; cur += step, i++) {
714 Py_ssize_t lim = step - 1;
715
Mark Dickinsona9209612010-02-14 13:08:35 +0000716 if (cur + step >= (size_t)PyByteArray_GET_SIZE(self))
Christian Heimes44720832008-05-26 13:01:01 +0000717 lim = PyByteArray_GET_SIZE(self) - cur - 1;
718
719 memmove(self->ob_bytes + cur - i,
720 self->ob_bytes + cur + 1, lim);
721 }
722 /* Move the tail of the bytes, in one chunk */
723 cur = start + slicelen*step;
Mark Dickinsona9209612010-02-14 13:08:35 +0000724 if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
Christian Heimes44720832008-05-26 13:01:01 +0000725 memmove(self->ob_bytes + cur - slicelen,
726 self->ob_bytes + cur,
727 PyByteArray_GET_SIZE(self) - cur);
728 }
729 if (PyByteArray_Resize((PyObject *)self,
730 PyByteArray_GET_SIZE(self) - slicelen) < 0)
731 return -1;
732
733 return 0;
734 }
735 else {
736 /* Assign slice */
737 Py_ssize_t cur, i;
738
739 if (needed != slicelen) {
740 PyErr_Format(PyExc_ValueError,
741 "attempt to assign bytes of size %zd "
742 "to extended slice of size %zd",
743 needed, slicelen);
744 return -1;
745 }
746 for (cur = start, i = 0; i < slicelen; cur += step, i++)
747 self->ob_bytes[cur] = bytes[i];
748 return 0;
749 }
750 }
751}
752
753static int
754bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
755{
756 static char *kwlist[] = {"source", "encoding", "errors", 0};
757 PyObject *arg = NULL;
758 const char *encoding = NULL;
759 const char *errors = NULL;
760 Py_ssize_t count;
761 PyObject *it;
762 PyObject *(*iternext)(PyObject *);
763
764 if (Py_SIZE(self) != 0) {
765 /* Empty previous contents (yes, do this first of all!) */
766 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
767 return -1;
768 }
769
770 /* Parse arguments */
Neal Norwitzc86b54c2008-07-20 19:35:23 +0000771 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
Christian Heimes44720832008-05-26 13:01:01 +0000772 &arg, &encoding, &errors))
773 return -1;
774
775 /* Make a quick exit if no first argument */
776 if (arg == NULL) {
777 if (encoding != NULL || errors != NULL) {
778 PyErr_SetString(PyExc_TypeError,
779 "encoding or errors without sequence argument");
780 return -1;
781 }
782 return 0;
783 }
784
785 if (PyBytes_Check(arg)) {
786 PyObject *new, *encoded;
787 if (encoding != NULL) {
788 encoded = PyCodec_Encode(arg, encoding, errors);
789 if (encoded == NULL)
790 return -1;
791 assert(PyBytes_Check(encoded));
792 }
793 else {
794 encoded = arg;
795 Py_INCREF(arg);
796 }
797 new = bytes_iconcat(self, arg);
798 Py_DECREF(encoded);
799 if (new == NULL)
800 return -1;
801 Py_DECREF(new);
802 return 0;
803 }
804
805 if (PyUnicode_Check(arg)) {
806 /* Encode via the codec registry */
807 PyObject *encoded, *new;
808 if (encoding == NULL) {
809 PyErr_SetString(PyExc_TypeError,
810 "unicode argument without an encoding");
811 return -1;
812 }
813 encoded = PyCodec_Encode(arg, encoding, errors);
814 if (encoded == NULL)
815 return -1;
816 assert(PyBytes_Check(encoded));
817 new = bytes_iconcat(self, encoded);
818 Py_DECREF(encoded);
819 if (new == NULL)
820 return -1;
821 Py_DECREF(new);
822 return 0;
823 }
824
825 /* If it's not unicode, there can't be encoding or errors */
826 if (encoding != NULL || errors != NULL) {
827 PyErr_SetString(PyExc_TypeError,
828 "encoding or errors without a string argument");
829 return -1;
830 }
831
832 /* Is it an int? */
Benjamin Petersonc218ce82010-04-16 22:43:53 +0000833 count = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
834 if (count == -1 && PyErr_Occurred()) {
835 if (PyErr_ExceptionMatches(PyExc_OverflowError))
Christian Heimes44720832008-05-26 13:01:01 +0000836 return -1;
Benjamin Petersonc218ce82010-04-16 22:43:53 +0000837 else
838 PyErr_Clear();
839 }
840 else if (count < 0) {
841 PyErr_SetString(PyExc_ValueError, "negative count");
842 return -1;
843 }
844 else {
Christian Heimes44720832008-05-26 13:01:01 +0000845 if (count > 0) {
846 if (PyByteArray_Resize((PyObject *)self, count))
847 return -1;
848 memset(self->ob_bytes, 0, count);
849 }
850 return 0;
851 }
852
853 /* Use the buffer API */
854 if (PyObject_CheckBuffer(arg)) {
855 Py_ssize_t size;
856 Py_buffer view;
857 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
858 return -1;
859 size = view.len;
860 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
861 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
862 goto fail;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000863 PyBuffer_Release(&view);
Christian Heimes44720832008-05-26 13:01:01 +0000864 return 0;
865 fail:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000866 PyBuffer_Release(&view);
Christian Heimes44720832008-05-26 13:01:01 +0000867 return -1;
868 }
869
870 /* XXX Optimize this if the arguments is a list, tuple */
871
872 /* Get the iterator */
873 it = PyObject_GetIter(arg);
874 if (it == NULL)
875 return -1;
876 iternext = *Py_TYPE(it)->tp_iternext;
877
878 /* Run the iterator to exhaustion */
879 for (;;) {
880 PyObject *item;
Georg Brandl3e758462008-07-16 23:10:05 +0000881 int rc, value;
Christian Heimes44720832008-05-26 13:01:01 +0000882
883 /* Get the next item */
884 item = iternext(it);
885 if (item == NULL) {
886 if (PyErr_Occurred()) {
887 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
888 goto error;
889 PyErr_Clear();
890 }
891 break;
892 }
893
894 /* Interpret it as an int (__index__) */
Georg Brandl3e758462008-07-16 23:10:05 +0000895 rc = _getbytevalue(item, &value);
Christian Heimes44720832008-05-26 13:01:01 +0000896 Py_DECREF(item);
Georg Brandl3e758462008-07-16 23:10:05 +0000897 if (!rc)
Christian Heimes44720832008-05-26 13:01:01 +0000898 goto error;
899
Christian Heimes44720832008-05-26 13:01:01 +0000900 /* Append the byte */
901 if (Py_SIZE(self) < self->ob_alloc)
902 Py_SIZE(self)++;
903 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
904 goto error;
905 self->ob_bytes[Py_SIZE(self)-1] = value;
906 }
907
908 /* Clean up and return success */
909 Py_DECREF(it);
910 return 0;
911
912 error:
913 /* Error handling when it != NULL */
914 Py_DECREF(it);
915 return -1;
916}
917
918/* Mostly copied from string_repr, but without the
919 "smart quote" functionality. */
920static PyObject *
921bytes_repr(PyByteArrayObject *self)
922{
923 static const char *hexdigits = "0123456789abcdef";
924 const char *quote_prefix = "bytearray(b";
925 const char *quote_postfix = ")";
926 Py_ssize_t length = Py_SIZE(self);
927 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
Mark Dickinsona9209612010-02-14 13:08:35 +0000928 size_t newsize;
Christian Heimes44720832008-05-26 13:01:01 +0000929 PyObject *v;
Mark Dickinsona9209612010-02-14 13:08:35 +0000930 if (length > (PY_SSIZE_T_MAX - 14) / 4) {
Christian Heimes44720832008-05-26 13:01:01 +0000931 PyErr_SetString(PyExc_OverflowError,
932 "bytearray object is too large to make repr");
933 return NULL;
934 }
Mark Dickinsona9209612010-02-14 13:08:35 +0000935 newsize = 14 + 4 * length;
Christian Heimes44720832008-05-26 13:01:01 +0000936 v = PyUnicode_FromUnicode(NULL, newsize);
937 if (v == NULL) {
938 return NULL;
939 }
940 else {
941 register Py_ssize_t i;
942 register Py_UNICODE c;
943 register Py_UNICODE *p;
944 int quote;
945
946 /* Figure out which quote to use; single is preferred */
947 quote = '\'';
948 {
949 char *test, *start;
950 start = PyByteArray_AS_STRING(self);
951 for (test = start; test < start+length; ++test) {
952 if (*test == '"') {
953 quote = '\''; /* back to single */
954 goto decided;
955 }
956 else if (*test == '\'')
957 quote = '"';
958 }
959 decided:
960 ;
961 }
962
963 p = PyUnicode_AS_UNICODE(v);
964 while (*quote_prefix)
965 *p++ = *quote_prefix++;
966 *p++ = quote;
967
968 for (i = 0; i < length; i++) {
969 /* There's at least enough room for a hex escape
970 and a closing quote. */
971 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
972 c = self->ob_bytes[i];
973 if (c == '\'' || c == '\\')
974 *p++ = '\\', *p++ = c;
975 else if (c == '\t')
976 *p++ = '\\', *p++ = 't';
977 else if (c == '\n')
978 *p++ = '\\', *p++ = 'n';
979 else if (c == '\r')
980 *p++ = '\\', *p++ = 'r';
981 else if (c == 0)
982 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
983 else if (c < ' ' || c >= 0x7f) {
984 *p++ = '\\';
985 *p++ = 'x';
986 *p++ = hexdigits[(c & 0xf0) >> 4];
987 *p++ = hexdigits[c & 0xf];
988 }
989 else
990 *p++ = c;
991 }
992 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
993 *p++ = quote;
994 while (*quote_postfix) {
995 *p++ = *quote_postfix++;
996 }
997 *p = '\0';
998 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
999 Py_DECREF(v);
1000 return NULL;
1001 }
1002 return v;
1003 }
1004}
1005
1006static PyObject *
1007bytes_str(PyObject *op)
1008{
1009#if 0
1010 if (Py_BytesWarningFlag) {
1011 if (PyErr_WarnEx(PyExc_BytesWarning,
1012 "str() on a bytearray instance", 1))
1013 return NULL;
1014 }
1015 return bytes_repr((PyByteArrayObject*)op);
1016#endif
1017 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1018}
1019
1020static PyObject *
1021bytes_richcompare(PyObject *self, PyObject *other, int op)
1022{
1023 Py_ssize_t self_size, other_size;
1024 Py_buffer self_bytes, other_bytes;
1025 PyObject *res;
1026 Py_ssize_t minsize;
1027 int cmp;
1028
1029 /* Bytes can be compared to anything that supports the (binary)
1030 buffer API. Except that a comparison with Unicode is always an
1031 error, even if the comparison is for equality. */
1032 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1033 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1034 if (Py_BytesWarningFlag && op == Py_EQ) {
1035 if (PyErr_WarnEx(PyExc_BytesWarning,
Ezio Melotti262c3ce2010-01-14 11:39:50 +00001036 "Comparison between bytearray and string", 1))
Christian Heimes44720832008-05-26 13:01:01 +00001037 return NULL;
1038 }
1039
1040 Py_INCREF(Py_NotImplemented);
1041 return Py_NotImplemented;
1042 }
1043
1044 self_size = _getbuffer(self, &self_bytes);
1045 if (self_size < 0) {
1046 PyErr_Clear();
1047 Py_INCREF(Py_NotImplemented);
1048 return Py_NotImplemented;
1049 }
1050
1051 other_size = _getbuffer(other, &other_bytes);
1052 if (other_size < 0) {
1053 PyErr_Clear();
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001054 PyBuffer_Release(&self_bytes);
Christian Heimes44720832008-05-26 13:01:01 +00001055 Py_INCREF(Py_NotImplemented);
1056 return Py_NotImplemented;
1057 }
1058
1059 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1060 /* Shortcut: if the lengths differ, the objects differ */
1061 cmp = (op == Py_NE);
1062 }
1063 else {
1064 minsize = self_size;
1065 if (other_size < minsize)
1066 minsize = other_size;
1067
1068 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1069 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1070
1071 if (cmp == 0) {
1072 if (self_size < other_size)
1073 cmp = -1;
1074 else if (self_size > other_size)
1075 cmp = 1;
1076 }
1077
1078 switch (op) {
1079 case Py_LT: cmp = cmp < 0; break;
1080 case Py_LE: cmp = cmp <= 0; break;
1081 case Py_EQ: cmp = cmp == 0; break;
1082 case Py_NE: cmp = cmp != 0; break;
1083 case Py_GT: cmp = cmp > 0; break;
1084 case Py_GE: cmp = cmp >= 0; break;
1085 }
1086 }
1087
1088 res = cmp ? Py_True : Py_False;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001089 PyBuffer_Release(&self_bytes);
1090 PyBuffer_Release(&other_bytes);
Christian Heimes44720832008-05-26 13:01:01 +00001091 Py_INCREF(res);
1092 return res;
1093}
1094
1095static void
1096bytes_dealloc(PyByteArrayObject *self)
1097{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001098 if (self->ob_exports > 0) {
1099 PyErr_SetString(PyExc_SystemError,
Georg Brandle9b91212009-04-05 21:26:31 +00001100 "deallocated bytearray object has exported buffers");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001101 PyErr_Print();
1102 }
Christian Heimes44720832008-05-26 13:01:01 +00001103 if (self->ob_bytes != 0) {
1104 PyMem_Free(self->ob_bytes);
1105 }
1106 Py_TYPE(self)->tp_free((PyObject *)self);
1107}
1108
1109
1110/* -------------------------------------------------------------------- */
1111/* Methods */
1112
1113#define STRINGLIB_CHAR char
1114#define STRINGLIB_CMP memcmp
1115#define STRINGLIB_LEN PyByteArray_GET_SIZE
1116#define STRINGLIB_STR PyByteArray_AS_STRING
1117#define STRINGLIB_NEW PyByteArray_FromStringAndSize
1118#define STRINGLIB_EMPTY nullbytes
1119#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1120#define STRINGLIB_MUTABLE 1
Christian Heimes7d4c3172008-08-22 19:47:25 +00001121#define FROM_BYTEARRAY 1
Christian Heimes44720832008-05-26 13:01:01 +00001122
1123#include "stringlib/fastsearch.h"
1124#include "stringlib/count.h"
1125#include "stringlib/find.h"
1126#include "stringlib/partition.h"
1127#include "stringlib/ctype.h"
1128#include "stringlib/transmogrify.h"
1129
1130
1131/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1132were copied from the old char* style string object. */
1133
1134Py_LOCAL_INLINE(void)
1135_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1136{
1137 if (*end > len)
1138 *end = len;
1139 else if (*end < 0)
1140 *end += len;
1141 if (*end < 0)
1142 *end = 0;
1143 if (*start < 0)
1144 *start += len;
1145 if (*start < 0)
1146 *start = 0;
1147}
1148
1149
1150Py_LOCAL_INLINE(Py_ssize_t)
1151bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1152{
1153 PyObject *subobj;
1154 Py_buffer subbuf;
1155 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1156 Py_ssize_t res;
1157
1158 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1159 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1160 return -2;
1161 if (_getbuffer(subobj, &subbuf) < 0)
1162 return -2;
1163 if (dir > 0)
1164 res = stringlib_find_slice(
1165 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1166 subbuf.buf, subbuf.len, start, end);
1167 else
1168 res = stringlib_rfind_slice(
1169 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1170 subbuf.buf, subbuf.len, start, end);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001171 PyBuffer_Release(&subbuf);
Christian Heimes44720832008-05-26 13:01:01 +00001172 return res;
1173}
1174
1175PyDoc_STRVAR(find__doc__,
1176"B.find(sub [,start [,end]]) -> int\n\
1177\n\
1178Return the lowest index in B where subsection sub is found,\n\
1179such that sub is contained within s[start,end]. Optional\n\
1180arguments start and end are interpreted as in slice notation.\n\
1181\n\
1182Return -1 on failure.");
1183
1184static PyObject *
1185bytes_find(PyByteArrayObject *self, PyObject *args)
1186{
1187 Py_ssize_t result = bytes_find_internal(self, args, +1);
1188 if (result == -2)
1189 return NULL;
1190 return PyInt_FromSsize_t(result);
1191}
1192
1193PyDoc_STRVAR(count__doc__,
1194"B.count(sub [,start [,end]]) -> int\n\
1195\n\
1196Return the number of non-overlapping occurrences of subsection sub in\n\
1197bytes B[start:end]. Optional arguments start and end are interpreted\n\
1198as in slice notation.");
1199
1200static PyObject *
1201bytes_count(PyByteArrayObject *self, PyObject *args)
1202{
1203 PyObject *sub_obj;
1204 const char *str = PyByteArray_AS_STRING(self);
1205 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1206 Py_buffer vsub;
1207 PyObject *count_obj;
1208
1209 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1210 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1211 return NULL;
1212
1213 if (_getbuffer(sub_obj, &vsub) < 0)
1214 return NULL;
1215
1216 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1217
1218 count_obj = PyInt_FromSsize_t(
1219 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1220 );
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001221 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00001222 return count_obj;
1223}
1224
1225
1226PyDoc_STRVAR(index__doc__,
1227"B.index(sub [,start [,end]]) -> int\n\
1228\n\
1229Like B.find() but raise ValueError when the subsection is not found.");
1230
1231static PyObject *
1232bytes_index(PyByteArrayObject *self, PyObject *args)
1233{
1234 Py_ssize_t result = bytes_find_internal(self, args, +1);
1235 if (result == -2)
1236 return NULL;
1237 if (result == -1) {
1238 PyErr_SetString(PyExc_ValueError,
1239 "subsection not found");
1240 return NULL;
1241 }
1242 return PyInt_FromSsize_t(result);
1243}
1244
1245
1246PyDoc_STRVAR(rfind__doc__,
1247"B.rfind(sub [,start [,end]]) -> int\n\
1248\n\
1249Return the highest index in B where subsection sub is found,\n\
1250such that sub is contained within s[start,end]. Optional\n\
1251arguments start and end are interpreted as in slice notation.\n\
1252\n\
1253Return -1 on failure.");
1254
1255static PyObject *
1256bytes_rfind(PyByteArrayObject *self, PyObject *args)
1257{
1258 Py_ssize_t result = bytes_find_internal(self, args, -1);
1259 if (result == -2)
1260 return NULL;
1261 return PyInt_FromSsize_t(result);
1262}
1263
1264
1265PyDoc_STRVAR(rindex__doc__,
1266"B.rindex(sub [,start [,end]]) -> int\n\
1267\n\
1268Like B.rfind() but raise ValueError when the subsection is not found.");
1269
1270static PyObject *
1271bytes_rindex(PyByteArrayObject *self, PyObject *args)
1272{
1273 Py_ssize_t result = bytes_find_internal(self, args, -1);
1274 if (result == -2)
1275 return NULL;
1276 if (result == -1) {
1277 PyErr_SetString(PyExc_ValueError,
1278 "subsection not found");
1279 return NULL;
1280 }
1281 return PyInt_FromSsize_t(result);
1282}
1283
1284
1285static int
1286bytes_contains(PyObject *self, PyObject *arg)
1287{
1288 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1289 if (ival == -1 && PyErr_Occurred()) {
1290 Py_buffer varg;
1291 int pos;
1292 PyErr_Clear();
1293 if (_getbuffer(arg, &varg) < 0)
1294 return -1;
1295 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1296 varg.buf, varg.len, 0);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001297 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00001298 return pos >= 0;
1299 }
1300 if (ival < 0 || ival >= 256) {
1301 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1302 return -1;
1303 }
1304
1305 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1306}
1307
1308
1309/* Matches the end (direction >= 0) or start (direction < 0) of self
1310 * against substr, using the start and end arguments. Returns
1311 * -1 on error, 0 if not found and 1 if found.
1312 */
1313Py_LOCAL(int)
1314_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1315 Py_ssize_t end, int direction)
1316{
1317 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1318 const char* str;
1319 Py_buffer vsubstr;
1320 int rv = 0;
1321
1322 str = PyByteArray_AS_STRING(self);
1323
1324 if (_getbuffer(substr, &vsubstr) < 0)
1325 return -1;
1326
1327 _adjust_indices(&start, &end, len);
1328
1329 if (direction < 0) {
1330 /* startswith */
1331 if (start+vsubstr.len > len) {
1332 goto done;
1333 }
1334 } else {
1335 /* endswith */
1336 if (end-start < vsubstr.len || start > len) {
1337 goto done;
1338 }
1339
1340 if (end-vsubstr.len > start)
1341 start = end - vsubstr.len;
1342 }
1343 if (end-start >= vsubstr.len)
1344 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1345
1346done:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001347 PyBuffer_Release(&vsubstr);
Christian Heimes44720832008-05-26 13:01:01 +00001348 return rv;
1349}
1350
1351
1352PyDoc_STRVAR(startswith__doc__,
1353"B.startswith(prefix [,start [,end]]) -> bool\n\
1354\n\
1355Return True if B starts with the specified prefix, False otherwise.\n\
1356With optional start, test B beginning at that position.\n\
1357With optional end, stop comparing B at that position.\n\
1358prefix can also be a tuple of strings to try.");
1359
1360static PyObject *
1361bytes_startswith(PyByteArrayObject *self, PyObject *args)
1362{
1363 Py_ssize_t start = 0;
1364 Py_ssize_t end = PY_SSIZE_T_MAX;
1365 PyObject *subobj;
1366 int result;
1367
1368 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1369 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1370 return NULL;
1371 if (PyTuple_Check(subobj)) {
1372 Py_ssize_t i;
1373 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1374 result = _bytes_tailmatch(self,
1375 PyTuple_GET_ITEM(subobj, i),
1376 start, end, -1);
1377 if (result == -1)
1378 return NULL;
1379 else if (result) {
1380 Py_RETURN_TRUE;
1381 }
1382 }
1383 Py_RETURN_FALSE;
1384 }
1385 result = _bytes_tailmatch(self, subobj, start, end, -1);
1386 if (result == -1)
1387 return NULL;
1388 else
1389 return PyBool_FromLong(result);
1390}
1391
1392PyDoc_STRVAR(endswith__doc__,
1393"B.endswith(suffix [,start [,end]]) -> bool\n\
1394\n\
1395Return True if B ends with the specified suffix, False otherwise.\n\
1396With optional start, test B beginning at that position.\n\
1397With optional end, stop comparing B at that position.\n\
1398suffix can also be a tuple of strings to try.");
1399
1400static PyObject *
1401bytes_endswith(PyByteArrayObject *self, PyObject *args)
1402{
1403 Py_ssize_t start = 0;
1404 Py_ssize_t end = PY_SSIZE_T_MAX;
1405 PyObject *subobj;
1406 int result;
1407
1408 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1409 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1410 return NULL;
1411 if (PyTuple_Check(subobj)) {
1412 Py_ssize_t i;
1413 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1414 result = _bytes_tailmatch(self,
1415 PyTuple_GET_ITEM(subobj, i),
1416 start, end, +1);
1417 if (result == -1)
1418 return NULL;
1419 else if (result) {
1420 Py_RETURN_TRUE;
1421 }
1422 }
1423 Py_RETURN_FALSE;
1424 }
1425 result = _bytes_tailmatch(self, subobj, start, end, +1);
1426 if (result == -1)
1427 return NULL;
1428 else
1429 return PyBool_FromLong(result);
1430}
1431
1432
1433PyDoc_STRVAR(translate__doc__,
1434"B.translate(table[, deletechars]) -> bytearray\n\
1435\n\
1436Return a copy of B, where all characters occurring in the\n\
1437optional argument deletechars are removed, and the remaining\n\
1438characters have been mapped through the given translation\n\
1439table, which must be a bytes object of length 256.");
1440
1441static PyObject *
1442bytes_translate(PyByteArrayObject *self, PyObject *args)
1443{
1444 register char *input, *output;
1445 register const char *table;
Benjamin Peterson866eba92008-11-19 22:05:53 +00001446 register Py_ssize_t i, c;
Christian Heimes44720832008-05-26 13:01:01 +00001447 PyObject *input_obj = (PyObject*)self;
1448 const char *output_start;
1449 Py_ssize_t inlen;
1450 PyObject *result;
1451 int trans_table[256];
1452 PyObject *tableobj, *delobj = NULL;
1453 Py_buffer vtable, vdel;
1454
1455 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1456 &tableobj, &delobj))
1457 return NULL;
1458
1459 if (_getbuffer(tableobj, &vtable) < 0)
1460 return NULL;
1461
1462 if (vtable.len != 256) {
1463 PyErr_SetString(PyExc_ValueError,
1464 "translation table must be 256 characters long");
Georg Brandl11a81b22009-07-22 12:03:09 +00001465 PyBuffer_Release(&vtable);
1466 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001467 }
1468
1469 if (delobj != NULL) {
1470 if (_getbuffer(delobj, &vdel) < 0) {
Georg Brandl11a81b22009-07-22 12:03:09 +00001471 PyBuffer_Release(&vtable);
1472 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001473 }
1474 }
1475 else {
1476 vdel.buf = NULL;
1477 vdel.len = 0;
1478 }
1479
1480 table = (const char *)vtable.buf;
1481 inlen = PyByteArray_GET_SIZE(input_obj);
1482 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1483 if (result == NULL)
1484 goto done;
1485 output_start = output = PyByteArray_AsString(result);
1486 input = PyByteArray_AS_STRING(input_obj);
1487
1488 if (vdel.len == 0) {
1489 /* If no deletions are required, use faster code */
1490 for (i = inlen; --i >= 0; ) {
1491 c = Py_CHARMASK(*input++);
Benjamin Peterson866eba92008-11-19 22:05:53 +00001492 *output++ = table[c];
Christian Heimes44720832008-05-26 13:01:01 +00001493 }
Christian Heimes44720832008-05-26 13:01:01 +00001494 goto done;
1495 }
Antoine Pitrou599db7f2008-12-07 00:07:51 +00001496
Christian Heimes44720832008-05-26 13:01:01 +00001497 for (i = 0; i < 256; i++)
1498 trans_table[i] = Py_CHARMASK(table[i]);
1499
1500 for (i = 0; i < vdel.len; i++)
1501 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1502
1503 for (i = inlen; --i >= 0; ) {
1504 c = Py_CHARMASK(*input++);
1505 if (trans_table[c] != -1)
1506 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1507 continue;
Christian Heimes44720832008-05-26 13:01:01 +00001508 }
1509 /* Fix the size of the resulting string */
1510 if (inlen > 0)
1511 PyByteArray_Resize(result, output - output_start);
1512
1513done:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001514 PyBuffer_Release(&vtable);
Christian Heimes44720832008-05-26 13:01:01 +00001515 if (delobj != NULL)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001516 PyBuffer_Release(&vdel);
Christian Heimes44720832008-05-26 13:01:01 +00001517 return result;
1518}
1519
1520
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate byte `c` in the first `target_len` bytes of `target`;
   evaluates to a char* at the match, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when `pattern` (of `length` bytes) occurs in `target` at `offset`.
   The first and last bytes are compared before falling back to memcmp on
   the interior.  Don't call if length < 2: length-2 would then be
   negative and turn into a huge size for memcmp. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1534
1535
Benjamin Peterson866eba92008-11-19 22:05:53 +00001536/* Bytes ops must return a string, create a copy */
Christian Heimes44720832008-05-26 13:01:01 +00001537Py_LOCAL(PyByteArrayObject *)
1538return_self(PyByteArrayObject *self)
1539{
Christian Heimes44720832008-05-26 13:01:01 +00001540 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1541 PyByteArray_AS_STRING(self),
1542 PyByteArray_GET_SIZE(self));
1543}
1544
1545Py_LOCAL_INLINE(Py_ssize_t)
1546countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1547{
1548 Py_ssize_t count=0;
1549 const char *start=target;
1550 const char *end=target+target_len;
1551
1552 while ( (start=findchar(start, end-start, c)) != NULL ) {
1553 count++;
1554 if (count >= maxcount)
1555 break;
1556 start += 1;
1557 }
1558 return count;
1559}
1560
1561Py_LOCAL(Py_ssize_t)
1562findstring(const char *target, Py_ssize_t target_len,
1563 const char *pattern, Py_ssize_t pattern_len,
1564 Py_ssize_t start,
1565 Py_ssize_t end,
1566 int direction)
1567{
1568 if (start < 0) {
1569 start += target_len;
1570 if (start < 0)
1571 start = 0;
1572 }
1573 if (end > target_len) {
1574 end = target_len;
1575 } else if (end < 0) {
1576 end += target_len;
1577 if (end < 0)
1578 end = 0;
1579 }
1580
1581 /* zero-length substrings always match at the first attempt */
1582 if (pattern_len == 0)
1583 return (direction > 0) ? start : end;
1584
1585 end -= pattern_len;
1586
1587 if (direction < 0) {
1588 for (; end >= start; end--)
1589 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1590 return end;
1591 } else {
1592 for (; start <= end; start++)
1593 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1594 return start;
1595 }
1596 return -1;
1597}
1598
1599Py_LOCAL_INLINE(Py_ssize_t)
1600countstring(const char *target, Py_ssize_t target_len,
1601 const char *pattern, Py_ssize_t pattern_len,
1602 Py_ssize_t start,
1603 Py_ssize_t end,
1604 int direction, Py_ssize_t maxcount)
1605{
1606 Py_ssize_t count=0;
1607
1608 if (start < 0) {
1609 start += target_len;
1610 if (start < 0)
1611 start = 0;
1612 }
1613 if (end > target_len) {
1614 end = target_len;
1615 } else if (end < 0) {
1616 end += target_len;
1617 if (end < 0)
1618 end = 0;
1619 }
1620
1621 /* zero-length substrings match everywhere */
1622 if (pattern_len == 0 || maxcount == 0) {
1623 if (target_len+1 < maxcount)
1624 return target_len+1;
1625 return maxcount;
1626 }
1627
1628 end -= pattern_len;
1629 if (direction < 0) {
1630 for (; (end >= start); end--)
1631 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1632 count++;
1633 if (--maxcount <= 0) break;
1634 end -= pattern_len-1;
1635 }
1636 } else {
1637 for (; (start <= end); start++)
1638 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1639 count++;
1640 if (--maxcount <= 0)
1641 break;
1642 start += pattern_len-1;
1643 }
1644 }
1645 return count;
1646}
1647
1648
1649/* Algorithms for different cases of string replacement */
1650
1651/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1652Py_LOCAL(PyByteArrayObject *)
1653replace_interleave(PyByteArrayObject *self,
1654 const char *to_s, Py_ssize_t to_len,
1655 Py_ssize_t maxcount)
1656{
1657 char *self_s, *result_s;
1658 Py_ssize_t self_len, result_len;
1659 Py_ssize_t count, i, product;
1660 PyByteArrayObject *result;
1661
1662 self_len = PyByteArray_GET_SIZE(self);
1663
1664 /* 1 at the end plus 1 after every character */
1665 count = self_len+1;
1666 if (maxcount < count)
1667 count = maxcount;
1668
1669 /* Check for overflow */
1670 /* result_len = count * to_len + self_len; */
1671 product = count * to_len;
1672 if (product / to_len != count) {
1673 PyErr_SetString(PyExc_OverflowError,
1674 "replace string is too long");
1675 return NULL;
1676 }
1677 result_len = product + self_len;
1678 if (result_len < 0) {
1679 PyErr_SetString(PyExc_OverflowError,
1680 "replace string is too long");
1681 return NULL;
1682 }
1683
1684 if (! (result = (PyByteArrayObject *)
1685 PyByteArray_FromStringAndSize(NULL, result_len)) )
1686 return NULL;
1687
1688 self_s = PyByteArray_AS_STRING(self);
1689 result_s = PyByteArray_AS_STRING(result);
1690
1691 /* TODO: special case single character, which doesn't need memcpy */
1692
1693 /* Lay the first one down (guaranteed this will occur) */
1694 Py_MEMCPY(result_s, to_s, to_len);
1695 result_s += to_len;
1696 count -= 1;
1697
1698 for (i=0; i<count; i++) {
1699 *result_s++ = *self_s++;
1700 Py_MEMCPY(result_s, to_s, to_len);
1701 result_s += to_len;
1702 }
1703
1704 /* Copy the rest of the original string */
1705 Py_MEMCPY(result_s, self_s, self_len-i);
1706
1707 return result;
1708}
1709
1710/* Special case for deleting a single character */
1711/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1712Py_LOCAL(PyByteArrayObject *)
1713replace_delete_single_character(PyByteArrayObject *self,
1714 char from_c, Py_ssize_t maxcount)
1715{
1716 char *self_s, *result_s;
1717 char *start, *next, *end;
1718 Py_ssize_t self_len, result_len;
1719 Py_ssize_t count;
1720 PyByteArrayObject *result;
1721
1722 self_len = PyByteArray_GET_SIZE(self);
1723 self_s = PyByteArray_AS_STRING(self);
1724
1725 count = countchar(self_s, self_len, from_c, maxcount);
1726 if (count == 0) {
1727 return return_self(self);
1728 }
1729
1730 result_len = self_len - count; /* from_len == 1 */
1731 assert(result_len>=0);
1732
1733 if ( (result = (PyByteArrayObject *)
1734 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1735 return NULL;
1736 result_s = PyByteArray_AS_STRING(result);
1737
1738 start = self_s;
1739 end = self_s + self_len;
1740 while (count-- > 0) {
1741 next = findchar(start, end-start, from_c);
1742 if (next == NULL)
1743 break;
1744 Py_MEMCPY(result_s, start, next-start);
1745 result_s += (next-start);
1746 start = next+1;
1747 }
1748 Py_MEMCPY(result_s, start, end-start);
1749
1750 return result;
1751}
1752
1753/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1754
1755Py_LOCAL(PyByteArrayObject *)
1756replace_delete_substring(PyByteArrayObject *self,
1757 const char *from_s, Py_ssize_t from_len,
1758 Py_ssize_t maxcount)
1759{
1760 char *self_s, *result_s;
1761 char *start, *next, *end;
1762 Py_ssize_t self_len, result_len;
1763 Py_ssize_t count, offset;
1764 PyByteArrayObject *result;
1765
1766 self_len = PyByteArray_GET_SIZE(self);
1767 self_s = PyByteArray_AS_STRING(self);
1768
1769 count = countstring(self_s, self_len,
1770 from_s, from_len,
1771 0, self_len, 1,
1772 maxcount);
1773
1774 if (count == 0) {
1775 /* no matches */
1776 return return_self(self);
1777 }
1778
1779 result_len = self_len - (count * from_len);
1780 assert (result_len>=0);
1781
1782 if ( (result = (PyByteArrayObject *)
1783 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1784 return NULL;
1785
1786 result_s = PyByteArray_AS_STRING(result);
1787
1788 start = self_s;
1789 end = self_s + self_len;
1790 while (count-- > 0) {
1791 offset = findstring(start, end-start,
1792 from_s, from_len,
1793 0, end-start, FORWARD);
1794 if (offset == -1)
1795 break;
1796 next = start + offset;
1797
1798 Py_MEMCPY(result_s, start, next-start);
1799
1800 result_s += (next-start);
1801 start = next+from_len;
1802 }
1803 Py_MEMCPY(result_s, start, end-start);
1804 return result;
1805}
1806
1807/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1808Py_LOCAL(PyByteArrayObject *)
1809replace_single_character_in_place(PyByteArrayObject *self,
1810 char from_c, char to_c,
1811 Py_ssize_t maxcount)
1812{
1813 char *self_s, *result_s, *start, *end, *next;
1814 Py_ssize_t self_len;
1815 PyByteArrayObject *result;
1816
1817 /* The result string will be the same size */
1818 self_s = PyByteArray_AS_STRING(self);
1819 self_len = PyByteArray_GET_SIZE(self);
1820
1821 next = findchar(self_s, self_len, from_c);
1822
1823 if (next == NULL) {
1824 /* No matches; return the original bytes */
1825 return return_self(self);
1826 }
1827
1828 /* Need to make a new bytes */
1829 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1830 if (result == NULL)
1831 return NULL;
1832 result_s = PyByteArray_AS_STRING(result);
1833 Py_MEMCPY(result_s, self_s, self_len);
1834
1835 /* change everything in-place, starting with this one */
1836 start = result_s + (next-self_s);
1837 *start = to_c;
1838 start++;
1839 end = result_s + self_len;
1840
1841 while (--maxcount > 0) {
1842 next = findchar(start, end-start, from_c);
1843 if (next == NULL)
1844 break;
1845 *next = to_c;
1846 start = next+1;
1847 }
1848
1849 return result;
1850}
1851
1852/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1853Py_LOCAL(PyByteArrayObject *)
1854replace_substring_in_place(PyByteArrayObject *self,
1855 const char *from_s, Py_ssize_t from_len,
1856 const char *to_s, Py_ssize_t to_len,
1857 Py_ssize_t maxcount)
1858{
1859 char *result_s, *start, *end;
1860 char *self_s;
1861 Py_ssize_t self_len, offset;
1862 PyByteArrayObject *result;
1863
1864 /* The result bytes will be the same size */
1865
1866 self_s = PyByteArray_AS_STRING(self);
1867 self_len = PyByteArray_GET_SIZE(self);
1868
1869 offset = findstring(self_s, self_len,
1870 from_s, from_len,
1871 0, self_len, FORWARD);
1872 if (offset == -1) {
1873 /* No matches; return the original bytes */
1874 return return_self(self);
1875 }
1876
1877 /* Need to make a new bytes */
1878 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1879 if (result == NULL)
1880 return NULL;
1881 result_s = PyByteArray_AS_STRING(result);
1882 Py_MEMCPY(result_s, self_s, self_len);
1883
1884 /* change everything in-place, starting with this one */
1885 start = result_s + offset;
1886 Py_MEMCPY(start, to_s, from_len);
1887 start += from_len;
1888 end = result_s + self_len;
1889
1890 while ( --maxcount > 0) {
1891 offset = findstring(start, end-start,
1892 from_s, from_len,
1893 0, end-start, FORWARD);
1894 if (offset==-1)
1895 break;
1896 Py_MEMCPY(start+offset, to_s, from_len);
1897 start += offset+from_len;
1898 }
1899
1900 return result;
1901}
1902
1903/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1904Py_LOCAL(PyByteArrayObject *)
1905replace_single_character(PyByteArrayObject *self,
1906 char from_c,
1907 const char *to_s, Py_ssize_t to_len,
1908 Py_ssize_t maxcount)
1909{
1910 char *self_s, *result_s;
1911 char *start, *next, *end;
1912 Py_ssize_t self_len, result_len;
1913 Py_ssize_t count, product;
1914 PyByteArrayObject *result;
1915
1916 self_s = PyByteArray_AS_STRING(self);
1917 self_len = PyByteArray_GET_SIZE(self);
1918
1919 count = countchar(self_s, self_len, from_c, maxcount);
1920 if (count == 0) {
1921 /* no matches, return unchanged */
1922 return return_self(self);
1923 }
1924
1925 /* use the difference between current and new, hence the "-1" */
1926 /* result_len = self_len + count * (to_len-1) */
1927 product = count * (to_len-1);
1928 if (product / (to_len-1) != count) {
1929 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1930 return NULL;
1931 }
1932 result_len = self_len + product;
1933 if (result_len < 0) {
1934 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1935 return NULL;
1936 }
1937
1938 if ( (result = (PyByteArrayObject *)
1939 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1940 return NULL;
1941 result_s = PyByteArray_AS_STRING(result);
1942
1943 start = self_s;
1944 end = self_s + self_len;
1945 while (count-- > 0) {
1946 next = findchar(start, end-start, from_c);
1947 if (next == NULL)
1948 break;
1949
1950 if (next == start) {
1951 /* replace with the 'to' */
1952 Py_MEMCPY(result_s, to_s, to_len);
1953 result_s += to_len;
1954 start += 1;
1955 } else {
1956 /* copy the unchanged old then the 'to' */
1957 Py_MEMCPY(result_s, start, next-start);
1958 result_s += (next-start);
1959 Py_MEMCPY(result_s, to_s, to_len);
1960 result_s += to_len;
1961 start = next+1;
1962 }
1963 }
1964 /* Copy the remainder of the remaining bytes */
1965 Py_MEMCPY(result_s, start, end-start);
1966
1967 return result;
1968}
1969
1970/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1971Py_LOCAL(PyByteArrayObject *)
1972replace_substring(PyByteArrayObject *self,
1973 const char *from_s, Py_ssize_t from_len,
1974 const char *to_s, Py_ssize_t to_len,
1975 Py_ssize_t maxcount)
1976{
1977 char *self_s, *result_s;
1978 char *start, *next, *end;
1979 Py_ssize_t self_len, result_len;
1980 Py_ssize_t count, offset, product;
1981 PyByteArrayObject *result;
1982
1983 self_s = PyByteArray_AS_STRING(self);
1984 self_len = PyByteArray_GET_SIZE(self);
1985
1986 count = countstring(self_s, self_len,
1987 from_s, from_len,
1988 0, self_len, FORWARD, maxcount);
1989 if (count == 0) {
1990 /* no matches, return unchanged */
1991 return return_self(self);
1992 }
1993
1994 /* Check for overflow */
1995 /* result_len = self_len + count * (to_len-from_len) */
1996 product = count * (to_len-from_len);
1997 if (product / (to_len-from_len) != count) {
1998 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1999 return NULL;
2000 }
2001 result_len = self_len + product;
2002 if (result_len < 0) {
2003 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2004 return NULL;
2005 }
2006
2007 if ( (result = (PyByteArrayObject *)
2008 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2009 return NULL;
2010 result_s = PyByteArray_AS_STRING(result);
2011
2012 start = self_s;
2013 end = self_s + self_len;
2014 while (count-- > 0) {
2015 offset = findstring(start, end-start,
2016 from_s, from_len,
2017 0, end-start, FORWARD);
2018 if (offset == -1)
2019 break;
2020 next = start+offset;
2021 if (next == start) {
2022 /* replace with the 'to' */
2023 Py_MEMCPY(result_s, to_s, to_len);
2024 result_s += to_len;
2025 start += from_len;
2026 } else {
2027 /* copy the unchanged old then the 'to' */
2028 Py_MEMCPY(result_s, start, next-start);
2029 result_s += (next-start);
2030 Py_MEMCPY(result_s, to_s, to_len);
2031 result_s += to_len;
2032 start = next+from_len;
2033 }
2034 }
2035 /* Copy the remainder of the remaining bytes */
2036 Py_MEMCPY(result_s, start, end-start);
2037
2038 return result;
2039}
2040
2041
2042Py_LOCAL(PyByteArrayObject *)
2043replace(PyByteArrayObject *self,
2044 const char *from_s, Py_ssize_t from_len,
2045 const char *to_s, Py_ssize_t to_len,
2046 Py_ssize_t maxcount)
2047{
2048 if (maxcount < 0) {
2049 maxcount = PY_SSIZE_T_MAX;
2050 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2051 /* nothing to do; return the original bytes */
2052 return return_self(self);
2053 }
2054
2055 if (maxcount == 0 ||
2056 (from_len == 0 && to_len == 0)) {
2057 /* nothing to do; return the original bytes */
2058 return return_self(self);
2059 }
2060
2061 /* Handle zero-length special cases */
2062
2063 if (from_len == 0) {
2064 /* insert the 'to' bytes everywhere. */
2065 /* >>> "Python".replace("", ".") */
2066 /* '.P.y.t.h.o.n.' */
2067 return replace_interleave(self, to_s, to_len, maxcount);
2068 }
2069
2070 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2071 /* point for an empty self bytes to generate a non-empty bytes */
2072 /* Special case so the remaining code always gets a non-empty bytes */
2073 if (PyByteArray_GET_SIZE(self) == 0) {
2074 return return_self(self);
2075 }
2076
2077 if (to_len == 0) {
2078 /* delete all occurances of 'from' bytes */
2079 if (from_len == 1) {
2080 return replace_delete_single_character(
2081 self, from_s[0], maxcount);
2082 } else {
2083 return replace_delete_substring(self, from_s, from_len, maxcount);
2084 }
2085 }
2086
2087 /* Handle special case where both bytes have the same length */
2088
2089 if (from_len == to_len) {
2090 if (from_len == 1) {
2091 return replace_single_character_in_place(
2092 self,
2093 from_s[0],
2094 to_s[0],
2095 maxcount);
2096 } else {
2097 return replace_substring_in_place(
2098 self, from_s, from_len, to_s, to_len, maxcount);
2099 }
2100 }
2101
2102 /* Otherwise use the more generic algorithms */
2103 if (from_len == 1) {
2104 return replace_single_character(self, from_s[0],
2105 to_s, to_len, maxcount);
2106 } else {
2107 /* len('from')>=2, len('to')>=1 */
2108 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2109 }
2110}
2111
2112
2113PyDoc_STRVAR(replace__doc__,
2114"B.replace(old, new[, count]) -> bytes\n\
2115\n\
2116Return a copy of B with all occurrences of subsection\n\
2117old replaced by new. If the optional argument count is\n\
2118given, only the first count occurrences are replaced.");
2119
2120static PyObject *
2121bytes_replace(PyByteArrayObject *self, PyObject *args)
2122{
2123 Py_ssize_t count = -1;
2124 PyObject *from, *to, *res;
2125 Py_buffer vfrom, vto;
2126
2127 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2128 return NULL;
2129
2130 if (_getbuffer(from, &vfrom) < 0)
2131 return NULL;
2132 if (_getbuffer(to, &vto) < 0) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002133 PyBuffer_Release(&vfrom);
Christian Heimes44720832008-05-26 13:01:01 +00002134 return NULL;
2135 }
2136
2137 res = (PyObject *)replace((PyByteArrayObject *) self,
2138 vfrom.buf, vfrom.len,
2139 vto.buf, vto.len, count);
2140
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002141 PyBuffer_Release(&vfrom);
2142 PyBuffer_Release(&vto);
Christian Heimes44720832008-05-26 13:01:01 +00002143 return res;
2144}
2145
2146
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that an expression argument is
   evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] as a new bytearray to `list`.
   Relies on the caller's scope providing the locals `str` and `list` and
   the label `onError`.  Expands to several statements, so it must not be
   used as the unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (PyList_Append(list, str)) {                         \
        Py_DECREF(str);                                 \
        goto onError;                                   \
    }                                                       \
    else                                                    \
        Py_DECREF(str);

/* Store data[left:right] as a new bytearray in the next slot of `list`:
   directly into the preallocated slots while count < MAX_PREALLOC, by
   appending afterwards.  Relies on the caller's scope providing the
   locals `str`, `list` and `count` and the label `onError`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (count < MAX_PREALLOC) {                             \
        PyList_SET_ITEM(list, count, str);              \
    } else {                                                \
        if (PyList_Append(list, str)) {                 \
            Py_DECREF(str);                         \
            goto onError;                           \
        }                                               \
        else                                            \
            Py_DECREF(str);                         \
    }                                                       \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2191
2192
2193Py_LOCAL_INLINE(PyObject *)
2194split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2195{
2196 register Py_ssize_t i, j, count = 0;
2197 PyObject *str;
2198 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2199
2200 if (list == NULL)
2201 return NULL;
2202
2203 i = j = 0;
2204 while ((j < len) && (maxcount-- > 0)) {
2205 for(; j < len; j++) {
2206 /* I found that using memchr makes no difference */
2207 if (s[j] == ch) {
2208 SPLIT_ADD(s, i, j);
2209 i = j = j + 1;
2210 break;
2211 }
2212 }
2213 }
2214 if (i <= len) {
2215 SPLIT_ADD(s, i, len);
2216 }
2217 FIX_PREALLOC_SIZE(list);
2218 return list;
2219
2220 onError:
2221 Py_DECREF(list);
2222 return NULL;
2223}
2224
2225
2226Py_LOCAL_INLINE(PyObject *)
2227split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2228{
2229 register Py_ssize_t i, j, count = 0;
2230 PyObject *str;
2231 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2232
2233 if (list == NULL)
2234 return NULL;
2235
2236 for (i = j = 0; i < len; ) {
2237 /* find a token */
2238 while (i < len && ISSPACE(s[i]))
2239 i++;
2240 j = i;
2241 while (i < len && !ISSPACE(s[i]))
2242 i++;
2243 if (j < i) {
2244 if (maxcount-- <= 0)
2245 break;
2246 SPLIT_ADD(s, j, i);
2247 while (i < len && ISSPACE(s[i]))
2248 i++;
2249 j = i;
2250 }
2251 }
2252 if (j < len) {
2253 SPLIT_ADD(s, j, len);
2254 }
2255 FIX_PREALLOC_SIZE(list);
2256 return list;
2257
2258 onError:
2259 Py_DECREF(list);
2260 return NULL;
2261}
2262
2263PyDoc_STRVAR(split__doc__,
2264"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2265\n\
2266Return a list of the sections in B, using sep as the delimiter.\n\
2267If sep is not given, B is split on ASCII whitespace characters\n\
2268(space, tab, return, newline, formfeed, vertical tab).\n\
2269If maxsplit is given, at most maxsplit splits are done.");
2270
2271static PyObject *
2272bytes_split(PyByteArrayObject *self, PyObject *args)
2273{
2274 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2275 Py_ssize_t maxsplit = -1, count = 0;
2276 const char *s = PyByteArray_AS_STRING(self), *sub;
2277 PyObject *list, *str, *subobj = Py_None;
2278 Py_buffer vsub;
2279#ifdef USE_FAST
2280 Py_ssize_t pos;
2281#endif
2282
2283 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2284 return NULL;
2285 if (maxsplit < 0)
2286 maxsplit = PY_SSIZE_T_MAX;
2287
2288 if (subobj == Py_None)
2289 return split_whitespace(s, len, maxsplit);
2290
2291 if (_getbuffer(subobj, &vsub) < 0)
2292 return NULL;
2293 sub = vsub.buf;
2294 n = vsub.len;
2295
2296 if (n == 0) {
2297 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002298 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002299 return NULL;
2300 }
Amaury Forgeot d'Arc313bda12008-08-17 21:05:18 +00002301 if (n == 1) {
2302 list = split_char(s, len, sub[0], maxsplit);
2303 PyBuffer_Release(&vsub);
2304 return list;
2305 }
Christian Heimes44720832008-05-26 13:01:01 +00002306
2307 list = PyList_New(PREALLOC_SIZE(maxsplit));
2308 if (list == NULL) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002309 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002310 return NULL;
2311 }
2312
2313#ifdef USE_FAST
2314 i = j = 0;
2315 while (maxsplit-- > 0) {
2316 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2317 if (pos < 0)
2318 break;
2319 j = i+pos;
2320 SPLIT_ADD(s, i, j);
2321 i = j + n;
2322 }
2323#else
2324 i = j = 0;
2325 while ((j+n <= len) && (maxsplit-- > 0)) {
2326 for (; j+n <= len; j++) {
2327 if (Py_STRING_MATCH(s, j, sub, n)) {
2328 SPLIT_ADD(s, i, j);
2329 i = j = j + n;
2330 break;
2331 }
2332 }
2333 }
2334#endif
2335 SPLIT_ADD(s, i, len);
2336 FIX_PREALLOC_SIZE(list);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002337 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002338 return list;
2339
2340 onError:
2341 Py_DECREF(list);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002342 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002343 return NULL;
2344}
2345
2346/* stringlib's partition shares nullbytes in some cases.
2347 undo this, we don't want the nullbytes to be shared. */
2348static PyObject *
2349make_nullbytes_unique(PyObject *result)
2350{
2351 if (result != NULL) {
2352 int i;
2353 assert(PyTuple_Check(result));
2354 assert(PyTuple_GET_SIZE(result) == 3);
2355 for (i = 0; i < 3; i++) {
2356 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2357 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2358 if (new == NULL) {
2359 Py_DECREF(result);
2360 result = NULL;
2361 break;
2362 }
2363 Py_DECREF(nullbytes);
2364 PyTuple_SET_ITEM(result, i, new);
2365 }
2366 }
2367 }
2368 return result;
2369}
2370
2371PyDoc_STRVAR(partition__doc__,
2372"B.partition(sep) -> (head, sep, tail)\n\
2373\n\
2374Searches for the separator sep in B, and returns the part before it,\n\
2375the separator itself, and the part after it. If the separator is not\n\
2376found, returns B and two empty bytearray objects.");
2377
2378static PyObject *
2379bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2380{
2381 PyObject *bytesep, *result;
2382
2383 bytesep = PyByteArray_FromObject(sep_obj);
2384 if (! bytesep)
2385 return NULL;
2386
2387 result = stringlib_partition(
2388 (PyObject*) self,
2389 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2390 bytesep,
2391 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2392 );
2393
2394 Py_DECREF(bytesep);
2395 return make_nullbytes_unique(result);
2396}
2397
2398PyDoc_STRVAR(rpartition__doc__,
Ezio Melottidabb5f72010-01-25 11:46:11 +00002399"B.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes44720832008-05-26 13:01:01 +00002400\n\
2401Searches for the separator sep in B, starting at the end of B,\n\
2402and returns the part before it, the separator itself, and the\n\
2403part after it. If the separator is not found, returns two empty\n\
2404bytearray objects and B.");
2405
2406static PyObject *
2407bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2408{
2409 PyObject *bytesep, *result;
2410
2411 bytesep = PyByteArray_FromObject(sep_obj);
2412 if (! bytesep)
2413 return NULL;
2414
2415 result = stringlib_rpartition(
2416 (PyObject*) self,
2417 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2418 bytesep,
2419 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2420 );
2421
2422 Py_DECREF(bytesep);
2423 return make_nullbytes_unique(result);
2424}
2425
2426Py_LOCAL_INLINE(PyObject *)
2427rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2428{
2429 register Py_ssize_t i, j, count=0;
2430 PyObject *str;
2431 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2432
2433 if (list == NULL)
2434 return NULL;
2435
2436 i = j = len - 1;
2437 while ((i >= 0) && (maxcount-- > 0)) {
2438 for (; i >= 0; i--) {
2439 if (s[i] == ch) {
2440 SPLIT_ADD(s, i + 1, j + 1);
2441 j = i = i - 1;
2442 break;
2443 }
2444 }
2445 }
2446 if (j >= -1) {
2447 SPLIT_ADD(s, 0, j + 1);
2448 }
2449 FIX_PREALLOC_SIZE(list);
2450 if (PyList_Reverse(list) < 0)
2451 goto onError;
2452
2453 return list;
2454
2455 onError:
2456 Py_DECREF(list);
2457 return NULL;
2458}
2459
2460Py_LOCAL_INLINE(PyObject *)
2461rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2462{
2463 register Py_ssize_t i, j, count = 0;
2464 PyObject *str;
2465 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2466
2467 if (list == NULL)
2468 return NULL;
2469
2470 for (i = j = len - 1; i >= 0; ) {
2471 /* find a token */
2472 while (i >= 0 && ISSPACE(s[i]))
2473 i--;
2474 j = i;
2475 while (i >= 0 && !ISSPACE(s[i]))
2476 i--;
2477 if (j > i) {
2478 if (maxcount-- <= 0)
2479 break;
2480 SPLIT_ADD(s, i + 1, j + 1);
2481 while (i >= 0 && ISSPACE(s[i]))
2482 i--;
2483 j = i;
2484 }
2485 }
2486 if (j >= 0) {
2487 SPLIT_ADD(s, 0, j + 1);
2488 }
2489 FIX_PREALLOC_SIZE(list);
2490 if (PyList_Reverse(list) < 0)
2491 goto onError;
2492
2493 return list;
2494
2495 onError:
2496 Py_DECREF(list);
2497 return NULL;
2498}
2499
2500PyDoc_STRVAR(rsplit__doc__,
2501"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2502\n\
2503Return a list of the sections in B, using sep as the delimiter,\n\
2504starting at the end of B and working to the front.\n\
2505If sep is not given, B is split on ASCII whitespace characters\n\
2506(space, tab, return, newline, formfeed, vertical tab).\n\
2507If maxsplit is given, at most maxsplit splits are done.");
2508
2509static PyObject *
2510bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2511{
2512 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2513 Py_ssize_t maxsplit = -1, count = 0;
2514 const char *s = PyByteArray_AS_STRING(self), *sub;
2515 PyObject *list, *str, *subobj = Py_None;
2516 Py_buffer vsub;
2517
2518 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2519 return NULL;
2520 if (maxsplit < 0)
2521 maxsplit = PY_SSIZE_T_MAX;
2522
2523 if (subobj == Py_None)
2524 return rsplit_whitespace(s, len, maxsplit);
2525
2526 if (_getbuffer(subobj, &vsub) < 0)
2527 return NULL;
2528 sub = vsub.buf;
2529 n = vsub.len;
2530
2531 if (n == 0) {
2532 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002533 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002534 return NULL;
2535 }
Amaury Forgeot d'Arc313bda12008-08-17 21:05:18 +00002536 else if (n == 1) {
2537 list = rsplit_char(s, len, sub[0], maxsplit);
2538 PyBuffer_Release(&vsub);
2539 return list;
2540 }
Christian Heimes44720832008-05-26 13:01:01 +00002541
2542 list = PyList_New(PREALLOC_SIZE(maxsplit));
2543 if (list == NULL) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002544 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002545 return NULL;
2546 }
2547
2548 j = len;
2549 i = j - n;
2550
2551 while ( (i >= 0) && (maxsplit-- > 0) ) {
2552 for (; i>=0; i--) {
2553 if (Py_STRING_MATCH(s, i, sub, n)) {
2554 SPLIT_ADD(s, i + n, j);
2555 j = i;
2556 i -= n;
2557 break;
2558 }
2559 }
2560 }
2561 SPLIT_ADD(s, 0, j);
2562 FIX_PREALLOC_SIZE(list);
2563 if (PyList_Reverse(list) < 0)
2564 goto onError;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002565 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002566 return list;
2567
2568onError:
2569 Py_DECREF(list);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002570 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002571 return NULL;
2572}
2573
2574PyDoc_STRVAR(reverse__doc__,
2575"B.reverse() -> None\n\
2576\n\
2577Reverse the order of the values in B in place.");
2578static PyObject *
2579bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2580{
2581 char swap, *head, *tail;
2582 Py_ssize_t i, j, n = Py_SIZE(self);
2583
2584 j = n / 2;
2585 head = self->ob_bytes;
2586 tail = head + n - 1;
2587 for (i = 0; i < j; i++) {
2588 swap = *head;
2589 *head++ = *tail;
2590 *tail-- = swap;
2591 }
2592
2593 Py_RETURN_NONE;
2594}
2595
2596PyDoc_STRVAR(insert__doc__,
2597"B.insert(index, int) -> None\n\
2598\n\
2599Insert a single item into the bytearray before the given index.");
2600static PyObject *
2601bytes_insert(PyByteArrayObject *self, PyObject *args)
2602{
Georg Brandl3e483f62008-07-16 22:57:41 +00002603 PyObject *value;
2604 int ival;
Christian Heimes44720832008-05-26 13:01:01 +00002605 Py_ssize_t where, n = Py_SIZE(self);
2606
Georg Brandl3e483f62008-07-16 22:57:41 +00002607 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
Christian Heimes44720832008-05-26 13:01:01 +00002608 return NULL;
2609
2610 if (n == PY_SSIZE_T_MAX) {
2611 PyErr_SetString(PyExc_OverflowError,
Mark Dickinson76e96432009-09-06 10:33:12 +00002612 "cannot add more objects to bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002613 return NULL;
2614 }
Georg Brandl3e483f62008-07-16 22:57:41 +00002615 if (!_getbytevalue(value, &ival))
Christian Heimes44720832008-05-26 13:01:01 +00002616 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002617 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2618 return NULL;
2619
2620 if (where < 0) {
2621 where += n;
2622 if (where < 0)
2623 where = 0;
2624 }
2625 if (where > n)
2626 where = n;
2627 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Georg Brandl3e483f62008-07-16 22:57:41 +00002628 self->ob_bytes[where] = ival;
Christian Heimes44720832008-05-26 13:01:01 +00002629
2630 Py_RETURN_NONE;
2631}
2632
2633PyDoc_STRVAR(append__doc__,
2634"B.append(int) -> None\n\
2635\n\
2636Append a single item to the end of B.");
2637static PyObject *
2638bytes_append(PyByteArrayObject *self, PyObject *arg)
2639{
2640 int value;
2641 Py_ssize_t n = Py_SIZE(self);
2642
2643 if (! _getbytevalue(arg, &value))
2644 return NULL;
2645 if (n == PY_SSIZE_T_MAX) {
2646 PyErr_SetString(PyExc_OverflowError,
Mark Dickinson76e96432009-09-06 10:33:12 +00002647 "cannot add more objects to bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002648 return NULL;
2649 }
2650 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2651 return NULL;
2652
2653 self->ob_bytes[n] = value;
2654
2655 Py_RETURN_NONE;
2656}
2657
2658PyDoc_STRVAR(extend__doc__,
2659"B.extend(iterable int) -> None\n\
2660\n\
2661Append all the elements from the iterator or sequence to the\n\
2662end of B.");
2663static PyObject *
2664bytes_extend(PyByteArrayObject *self, PyObject *arg)
2665{
2666 PyObject *it, *item, *bytes_obj;
2667 Py_ssize_t buf_size = 0, len = 0;
2668 int value;
2669 char *buf;
2670
2671 /* bytes_setslice code only accepts something supporting PEP 3118. */
2672 if (PyObject_CheckBuffer(arg)) {
2673 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2674 return NULL;
2675
2676 Py_RETURN_NONE;
2677 }
2678
2679 it = PyObject_GetIter(arg);
2680 if (it == NULL)
2681 return NULL;
2682
2683 /* Try to determine the length of the argument. 32 is abitrary. */
2684 buf_size = _PyObject_LengthHint(arg, 32);
Georg Brandle9b91212009-04-05 21:26:31 +00002685 if (buf_size == -1) {
2686 Py_DECREF(it);
2687 return NULL;
2688 }
Christian Heimes44720832008-05-26 13:01:01 +00002689
2690 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2691 if (bytes_obj == NULL)
2692 return NULL;
2693 buf = PyByteArray_AS_STRING(bytes_obj);
2694
2695 while ((item = PyIter_Next(it)) != NULL) {
2696 if (! _getbytevalue(item, &value)) {
2697 Py_DECREF(item);
2698 Py_DECREF(it);
2699 Py_DECREF(bytes_obj);
2700 return NULL;
2701 }
2702 buf[len++] = value;
2703 Py_DECREF(item);
2704
2705 if (len >= buf_size) {
2706 buf_size = len + (len >> 1) + 1;
2707 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2708 Py_DECREF(it);
2709 Py_DECREF(bytes_obj);
2710 return NULL;
2711 }
2712 /* Recompute the `buf' pointer, since the resizing operation may
2713 have invalidated it. */
2714 buf = PyByteArray_AS_STRING(bytes_obj);
2715 }
2716 }
2717 Py_DECREF(it);
2718
2719 /* Resize down to exact size. */
2720 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2721 Py_DECREF(bytes_obj);
2722 return NULL;
2723 }
2724
2725 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2726 return NULL;
2727 Py_DECREF(bytes_obj);
2728
2729 Py_RETURN_NONE;
2730}
2731
2732PyDoc_STRVAR(pop__doc__,
2733"B.pop([index]) -> int\n\
2734\n\
2735Remove and return a single item from B. If no index\n\
Andrew M. Kuchlingd8972642008-06-21 13:29:12 +00002736argument is given, will pop the last value.");
Christian Heimes44720832008-05-26 13:01:01 +00002737static PyObject *
2738bytes_pop(PyByteArrayObject *self, PyObject *args)
2739{
2740 int value;
2741 Py_ssize_t where = -1, n = Py_SIZE(self);
2742
2743 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2744 return NULL;
2745
2746 if (n == 0) {
2747 PyErr_SetString(PyExc_OverflowError,
Mark Dickinson76e96432009-09-06 10:33:12 +00002748 "cannot pop an empty bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002749 return NULL;
2750 }
2751 if (where < 0)
2752 where += Py_SIZE(self);
2753 if (where < 0 || where >= Py_SIZE(self)) {
2754 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2755 return NULL;
2756 }
Antoine Pitrou599db7f2008-12-07 00:07:51 +00002757 if (!_canresize(self))
2758 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002759
2760 value = self->ob_bytes[where];
2761 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2762 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2763 return NULL;
2764
Mark Dickinsonb61c0352009-09-06 10:05:28 +00002765 return PyInt_FromLong((unsigned char)value);
Christian Heimes44720832008-05-26 13:01:01 +00002766}
2767
2768PyDoc_STRVAR(remove__doc__,
2769"B.remove(int) -> None\n\
2770\n\
2771Remove the first occurance of a value in B.");
2772static PyObject *
2773bytes_remove(PyByteArrayObject *self, PyObject *arg)
2774{
2775 int value;
2776 Py_ssize_t where, n = Py_SIZE(self);
2777
2778 if (! _getbytevalue(arg, &value))
2779 return NULL;
2780
2781 for (where = 0; where < n; where++) {
2782 if (self->ob_bytes[where] == value)
2783 break;
2784 }
2785 if (where == n) {
Mark Dickinson76e96432009-09-06 10:33:12 +00002786 PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002787 return NULL;
2788 }
Antoine Pitrou599db7f2008-12-07 00:07:51 +00002789 if (!_canresize(self))
2790 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002791
2792 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2793 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2794 return NULL;
2795
2796 Py_RETURN_NONE;
2797}
2798
2799/* XXX These two helpers could be optimized if argsize == 1 */
2800
2801static Py_ssize_t
2802lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2803 void *argptr, Py_ssize_t argsize)
2804{
2805 Py_ssize_t i = 0;
2806 while (i < mysize && memchr(argptr, myptr[i], argsize))
2807 i++;
2808 return i;
2809}
2810
2811static Py_ssize_t
2812rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2813 void *argptr, Py_ssize_t argsize)
2814{
2815 Py_ssize_t i = mysize - 1;
2816 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2817 i--;
2818 return i + 1;
2819}
2820
2821PyDoc_STRVAR(strip__doc__,
2822"B.strip([bytes]) -> bytearray\n\
2823\n\
2824Strip leading and trailing bytes contained in the argument.\n\
2825If the argument is omitted, strip ASCII whitespace.");
2826static PyObject *
2827bytes_strip(PyByteArrayObject *self, PyObject *args)
2828{
2829 Py_ssize_t left, right, mysize, argsize;
2830 void *myptr, *argptr;
2831 PyObject *arg = Py_None;
2832 Py_buffer varg;
2833 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2834 return NULL;
2835 if (arg == Py_None) {
2836 argptr = "\t\n\r\f\v ";
2837 argsize = 6;
2838 }
2839 else {
2840 if (_getbuffer(arg, &varg) < 0)
2841 return NULL;
2842 argptr = varg.buf;
2843 argsize = varg.len;
2844 }
2845 myptr = self->ob_bytes;
2846 mysize = Py_SIZE(self);
2847 left = lstrip_helper(myptr, mysize, argptr, argsize);
2848 if (left == mysize)
2849 right = left;
2850 else
2851 right = rstrip_helper(myptr, mysize, argptr, argsize);
2852 if (arg != Py_None)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002853 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00002854 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2855}
2856
2857PyDoc_STRVAR(lstrip__doc__,
2858"B.lstrip([bytes]) -> bytearray\n\
2859\n\
2860Strip leading bytes contained in the argument.\n\
2861If the argument is omitted, strip leading ASCII whitespace.");
2862static PyObject *
2863bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2864{
2865 Py_ssize_t left, right, mysize, argsize;
2866 void *myptr, *argptr;
2867 PyObject *arg = Py_None;
2868 Py_buffer varg;
2869 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2870 return NULL;
2871 if (arg == Py_None) {
2872 argptr = "\t\n\r\f\v ";
2873 argsize = 6;
2874 }
2875 else {
2876 if (_getbuffer(arg, &varg) < 0)
2877 return NULL;
2878 argptr = varg.buf;
2879 argsize = varg.len;
2880 }
2881 myptr = self->ob_bytes;
2882 mysize = Py_SIZE(self);
2883 left = lstrip_helper(myptr, mysize, argptr, argsize);
2884 right = mysize;
2885 if (arg != Py_None)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002886 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00002887 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2888}
2889
2890PyDoc_STRVAR(rstrip__doc__,
2891"B.rstrip([bytes]) -> bytearray\n\
2892\n\
2893Strip trailing bytes contained in the argument.\n\
2894If the argument is omitted, strip trailing ASCII whitespace.");
2895static PyObject *
2896bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2897{
2898 Py_ssize_t left, right, mysize, argsize;
2899 void *myptr, *argptr;
2900 PyObject *arg = Py_None;
2901 Py_buffer varg;
2902 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2903 return NULL;
2904 if (arg == Py_None) {
2905 argptr = "\t\n\r\f\v ";
2906 argsize = 6;
2907 }
2908 else {
2909 if (_getbuffer(arg, &varg) < 0)
2910 return NULL;
2911 argptr = varg.buf;
2912 argsize = varg.len;
2913 }
2914 myptr = self->ob_bytes;
2915 mysize = Py_SIZE(self);
2916 left = 0;
2917 right = rstrip_helper(myptr, mysize, argptr, argsize);
2918 if (arg != Py_None)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002919 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00002920 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2921}
2922
2923PyDoc_STRVAR(decode_doc,
2924"B.decode([encoding[, errors]]) -> unicode object.\n\
2925\n\
2926Decodes B using the codec registered for encoding. encoding defaults\n\
2927to the default encoding. errors may be given to set a different error\n\
2928handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2929a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2930as well as any other name registered with codecs.register_error that is\n\
2931able to handle UnicodeDecodeErrors.");
2932
2933static PyObject *
2934bytes_decode(PyObject *self, PyObject *args)
2935{
2936 const char *encoding = NULL;
2937 const char *errors = NULL;
2938
2939 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2940 return NULL;
2941 if (encoding == NULL)
2942 encoding = PyUnicode_GetDefaultEncoding();
2943 return PyCodec_Decode(self, encoding, errors);
2944}
2945
2946PyDoc_STRVAR(alloc_doc,
2947"B.__alloc__() -> int\n\
2948\n\
2949Returns the number of bytes actually allocated.");
2950
2951static PyObject *
2952bytes_alloc(PyByteArrayObject *self)
2953{
2954 return PyInt_FromSsize_t(self->ob_alloc);
2955}
2956
2957PyDoc_STRVAR(join_doc,
2958"B.join(iterable_of_bytes) -> bytes\n\
2959\n\
2960Concatenates any number of bytearray objects, with B in between each pair.");
2961
2962static PyObject *
2963bytes_join(PyByteArrayObject *self, PyObject *it)
2964{
2965 PyObject *seq;
2966 Py_ssize_t mysize = Py_SIZE(self);
2967 Py_ssize_t i;
2968 Py_ssize_t n;
2969 PyObject **items;
2970 Py_ssize_t totalsize = 0;
2971 PyObject *result;
2972 char *dest;
2973
2974 seq = PySequence_Fast(it, "can only join an iterable");
2975 if (seq == NULL)
2976 return NULL;
2977 n = PySequence_Fast_GET_SIZE(seq);
2978 items = PySequence_Fast_ITEMS(seq);
2979
2980 /* Compute the total size, and check that they are all bytes */
2981 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2982 for (i = 0; i < n; i++) {
2983 PyObject *obj = items[i];
2984 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2985 PyErr_Format(PyExc_TypeError,
2986 "can only join an iterable of bytes "
2987 "(item %ld has type '%.100s')",
2988 /* XXX %ld isn't right on Win64 */
2989 (long)i, Py_TYPE(obj)->tp_name);
2990 goto error;
2991 }
2992 if (i > 0)
2993 totalsize += mysize;
2994 totalsize += Py_SIZE(obj);
2995 if (totalsize < 0) {
2996 PyErr_NoMemory();
2997 goto error;
2998 }
2999 }
3000
3001 /* Allocate the result, and copy the bytes */
3002 result = PyByteArray_FromStringAndSize(NULL, totalsize);
3003 if (result == NULL)
3004 goto error;
3005 dest = PyByteArray_AS_STRING(result);
3006 for (i = 0; i < n; i++) {
3007 PyObject *obj = items[i];
3008 Py_ssize_t size = Py_SIZE(obj);
3009 char *buf;
3010 if (PyByteArray_Check(obj))
3011 buf = PyByteArray_AS_STRING(obj);
3012 else
3013 buf = PyBytes_AS_STRING(obj);
3014 if (i) {
3015 memcpy(dest, self->ob_bytes, mysize);
3016 dest += mysize;
3017 }
3018 memcpy(dest, buf, size);
3019 dest += size;
3020 }
3021
3022 /* Done */
3023 Py_DECREF(seq);
3024 return result;
3025
3026 /* Error handling */
3027 error:
3028 Py_DECREF(seq);
3029 return NULL;
3030}
3031
3032PyDoc_STRVAR(fromhex_doc,
3033"bytearray.fromhex(string) -> bytearray\n\
3034\n\
3035Create a bytearray object from a string of hexadecimal numbers.\n\
3036Spaces between two numbers are accepted.\n\
3037Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3038
3039static int
3040hex_digit_to_int(Py_UNICODE c)
3041{
3042 if (c >= 128)
3043 return -1;
3044 if (ISDIGIT(c))
3045 return c - '0';
3046 else {
3047 if (ISUPPER(c))
3048 c = TOLOWER(c);
3049 if (c >= 'a' && c <= 'f')
3050 return c - 'a' + 10;
3051 }
3052 return -1;
3053}
3054
3055static PyObject *
3056bytes_fromhex(PyObject *cls, PyObject *args)
3057{
3058 PyObject *newbytes, *hexobj;
3059 char *buf;
3060 Py_UNICODE *hex;
3061 Py_ssize_t hexlen, byteslen, i, j;
3062 int top, bot;
3063
3064 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3065 return NULL;
3066 assert(PyUnicode_Check(hexobj));
3067 hexlen = PyUnicode_GET_SIZE(hexobj);
3068 hex = PyUnicode_AS_UNICODE(hexobj);
3069 byteslen = hexlen/2; /* This overestimates if there are spaces */
3070 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3071 if (!newbytes)
3072 return NULL;
3073 buf = PyByteArray_AS_STRING(newbytes);
3074 for (i = j = 0; i < hexlen; i += 2) {
3075 /* skip over spaces in the input */
3076 while (hex[i] == ' ')
3077 i++;
3078 if (i >= hexlen)
3079 break;
3080 top = hex_digit_to_int(hex[i]);
3081 bot = hex_digit_to_int(hex[i+1]);
3082 if (top == -1 || bot == -1) {
3083 PyErr_Format(PyExc_ValueError,
3084 "non-hexadecimal number found in "
3085 "fromhex() arg at position %zd", i);
3086 goto error;
3087 }
3088 buf[j++] = (top << 4) + bot;
3089 }
3090 if (PyByteArray_Resize(newbytes, j) < 0)
3091 goto error;
3092 return newbytes;
3093
3094 error:
3095 Py_DECREF(newbytes);
3096 return NULL;
3097}
3098
3099PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3100
3101static PyObject *
3102bytes_reduce(PyByteArrayObject *self)
3103{
3104 PyObject *latin1, *dict;
3105 if (self->ob_bytes)
3106 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3107 Py_SIZE(self), NULL);
3108 else
3109 latin1 = PyUnicode_FromString("");
3110
3111 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3112 if (dict == NULL) {
3113 PyErr_Clear();
3114 dict = Py_None;
3115 Py_INCREF(dict);
3116 }
3117
3118 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3119}
3120
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003121PyDoc_STRVAR(sizeof_doc,
3122"B.__sizeof__() -> int\n\
3123 \n\
3124Returns the size of B in memory, in bytes");
3125static PyObject *
3126bytes_sizeof(PyByteArrayObject *self)
3127{
Georg Brandle9b91212009-04-05 21:26:31 +00003128 Py_ssize_t res;
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003129
Georg Brandle9b91212009-04-05 21:26:31 +00003130 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3131 return PyInt_FromSsize_t(res);
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003132}
3133
Christian Heimes44720832008-05-26 13:01:01 +00003134static PySequenceMethods bytes_as_sequence = {
3135 (lenfunc)bytes_length, /* sq_length */
3136 (binaryfunc)PyByteArray_Concat, /* sq_concat */
3137 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3138 (ssizeargfunc)bytes_getitem, /* sq_item */
3139 0, /* sq_slice */
3140 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3141 0, /* sq_ass_slice */
3142 (objobjproc)bytes_contains, /* sq_contains */
3143 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3144 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
3145};
3146
3147static PyMappingMethods bytes_as_mapping = {
3148 (lenfunc)bytes_length,
3149 (binaryfunc)bytes_subscript,
3150 (objobjargproc)bytes_ass_subscript,
3151};
3152
3153static PyBufferProcs bytes_as_buffer = {
3154 (readbufferproc)bytes_buffer_getreadbuf,
3155 (writebufferproc)bytes_buffer_getwritebuf,
3156 (segcountproc)bytes_buffer_getsegcount,
3157 (charbufferproc)bytes_buffer_getcharbuf,
3158 (getbufferproc)bytes_getbuffer,
3159 (releasebufferproc)bytes_releasebuffer,
3160};
3161
3162static PyMethodDef
3163bytes_methods[] = {
3164 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3165 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003166 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
Christian Heimes44720832008-05-26 13:01:01 +00003167 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
3168 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3169 _Py_capitalize__doc__},
3170 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3171 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3172 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3173 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
3174 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3175 expandtabs__doc__},
3176 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3177 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3178 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3179 fromhex_doc},
3180 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3181 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3182 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3183 _Py_isalnum__doc__},
3184 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3185 _Py_isalpha__doc__},
3186 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3187 _Py_isdigit__doc__},
3188 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3189 _Py_islower__doc__},
3190 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3191 _Py_isspace__doc__},
3192 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3193 _Py_istitle__doc__},
3194 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3195 _Py_isupper__doc__},
3196 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3197 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3198 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3199 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3200 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3201 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3202 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3203 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3204 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3205 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3206 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3207 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3208 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3209 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3210 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3211 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
3212 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3213 splitlines__doc__},
3214 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3215 startswith__doc__},
3216 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3217 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3218 _Py_swapcase__doc__},
3219 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3220 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3221 translate__doc__},
3222 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3223 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3224 {NULL}
3225};
3226
3227PyDoc_STRVAR(bytes_doc,
3228"bytearray(iterable_of_ints) -> bytearray.\n\
3229bytearray(string, encoding[, errors]) -> bytearray.\n\
3230bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3231bytearray(memory_view) -> bytearray.\n\
3232\n\
3233Construct an mutable bytearray object from:\n\
3234 - an iterable yielding integers in range(256)\n\
3235 - a text string encoded using the specified encoding\n\
3236 - a bytes or a bytearray object\n\
3237 - any object implementing the buffer API.\n\
3238\n\
3239bytearray(int) -> bytearray.\n\
3240\n\
3241Construct a zero-initialized bytearray of the given length.");
3242
3243
/* Forward declaration: the type object below installs bytes_iter in its
   tp_iter slot, but the function itself is defined further down, after the
   iterator type. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytearray.
   The initializer is positional: every entry must stay in PyTypeObject slot
   order, so a slot that is not implemented is written as an explicit 0. */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytes_str,                          /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytes_iter,                         /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
3288
3289/*********************** Bytes Iterator ****************************/
3290
/* State of an iterator over a bytearray: the object being walked and the
   position of the next byte to hand out. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* Index of the next byte to yield */
    PyByteArrayObject *it_seq;  /* Set to NULL when iterator is exhausted */
} bytesiterobject;
3296
3297static void
3298bytesiter_dealloc(bytesiterobject *it)
3299{
3300 _PyObject_GC_UNTRACK(it);
3301 Py_XDECREF(it->it_seq);
3302 PyObject_GC_Del(it);
3303}
3304
3305static int
3306bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3307{
3308 Py_VISIT(it->it_seq);
3309 return 0;
3310}
3311
3312static PyObject *
3313bytesiter_next(bytesiterobject *it)
3314{
3315 PyByteArrayObject *seq;
3316 PyObject *item;
3317
3318 assert(it != NULL);
3319 seq = it->it_seq;
3320 if (seq == NULL)
3321 return NULL;
3322 assert(PyByteArray_Check(seq));
3323
3324 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3325 item = PyInt_FromLong(
3326 (unsigned char)seq->ob_bytes[it->it_index]);
3327 if (item != NULL)
3328 ++it->it_index;
3329 return item;
3330 }
3331
3332 Py_DECREF(seq);
3333 it->it_seq = NULL;
3334 return NULL;
3335}
3336
3337static PyObject *
3338bytesiter_length_hint(bytesiterobject *it)
3339{
3340 Py_ssize_t len = 0;
3341 if (it->it_seq)
3342 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3343 return PyInt_FromSsize_t(len);
3344}
3345
/* Docstring for bytearray_iterator.__length_hint__. */
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");

/* Method table of the bytearray iterator type; it is installed in the
   tp_methods slot of PyByteArrayIter_Type. */
static PyMethodDef bytesiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3354
/* Type object for iterators over bytearray objects.
   The initializer is positional, so entries must stay in PyTypeObject slot
   order; slots after tp_members are left to default zero-initialization. */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytesiter_dealloc,     /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytesiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytesiter_next,      /* tp_iternext */
    bytesiter_methods,                 /* tp_methods */
    0,                                 /* tp_members */
};
3387
3388static PyObject *
3389bytes_iter(PyObject *seq)
3390{
3391 bytesiterobject *it;
3392
3393 if (!PyByteArray_Check(seq)) {
3394 PyErr_BadInternalCall();
3395 return NULL;
3396 }
3397 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3398 if (it == NULL)
3399 return NULL;
3400 it->it_index = 0;
3401 Py_INCREF(seq);
3402 it->it_seq = (PyByteArrayObject *)seq;
3403 _PyObject_GC_TRACK(it);
3404 return (PyObject *)it;
3405}