blob: 15b09ba1e75f0d300b80b85393826ee899e65f3e [file] [log] [blame]
Christian Heimes44720832008-05-26 13:01:01 +00001/* PyBytes (bytearray) implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6#include "bytes_methods.h"
7
8static PyByteArrayObject *nullbytes = NULL;
Antoine Pitroubb667d42010-01-17 12:31:10 +00009char _PyByteArray_empty_string[] = "";
Christian Heimes44720832008-05-26 13:01:01 +000010
11void
12PyByteArray_Fini(void)
13{
14 Py_CLEAR(nullbytes);
15}
16
17int
18PyByteArray_Init(void)
19{
20 nullbytes = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
21 if (nullbytes == NULL)
22 return 0;
23 nullbytes->ob_bytes = NULL;
24 Py_SIZE(nullbytes) = nullbytes->ob_alloc = 0;
25 nullbytes->ob_exports = 0;
26 return 1;
27}
28
29/* end nullbytes support */
30
31/* Helpers */
32
33static int
34_getbytevalue(PyObject* arg, int *value)
35{
36 long face_value;
37
Georg Brandl3e483f62008-07-16 22:57:41 +000038 if (PyBytes_CheckExact(arg)) {
Christian Heimes44720832008-05-26 13:01:01 +000039 if (Py_SIZE(arg) != 1) {
40 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
41 return 0;
42 }
Georg Brandl3e483f62008-07-16 22:57:41 +000043 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
44 return 1;
45 }
46 else if (PyInt_Check(arg) || PyLong_Check(arg)) {
47 face_value = PyLong_AsLong(arg);
Christian Heimes44720832008-05-26 13:01:01 +000048 }
49 else {
Georg Brandl3e483f62008-07-16 22:57:41 +000050 PyObject *index = PyNumber_Index(arg);
51 if (index == NULL) {
52 PyErr_Format(PyExc_TypeError,
53 "an integer or string of size 1 is required");
54 return 0;
55 }
56 face_value = PyLong_AsLong(index);
57 Py_DECREF(index);
58 }
Georg Brandl3e483f62008-07-16 22:57:41 +000059
60 if (face_value < 0 || face_value >= 256) {
Georg Brandl3238a3e2008-07-16 23:17:46 +000061 /* this includes the OverflowError in case the long is too large */
Georg Brandl3e483f62008-07-16 22:57:41 +000062 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
Christian Heimes44720832008-05-26 13:01:01 +000063 return 0;
64 }
65
66 *value = face_value;
67 return 1;
68}
69
70static Py_ssize_t
71bytes_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
72{
73 if ( index != 0 ) {
74 PyErr_SetString(PyExc_SystemError,
75 "accessing non-existent bytes segment");
76 return -1;
77 }
Antoine Pitroubb667d42010-01-17 12:31:10 +000078 *ptr = (void *)PyByteArray_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +000079 return Py_SIZE(self);
80}
81
82static Py_ssize_t
83bytes_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
84{
85 if ( index != 0 ) {
86 PyErr_SetString(PyExc_SystemError,
87 "accessing non-existent bytes segment");
88 return -1;
89 }
Antoine Pitroubb667d42010-01-17 12:31:10 +000090 *ptr = (void *)PyByteArray_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +000091 return Py_SIZE(self);
92}
93
94static Py_ssize_t
95bytes_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
96{
97 if ( lenp )
98 *lenp = Py_SIZE(self);
99 return 1;
100}
101
102static Py_ssize_t
103bytes_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
104{
105 if ( index != 0 ) {
106 PyErr_SetString(PyExc_SystemError,
107 "accessing non-existent bytes segment");
108 return -1;
109 }
Antoine Pitroubb667d42010-01-17 12:31:10 +0000110 *ptr = PyByteArray_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +0000111 return Py_SIZE(self);
112}
113
114static int
115bytes_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
116{
117 int ret;
118 void *ptr;
119 if (view == NULL) {
120 obj->ob_exports++;
121 return 0;
122 }
Antoine Pitroubb667d42010-01-17 12:31:10 +0000123 ptr = (void *) PyByteArray_AS_STRING(obj);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000124 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
Christian Heimes44720832008-05-26 13:01:01 +0000125 if (ret >= 0) {
126 obj->ob_exports++;
127 }
128 return ret;
129}
130
131static void
132bytes_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
133{
134 obj->ob_exports--;
135}
136
137static Py_ssize_t
138_getbuffer(PyObject *obj, Py_buffer *view)
139{
140 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
141
142 if (buffer == NULL || buffer->bf_getbuffer == NULL)
143 {
144 PyErr_Format(PyExc_TypeError,
145 "Type %.100s doesn't support the buffer API",
146 Py_TYPE(obj)->tp_name);
147 return -1;
148 }
149
150 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
151 return -1;
152 return view->len;
153}
154
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000155static int
156_canresize(PyByteArrayObject *self)
157{
158 if (self->ob_exports > 0) {
159 PyErr_SetString(PyExc_BufferError,
160 "Existing exports of data: object cannot be re-sized");
161 return 0;
162 }
163 return 1;
164}
165
Christian Heimes44720832008-05-26 13:01:01 +0000166/* Direct API functions */
167
168PyObject *
169PyByteArray_FromObject(PyObject *input)
170{
171 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
172 input, NULL);
173}
174
175PyObject *
176PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
177{
178 PyByteArrayObject *new;
179 Py_ssize_t alloc;
180
181 if (size < 0) {
182 PyErr_SetString(PyExc_SystemError,
183 "Negative size passed to PyByteArray_FromStringAndSize");
184 return NULL;
185 }
186
187 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
188 if (new == NULL)
189 return NULL;
190
191 if (size == 0) {
192 new->ob_bytes = NULL;
193 alloc = 0;
194 }
195 else {
196 alloc = size + 1;
197 new->ob_bytes = PyMem_Malloc(alloc);
198 if (new->ob_bytes == NULL) {
199 Py_DECREF(new);
200 return PyErr_NoMemory();
201 }
Antoine Pitroubb667d42010-01-17 12:31:10 +0000202 if (bytes != NULL && size > 0)
Christian Heimes44720832008-05-26 13:01:01 +0000203 memcpy(new->ob_bytes, bytes, size);
204 new->ob_bytes[size] = '\0'; /* Trailing null byte */
205 }
206 Py_SIZE(new) = size;
207 new->ob_alloc = alloc;
208 new->ob_exports = 0;
209
210 return (PyObject *)new;
211}
212
213Py_ssize_t
214PyByteArray_Size(PyObject *self)
215{
216 assert(self != NULL);
217 assert(PyByteArray_Check(self));
218
219 return PyByteArray_GET_SIZE(self);
220}
221
222char *
223PyByteArray_AsString(PyObject *self)
224{
225 assert(self != NULL);
226 assert(PyByteArray_Check(self));
227
228 return PyByteArray_AS_STRING(self);
229}
230
231int
232PyByteArray_Resize(PyObject *self, Py_ssize_t size)
233{
234 void *sval;
235 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
236
237 assert(self != NULL);
238 assert(PyByteArray_Check(self));
239 assert(size >= 0);
240
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000241 if (size == Py_SIZE(self)) {
242 return 0;
243 }
244 if (!_canresize((PyByteArrayObject *)self)) {
245 return -1;
246 }
247
Christian Heimes44720832008-05-26 13:01:01 +0000248 if (size < alloc / 2) {
249 /* Major downsize; resize down to exact size */
250 alloc = size + 1;
251 }
252 else if (size < alloc) {
253 /* Within allocated size; quick exit */
254 Py_SIZE(self) = size;
255 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
256 return 0;
257 }
258 else if (size <= alloc * 1.125) {
259 /* Moderate upsize; overallocate similar to list_resize() */
260 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
261 }
262 else {
263 /* Major upsize; resize up to exact size */
264 alloc = size + 1;
265 }
266
Christian Heimes44720832008-05-26 13:01:01 +0000267 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
268 if (sval == NULL) {
269 PyErr_NoMemory();
270 return -1;
271 }
272
273 ((PyByteArrayObject *)self)->ob_bytes = sval;
274 Py_SIZE(self) = size;
275 ((PyByteArrayObject *)self)->ob_alloc = alloc;
276 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
277
278 return 0;
279}
280
281PyObject *
282PyByteArray_Concat(PyObject *a, PyObject *b)
283{
284 Py_ssize_t size;
285 Py_buffer va, vb;
286 PyByteArrayObject *result = NULL;
287
288 va.len = -1;
289 vb.len = -1;
290 if (_getbuffer(a, &va) < 0 ||
291 _getbuffer(b, &vb) < 0) {
292 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
293 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
294 goto done;
295 }
296
297 size = va.len + vb.len;
298 if (size < 0) {
299 return PyErr_NoMemory();
300 goto done;
301 }
302
303 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
304 if (result != NULL) {
305 memcpy(result->ob_bytes, va.buf, va.len);
306 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
307 }
308
309 done:
310 if (va.len != -1)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000311 PyBuffer_Release(&va);
Christian Heimes44720832008-05-26 13:01:01 +0000312 if (vb.len != -1)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000313 PyBuffer_Release(&vb);
Christian Heimes44720832008-05-26 13:01:01 +0000314 return (PyObject *)result;
315}
316
317/* Functions stuffed into the type object */
318
319static Py_ssize_t
320bytes_length(PyByteArrayObject *self)
321{
322 return Py_SIZE(self);
323}
324
325static PyObject *
326bytes_iconcat(PyByteArrayObject *self, PyObject *other)
327{
328 Py_ssize_t mysize;
329 Py_ssize_t size;
330 Py_buffer vo;
331
332 if (_getbuffer(other, &vo) < 0) {
333 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
334 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
335 return NULL;
336 }
337
338 mysize = Py_SIZE(self);
339 size = mysize + vo.len;
340 if (size < 0) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000341 PyBuffer_Release(&vo);
Christian Heimes44720832008-05-26 13:01:01 +0000342 return PyErr_NoMemory();
343 }
344 if (size < self->ob_alloc) {
345 Py_SIZE(self) = size;
346 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
347 }
348 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000349 PyBuffer_Release(&vo);
Christian Heimes44720832008-05-26 13:01:01 +0000350 return NULL;
351 }
352 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000353 PyBuffer_Release(&vo);
Christian Heimes44720832008-05-26 13:01:01 +0000354 Py_INCREF(self);
355 return (PyObject *)self;
356}
357
358static PyObject *
359bytes_repeat(PyByteArrayObject *self, Py_ssize_t count)
360{
361 PyByteArrayObject *result;
362 Py_ssize_t mysize;
363 Py_ssize_t size;
364
365 if (count < 0)
366 count = 0;
367 mysize = Py_SIZE(self);
368 size = mysize * count;
369 if (count != 0 && size / count != mysize)
370 return PyErr_NoMemory();
371 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
372 if (result != NULL && size != 0) {
373 if (mysize == 1)
374 memset(result->ob_bytes, self->ob_bytes[0], size);
375 else {
376 Py_ssize_t i;
377 for (i = 0; i < count; i++)
378 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
379 }
380 }
381 return (PyObject *)result;
382}
383
384static PyObject *
385bytes_irepeat(PyByteArrayObject *self, Py_ssize_t count)
386{
387 Py_ssize_t mysize;
388 Py_ssize_t size;
389
390 if (count < 0)
391 count = 0;
392 mysize = Py_SIZE(self);
393 size = mysize * count;
394 if (count != 0 && size / count != mysize)
395 return PyErr_NoMemory();
396 if (size < self->ob_alloc) {
397 Py_SIZE(self) = size;
398 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
399 }
400 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
401 return NULL;
402
403 if (mysize == 1)
404 memset(self->ob_bytes, self->ob_bytes[0], size);
405 else {
406 Py_ssize_t i;
407 for (i = 1; i < count; i++)
408 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
409 }
410
411 Py_INCREF(self);
412 return (PyObject *)self;
413}
414
415static PyObject *
416bytes_getitem(PyByteArrayObject *self, Py_ssize_t i)
417{
418 if (i < 0)
419 i += Py_SIZE(self);
420 if (i < 0 || i >= Py_SIZE(self)) {
421 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
422 return NULL;
423 }
424 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
425}
426
427static PyObject *
Georg Brandl3e483f62008-07-16 22:57:41 +0000428bytes_subscript(PyByteArrayObject *self, PyObject *index)
Christian Heimes44720832008-05-26 13:01:01 +0000429{
Georg Brandl3e483f62008-07-16 22:57:41 +0000430 if (PyIndex_Check(index)) {
431 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000432
433 if (i == -1 && PyErr_Occurred())
434 return NULL;
435
436 if (i < 0)
437 i += PyByteArray_GET_SIZE(self);
438
439 if (i < 0 || i >= Py_SIZE(self)) {
440 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
441 return NULL;
442 }
443 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
444 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000445 else if (PySlice_Check(index)) {
Christian Heimes44720832008-05-26 13:01:01 +0000446 Py_ssize_t start, stop, step, slicelength, cur, i;
Georg Brandl3e483f62008-07-16 22:57:41 +0000447 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000448 PyByteArray_GET_SIZE(self),
449 &start, &stop, &step, &slicelength) < 0) {
450 return NULL;
451 }
452
453 if (slicelength <= 0)
454 return PyByteArray_FromStringAndSize("", 0);
455 else if (step == 1) {
456 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
457 slicelength);
458 }
459 else {
460 char *source_buf = PyByteArray_AS_STRING(self);
461 char *result_buf = (char *)PyMem_Malloc(slicelength);
462 PyObject *result;
463
464 if (result_buf == NULL)
465 return PyErr_NoMemory();
466
467 for (cur = start, i = 0; i < slicelength;
468 cur += step, i++) {
469 result_buf[i] = source_buf[cur];
470 }
471 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
472 PyMem_Free(result_buf);
473 return result;
474 }
475 }
476 else {
477 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
478 return NULL;
479 }
480}
481
482static int
483bytes_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
484 PyObject *values)
485{
486 Py_ssize_t avail, needed;
487 void *bytes;
488 Py_buffer vbytes;
489 int res = 0;
490
491 vbytes.len = -1;
492 if (values == (PyObject *)self) {
493 /* Make a copy and call this function recursively */
494 int err;
495 values = PyByteArray_FromObject(values);
496 if (values == NULL)
497 return -1;
498 err = bytes_setslice(self, lo, hi, values);
499 Py_DECREF(values);
500 return err;
501 }
502 if (values == NULL) {
503 /* del b[lo:hi] */
504 bytes = NULL;
505 needed = 0;
506 }
507 else {
508 if (_getbuffer(values, &vbytes) < 0) {
509 PyErr_Format(PyExc_TypeError,
Neal Norwitzc86b54c2008-07-20 19:35:23 +0000510 "can't set bytearray slice from %.100s",
Christian Heimes44720832008-05-26 13:01:01 +0000511 Py_TYPE(values)->tp_name);
512 return -1;
513 }
514 needed = vbytes.len;
515 bytes = vbytes.buf;
516 }
517
518 if (lo < 0)
519 lo = 0;
520 if (hi < lo)
521 hi = lo;
522 if (hi > Py_SIZE(self))
523 hi = Py_SIZE(self);
524
525 avail = hi - lo;
526 if (avail < 0)
527 lo = hi = avail = 0;
528
529 if (avail != needed) {
530 if (avail > needed) {
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000531 if (!_canresize(self)) {
532 res = -1;
533 goto finish;
534 }
Christian Heimes44720832008-05-26 13:01:01 +0000535 /*
536 0 lo hi old_size
537 | |<----avail----->|<-----tomove------>|
538 | |<-needed->|<-----tomove------>|
539 0 lo new_hi new_size
540 */
541 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
542 Py_SIZE(self) - hi);
543 }
544 /* XXX(nnorwitz): need to verify this can't overflow! */
545 if (PyByteArray_Resize((PyObject *)self,
546 Py_SIZE(self) + needed - avail) < 0) {
547 res = -1;
548 goto finish;
549 }
550 if (avail < needed) {
551 /*
552 0 lo hi old_size
553 | |<-avail->|<-----tomove------>|
554 | |<----needed---->|<-----tomove------>|
555 0 lo new_hi new_size
556 */
557 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
558 Py_SIZE(self) - lo - needed);
559 }
560 }
561
562 if (needed > 0)
563 memcpy(self->ob_bytes + lo, bytes, needed);
564
565
566 finish:
567 if (vbytes.len != -1)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000568 PyBuffer_Release(&vbytes);
Christian Heimes44720832008-05-26 13:01:01 +0000569 return res;
570}
571
572static int
573bytes_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
574{
575 int ival;
576
577 if (i < 0)
578 i += Py_SIZE(self);
579
580 if (i < 0 || i >= Py_SIZE(self)) {
581 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
582 return -1;
583 }
584
585 if (value == NULL)
586 return bytes_setslice(self, i, i+1, NULL);
587
588 if (!_getbytevalue(value, &ival))
589 return -1;
590
591 self->ob_bytes[i] = ival;
592 return 0;
593}
594
595static int
Georg Brandl3e483f62008-07-16 22:57:41 +0000596bytes_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
Christian Heimes44720832008-05-26 13:01:01 +0000597{
598 Py_ssize_t start, stop, step, slicelen, needed;
599 char *bytes;
600
Georg Brandl3e483f62008-07-16 22:57:41 +0000601 if (PyIndex_Check(index)) {
602 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
Christian Heimes44720832008-05-26 13:01:01 +0000603
604 if (i == -1 && PyErr_Occurred())
605 return -1;
606
607 if (i < 0)
608 i += PyByteArray_GET_SIZE(self);
609
610 if (i < 0 || i >= Py_SIZE(self)) {
611 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
612 return -1;
613 }
614
615 if (values == NULL) {
616 /* Fall through to slice assignment */
617 start = i;
618 stop = i + 1;
619 step = 1;
620 slicelen = 1;
621 }
622 else {
Georg Brandl3e483f62008-07-16 22:57:41 +0000623 int ival;
624 if (!_getbytevalue(values, &ival))
Christian Heimes44720832008-05-26 13:01:01 +0000625 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000626 self->ob_bytes[i] = (char)ival;
627 return 0;
628 }
629 }
Georg Brandl3e483f62008-07-16 22:57:41 +0000630 else if (PySlice_Check(index)) {
631 if (PySlice_GetIndicesEx((PySliceObject *)index,
Christian Heimes44720832008-05-26 13:01:01 +0000632 PyByteArray_GET_SIZE(self),
633 &start, &stop, &step, &slicelen) < 0) {
634 return -1;
635 }
636 }
637 else {
638 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
639 return -1;
640 }
641
642 if (values == NULL) {
643 bytes = NULL;
644 needed = 0;
645 }
646 else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
647 /* Make a copy an call this function recursively */
648 int err;
649 values = PyByteArray_FromObject(values);
650 if (values == NULL)
651 return -1;
Georg Brandl3e483f62008-07-16 22:57:41 +0000652 err = bytes_ass_subscript(self, index, values);
Christian Heimes44720832008-05-26 13:01:01 +0000653 Py_DECREF(values);
654 return err;
655 }
656 else {
657 assert(PyByteArray_Check(values));
658 bytes = ((PyByteArrayObject *)values)->ob_bytes;
659 needed = Py_SIZE(values);
660 }
661 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
662 if ((step < 0 && start < stop) ||
663 (step > 0 && start > stop))
664 stop = start;
665 if (step == 1) {
666 if (slicelen != needed) {
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000667 if (!_canresize(self))
668 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000669 if (slicelen > needed) {
670 /*
671 0 start stop old_size
672 | |<---slicelen--->|<-----tomove------>|
673 | |<-needed->|<-----tomove------>|
674 0 lo new_hi new_size
675 */
676 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
677 Py_SIZE(self) - stop);
678 }
679 if (PyByteArray_Resize((PyObject *)self,
680 Py_SIZE(self) + needed - slicelen) < 0)
681 return -1;
682 if (slicelen < needed) {
683 /*
684 0 lo hi old_size
685 | |<-avail->|<-----tomove------>|
686 | |<----needed---->|<-----tomove------>|
687 0 lo new_hi new_size
688 */
689 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
690 Py_SIZE(self) - start - needed);
691 }
692 }
693
694 if (needed > 0)
695 memcpy(self->ob_bytes + start, bytes, needed);
696
697 return 0;
698 }
699 else {
700 if (needed == 0) {
701 /* Delete slice */
Mark Dickinson02733542010-01-29 17:16:18 +0000702 size_t cur;
703 Py_ssize_t i;
Christian Heimes44720832008-05-26 13:01:01 +0000704
Antoine Pitrou599db7f2008-12-07 00:07:51 +0000705 if (!_canresize(self))
706 return -1;
Christian Heimes44720832008-05-26 13:01:01 +0000707 if (step < 0) {
708 stop = start + 1;
709 start = stop + step * (slicelen - 1) - 1;
710 step = -step;
711 }
712 for (cur = start, i = 0;
713 i < slicelen; cur += step, i++) {
714 Py_ssize_t lim = step - 1;
715
Mark Dickinsona9209612010-02-14 13:08:35 +0000716 if (cur + step >= (size_t)PyByteArray_GET_SIZE(self))
Christian Heimes44720832008-05-26 13:01:01 +0000717 lim = PyByteArray_GET_SIZE(self) - cur - 1;
718
719 memmove(self->ob_bytes + cur - i,
720 self->ob_bytes + cur + 1, lim);
721 }
722 /* Move the tail of the bytes, in one chunk */
723 cur = start + slicelen*step;
Mark Dickinsona9209612010-02-14 13:08:35 +0000724 if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
Christian Heimes44720832008-05-26 13:01:01 +0000725 memmove(self->ob_bytes + cur - slicelen,
726 self->ob_bytes + cur,
727 PyByteArray_GET_SIZE(self) - cur);
728 }
729 if (PyByteArray_Resize((PyObject *)self,
730 PyByteArray_GET_SIZE(self) - slicelen) < 0)
731 return -1;
732
733 return 0;
734 }
735 else {
736 /* Assign slice */
737 Py_ssize_t cur, i;
738
739 if (needed != slicelen) {
740 PyErr_Format(PyExc_ValueError,
741 "attempt to assign bytes of size %zd "
742 "to extended slice of size %zd",
743 needed, slicelen);
744 return -1;
745 }
746 for (cur = start, i = 0; i < slicelen; cur += step, i++)
747 self->ob_bytes[cur] = bytes[i];
748 return 0;
749 }
750 }
751}
752
753static int
754bytes_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
755{
756 static char *kwlist[] = {"source", "encoding", "errors", 0};
757 PyObject *arg = NULL;
758 const char *encoding = NULL;
759 const char *errors = NULL;
760 Py_ssize_t count;
761 PyObject *it;
762 PyObject *(*iternext)(PyObject *);
763
764 if (Py_SIZE(self) != 0) {
765 /* Empty previous contents (yes, do this first of all!) */
766 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
767 return -1;
768 }
769
770 /* Parse arguments */
Neal Norwitzc86b54c2008-07-20 19:35:23 +0000771 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
Christian Heimes44720832008-05-26 13:01:01 +0000772 &arg, &encoding, &errors))
773 return -1;
774
775 /* Make a quick exit if no first argument */
776 if (arg == NULL) {
777 if (encoding != NULL || errors != NULL) {
778 PyErr_SetString(PyExc_TypeError,
779 "encoding or errors without sequence argument");
780 return -1;
781 }
782 return 0;
783 }
784
785 if (PyBytes_Check(arg)) {
786 PyObject *new, *encoded;
787 if (encoding != NULL) {
788 encoded = PyCodec_Encode(arg, encoding, errors);
789 if (encoded == NULL)
790 return -1;
791 assert(PyBytes_Check(encoded));
792 }
793 else {
794 encoded = arg;
795 Py_INCREF(arg);
796 }
797 new = bytes_iconcat(self, arg);
798 Py_DECREF(encoded);
799 if (new == NULL)
800 return -1;
801 Py_DECREF(new);
802 return 0;
803 }
804
805 if (PyUnicode_Check(arg)) {
806 /* Encode via the codec registry */
807 PyObject *encoded, *new;
808 if (encoding == NULL) {
809 PyErr_SetString(PyExc_TypeError,
810 "unicode argument without an encoding");
811 return -1;
812 }
813 encoded = PyCodec_Encode(arg, encoding, errors);
814 if (encoded == NULL)
815 return -1;
816 assert(PyBytes_Check(encoded));
817 new = bytes_iconcat(self, encoded);
818 Py_DECREF(encoded);
819 if (new == NULL)
820 return -1;
821 Py_DECREF(new);
822 return 0;
823 }
824
825 /* If it's not unicode, there can't be encoding or errors */
826 if (encoding != NULL || errors != NULL) {
827 PyErr_SetString(PyExc_TypeError,
828 "encoding or errors without a string argument");
829 return -1;
830 }
831
832 /* Is it an int? */
833 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
834 if (count == -1 && PyErr_Occurred())
835 PyErr_Clear();
836 else {
837 if (count < 0) {
838 PyErr_SetString(PyExc_ValueError, "negative count");
839 return -1;
840 }
841 if (count > 0) {
842 if (PyByteArray_Resize((PyObject *)self, count))
843 return -1;
844 memset(self->ob_bytes, 0, count);
845 }
846 return 0;
847 }
848
849 /* Use the buffer API */
850 if (PyObject_CheckBuffer(arg)) {
851 Py_ssize_t size;
852 Py_buffer view;
853 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
854 return -1;
855 size = view.len;
856 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
857 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
858 goto fail;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000859 PyBuffer_Release(&view);
Christian Heimes44720832008-05-26 13:01:01 +0000860 return 0;
861 fail:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +0000862 PyBuffer_Release(&view);
Christian Heimes44720832008-05-26 13:01:01 +0000863 return -1;
864 }
865
866 /* XXX Optimize this if the arguments is a list, tuple */
867
868 /* Get the iterator */
869 it = PyObject_GetIter(arg);
870 if (it == NULL)
871 return -1;
872 iternext = *Py_TYPE(it)->tp_iternext;
873
874 /* Run the iterator to exhaustion */
875 for (;;) {
876 PyObject *item;
Georg Brandl3e758462008-07-16 23:10:05 +0000877 int rc, value;
Christian Heimes44720832008-05-26 13:01:01 +0000878
879 /* Get the next item */
880 item = iternext(it);
881 if (item == NULL) {
882 if (PyErr_Occurred()) {
883 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
884 goto error;
885 PyErr_Clear();
886 }
887 break;
888 }
889
890 /* Interpret it as an int (__index__) */
Georg Brandl3e758462008-07-16 23:10:05 +0000891 rc = _getbytevalue(item, &value);
Christian Heimes44720832008-05-26 13:01:01 +0000892 Py_DECREF(item);
Georg Brandl3e758462008-07-16 23:10:05 +0000893 if (!rc)
Christian Heimes44720832008-05-26 13:01:01 +0000894 goto error;
895
Christian Heimes44720832008-05-26 13:01:01 +0000896 /* Append the byte */
897 if (Py_SIZE(self) < self->ob_alloc)
898 Py_SIZE(self)++;
899 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
900 goto error;
901 self->ob_bytes[Py_SIZE(self)-1] = value;
902 }
903
904 /* Clean up and return success */
905 Py_DECREF(it);
906 return 0;
907
908 error:
909 /* Error handling when it != NULL */
910 Py_DECREF(it);
911 return -1;
912}
913
914/* Mostly copied from string_repr, but without the
915 "smart quote" functionality. */
916static PyObject *
917bytes_repr(PyByteArrayObject *self)
918{
919 static const char *hexdigits = "0123456789abcdef";
920 const char *quote_prefix = "bytearray(b";
921 const char *quote_postfix = ")";
922 Py_ssize_t length = Py_SIZE(self);
923 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
Mark Dickinsona9209612010-02-14 13:08:35 +0000924 size_t newsize;
Christian Heimes44720832008-05-26 13:01:01 +0000925 PyObject *v;
Mark Dickinsona9209612010-02-14 13:08:35 +0000926 if (length > (PY_SSIZE_T_MAX - 14) / 4) {
Christian Heimes44720832008-05-26 13:01:01 +0000927 PyErr_SetString(PyExc_OverflowError,
928 "bytearray object is too large to make repr");
929 return NULL;
930 }
Mark Dickinsona9209612010-02-14 13:08:35 +0000931 newsize = 14 + 4 * length;
Christian Heimes44720832008-05-26 13:01:01 +0000932 v = PyUnicode_FromUnicode(NULL, newsize);
933 if (v == NULL) {
934 return NULL;
935 }
936 else {
937 register Py_ssize_t i;
938 register Py_UNICODE c;
939 register Py_UNICODE *p;
940 int quote;
941
942 /* Figure out which quote to use; single is preferred */
943 quote = '\'';
944 {
945 char *test, *start;
946 start = PyByteArray_AS_STRING(self);
947 for (test = start; test < start+length; ++test) {
948 if (*test == '"') {
949 quote = '\''; /* back to single */
950 goto decided;
951 }
952 else if (*test == '\'')
953 quote = '"';
954 }
955 decided:
956 ;
957 }
958
959 p = PyUnicode_AS_UNICODE(v);
960 while (*quote_prefix)
961 *p++ = *quote_prefix++;
962 *p++ = quote;
963
964 for (i = 0; i < length; i++) {
965 /* There's at least enough room for a hex escape
966 and a closing quote. */
967 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
968 c = self->ob_bytes[i];
969 if (c == '\'' || c == '\\')
970 *p++ = '\\', *p++ = c;
971 else if (c == '\t')
972 *p++ = '\\', *p++ = 't';
973 else if (c == '\n')
974 *p++ = '\\', *p++ = 'n';
975 else if (c == '\r')
976 *p++ = '\\', *p++ = 'r';
977 else if (c == 0)
978 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
979 else if (c < ' ' || c >= 0x7f) {
980 *p++ = '\\';
981 *p++ = 'x';
982 *p++ = hexdigits[(c & 0xf0) >> 4];
983 *p++ = hexdigits[c & 0xf];
984 }
985 else
986 *p++ = c;
987 }
988 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
989 *p++ = quote;
990 while (*quote_postfix) {
991 *p++ = *quote_postfix++;
992 }
993 *p = '\0';
994 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
995 Py_DECREF(v);
996 return NULL;
997 }
998 return v;
999 }
1000}
1001
1002static PyObject *
1003bytes_str(PyObject *op)
1004{
1005#if 0
1006 if (Py_BytesWarningFlag) {
1007 if (PyErr_WarnEx(PyExc_BytesWarning,
1008 "str() on a bytearray instance", 1))
1009 return NULL;
1010 }
1011 return bytes_repr((PyByteArrayObject*)op);
1012#endif
1013 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1014}
1015
1016static PyObject *
1017bytes_richcompare(PyObject *self, PyObject *other, int op)
1018{
1019 Py_ssize_t self_size, other_size;
1020 Py_buffer self_bytes, other_bytes;
1021 PyObject *res;
1022 Py_ssize_t minsize;
1023 int cmp;
1024
1025 /* Bytes can be compared to anything that supports the (binary)
1026 buffer API. Except that a comparison with Unicode is always an
1027 error, even if the comparison is for equality. */
1028 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
1029 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
1030 if (Py_BytesWarningFlag && op == Py_EQ) {
1031 if (PyErr_WarnEx(PyExc_BytesWarning,
Ezio Melotti262c3ce2010-01-14 11:39:50 +00001032 "Comparison between bytearray and string", 1))
Christian Heimes44720832008-05-26 13:01:01 +00001033 return NULL;
1034 }
1035
1036 Py_INCREF(Py_NotImplemented);
1037 return Py_NotImplemented;
1038 }
1039
1040 self_size = _getbuffer(self, &self_bytes);
1041 if (self_size < 0) {
1042 PyErr_Clear();
1043 Py_INCREF(Py_NotImplemented);
1044 return Py_NotImplemented;
1045 }
1046
1047 other_size = _getbuffer(other, &other_bytes);
1048 if (other_size < 0) {
1049 PyErr_Clear();
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001050 PyBuffer_Release(&self_bytes);
Christian Heimes44720832008-05-26 13:01:01 +00001051 Py_INCREF(Py_NotImplemented);
1052 return Py_NotImplemented;
1053 }
1054
1055 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1056 /* Shortcut: if the lengths differ, the objects differ */
1057 cmp = (op == Py_NE);
1058 }
1059 else {
1060 minsize = self_size;
1061 if (other_size < minsize)
1062 minsize = other_size;
1063
1064 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1065 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1066
1067 if (cmp == 0) {
1068 if (self_size < other_size)
1069 cmp = -1;
1070 else if (self_size > other_size)
1071 cmp = 1;
1072 }
1073
1074 switch (op) {
1075 case Py_LT: cmp = cmp < 0; break;
1076 case Py_LE: cmp = cmp <= 0; break;
1077 case Py_EQ: cmp = cmp == 0; break;
1078 case Py_NE: cmp = cmp != 0; break;
1079 case Py_GT: cmp = cmp > 0; break;
1080 case Py_GE: cmp = cmp >= 0; break;
1081 }
1082 }
1083
1084 res = cmp ? Py_True : Py_False;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001085 PyBuffer_Release(&self_bytes);
1086 PyBuffer_Release(&other_bytes);
Christian Heimes44720832008-05-26 13:01:01 +00001087 Py_INCREF(res);
1088 return res;
1089}
1090
1091static void
1092bytes_dealloc(PyByteArrayObject *self)
1093{
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001094 if (self->ob_exports > 0) {
1095 PyErr_SetString(PyExc_SystemError,
Georg Brandle9b91212009-04-05 21:26:31 +00001096 "deallocated bytearray object has exported buffers");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001097 PyErr_Print();
1098 }
Christian Heimes44720832008-05-26 13:01:01 +00001099 if (self->ob_bytes != 0) {
1100 PyMem_Free(self->ob_bytes);
1101 }
1102 Py_TYPE(self)->tp_free((PyObject *)self);
1103}
1104
1105
1106/* -------------------------------------------------------------------- */
1107/* Methods */
1108
/* Parameter macros defined ahead of the stringlib headers included below.
   NOTE(review): presumably each header expands against these names to
   build the bytearray variants of the shared helpers (find, count,
   partition, ...) -- confirm against the stringlib sources.  The defines
   must stay above the #include lines. */
#define STRINGLIB_CHAR char
#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyByteArray_GET_SIZE
#define STRINGLIB_STR PyByteArray_AS_STRING
#define STRINGLIB_NEW PyByteArray_FromStringAndSize
#define STRINGLIB_EMPTY nullbytes
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
#define STRINGLIB_MUTABLE 1
#define FROM_BYTEARRAY 1

#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
#include "stringlib/ctype.h"
#include "stringlib/transmogrify.h"
1125
1126
1127/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1128were copied from the old char* style string object. */
1129
1130Py_LOCAL_INLINE(void)
1131_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1132{
1133 if (*end > len)
1134 *end = len;
1135 else if (*end < 0)
1136 *end += len;
1137 if (*end < 0)
1138 *end = 0;
1139 if (*start < 0)
1140 *start += len;
1141 if (*start < 0)
1142 *start = 0;
1143}
1144
1145
1146Py_LOCAL_INLINE(Py_ssize_t)
1147bytes_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1148{
1149 PyObject *subobj;
1150 Py_buffer subbuf;
1151 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1152 Py_ssize_t res;
1153
1154 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1155 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1156 return -2;
1157 if (_getbuffer(subobj, &subbuf) < 0)
1158 return -2;
1159 if (dir > 0)
1160 res = stringlib_find_slice(
1161 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1162 subbuf.buf, subbuf.len, start, end);
1163 else
1164 res = stringlib_rfind_slice(
1165 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1166 subbuf.buf, subbuf.len, start, end);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001167 PyBuffer_Release(&subbuf);
Christian Heimes44720832008-05-26 13:01:01 +00001168 return res;
1169}
1170
1171PyDoc_STRVAR(find__doc__,
1172"B.find(sub [,start [,end]]) -> int\n\
1173\n\
1174Return the lowest index in B where subsection sub is found,\n\
1175such that sub is contained within s[start,end]. Optional\n\
1176arguments start and end are interpreted as in slice notation.\n\
1177\n\
1178Return -1 on failure.");
1179
1180static PyObject *
1181bytes_find(PyByteArrayObject *self, PyObject *args)
1182{
1183 Py_ssize_t result = bytes_find_internal(self, args, +1);
1184 if (result == -2)
1185 return NULL;
1186 return PyInt_FromSsize_t(result);
1187}
1188
1189PyDoc_STRVAR(count__doc__,
1190"B.count(sub [,start [,end]]) -> int\n\
1191\n\
1192Return the number of non-overlapping occurrences of subsection sub in\n\
1193bytes B[start:end]. Optional arguments start and end are interpreted\n\
1194as in slice notation.");
1195
1196static PyObject *
1197bytes_count(PyByteArrayObject *self, PyObject *args)
1198{
1199 PyObject *sub_obj;
1200 const char *str = PyByteArray_AS_STRING(self);
1201 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1202 Py_buffer vsub;
1203 PyObject *count_obj;
1204
1205 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1206 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1207 return NULL;
1208
1209 if (_getbuffer(sub_obj, &vsub) < 0)
1210 return NULL;
1211
1212 _adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
1213
1214 count_obj = PyInt_FromSsize_t(
1215 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
1216 );
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001217 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00001218 return count_obj;
1219}
1220
1221
1222PyDoc_STRVAR(index__doc__,
1223"B.index(sub [,start [,end]]) -> int\n\
1224\n\
1225Like B.find() but raise ValueError when the subsection is not found.");
1226
1227static PyObject *
1228bytes_index(PyByteArrayObject *self, PyObject *args)
1229{
1230 Py_ssize_t result = bytes_find_internal(self, args, +1);
1231 if (result == -2)
1232 return NULL;
1233 if (result == -1) {
1234 PyErr_SetString(PyExc_ValueError,
1235 "subsection not found");
1236 return NULL;
1237 }
1238 return PyInt_FromSsize_t(result);
1239}
1240
1241
1242PyDoc_STRVAR(rfind__doc__,
1243"B.rfind(sub [,start [,end]]) -> int\n\
1244\n\
1245Return the highest index in B where subsection sub is found,\n\
1246such that sub is contained within s[start,end]. Optional\n\
1247arguments start and end are interpreted as in slice notation.\n\
1248\n\
1249Return -1 on failure.");
1250
1251static PyObject *
1252bytes_rfind(PyByteArrayObject *self, PyObject *args)
1253{
1254 Py_ssize_t result = bytes_find_internal(self, args, -1);
1255 if (result == -2)
1256 return NULL;
1257 return PyInt_FromSsize_t(result);
1258}
1259
1260
1261PyDoc_STRVAR(rindex__doc__,
1262"B.rindex(sub [,start [,end]]) -> int\n\
1263\n\
1264Like B.rfind() but raise ValueError when the subsection is not found.");
1265
1266static PyObject *
1267bytes_rindex(PyByteArrayObject *self, PyObject *args)
1268{
1269 Py_ssize_t result = bytes_find_internal(self, args, -1);
1270 if (result == -2)
1271 return NULL;
1272 if (result == -1) {
1273 PyErr_SetString(PyExc_ValueError,
1274 "subsection not found");
1275 return NULL;
1276 }
1277 return PyInt_FromSsize_t(result);
1278}
1279
1280
1281static int
1282bytes_contains(PyObject *self, PyObject *arg)
1283{
1284 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1285 if (ival == -1 && PyErr_Occurred()) {
1286 Py_buffer varg;
1287 int pos;
1288 PyErr_Clear();
1289 if (_getbuffer(arg, &varg) < 0)
1290 return -1;
1291 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1292 varg.buf, varg.len, 0);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001293 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00001294 return pos >= 0;
1295 }
1296 if (ival < 0 || ival >= 256) {
1297 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1298 return -1;
1299 }
1300
1301 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1302}
1303
1304
1305/* Matches the end (direction >= 0) or start (direction < 0) of self
1306 * against substr, using the start and end arguments. Returns
1307 * -1 on error, 0 if not found and 1 if found.
1308 */
1309Py_LOCAL(int)
1310_bytes_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1311 Py_ssize_t end, int direction)
1312{
1313 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1314 const char* str;
1315 Py_buffer vsubstr;
1316 int rv = 0;
1317
1318 str = PyByteArray_AS_STRING(self);
1319
1320 if (_getbuffer(substr, &vsubstr) < 0)
1321 return -1;
1322
1323 _adjust_indices(&start, &end, len);
1324
1325 if (direction < 0) {
1326 /* startswith */
1327 if (start+vsubstr.len > len) {
1328 goto done;
1329 }
1330 } else {
1331 /* endswith */
1332 if (end-start < vsubstr.len || start > len) {
1333 goto done;
1334 }
1335
1336 if (end-vsubstr.len > start)
1337 start = end - vsubstr.len;
1338 }
1339 if (end-start >= vsubstr.len)
1340 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1341
1342done:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001343 PyBuffer_Release(&vsubstr);
Christian Heimes44720832008-05-26 13:01:01 +00001344 return rv;
1345}
1346
1347
1348PyDoc_STRVAR(startswith__doc__,
1349"B.startswith(prefix [,start [,end]]) -> bool\n\
1350\n\
1351Return True if B starts with the specified prefix, False otherwise.\n\
1352With optional start, test B beginning at that position.\n\
1353With optional end, stop comparing B at that position.\n\
1354prefix can also be a tuple of strings to try.");
1355
1356static PyObject *
1357bytes_startswith(PyByteArrayObject *self, PyObject *args)
1358{
1359 Py_ssize_t start = 0;
1360 Py_ssize_t end = PY_SSIZE_T_MAX;
1361 PyObject *subobj;
1362 int result;
1363
1364 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1365 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1366 return NULL;
1367 if (PyTuple_Check(subobj)) {
1368 Py_ssize_t i;
1369 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1370 result = _bytes_tailmatch(self,
1371 PyTuple_GET_ITEM(subobj, i),
1372 start, end, -1);
1373 if (result == -1)
1374 return NULL;
1375 else if (result) {
1376 Py_RETURN_TRUE;
1377 }
1378 }
1379 Py_RETURN_FALSE;
1380 }
1381 result = _bytes_tailmatch(self, subobj, start, end, -1);
1382 if (result == -1)
1383 return NULL;
1384 else
1385 return PyBool_FromLong(result);
1386}
1387
1388PyDoc_STRVAR(endswith__doc__,
1389"B.endswith(suffix [,start [,end]]) -> bool\n\
1390\n\
1391Return True if B ends with the specified suffix, False otherwise.\n\
1392With optional start, test B beginning at that position.\n\
1393With optional end, stop comparing B at that position.\n\
1394suffix can also be a tuple of strings to try.");
1395
1396static PyObject *
1397bytes_endswith(PyByteArrayObject *self, PyObject *args)
1398{
1399 Py_ssize_t start = 0;
1400 Py_ssize_t end = PY_SSIZE_T_MAX;
1401 PyObject *subobj;
1402 int result;
1403
1404 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1405 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1406 return NULL;
1407 if (PyTuple_Check(subobj)) {
1408 Py_ssize_t i;
1409 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1410 result = _bytes_tailmatch(self,
1411 PyTuple_GET_ITEM(subobj, i),
1412 start, end, +1);
1413 if (result == -1)
1414 return NULL;
1415 else if (result) {
1416 Py_RETURN_TRUE;
1417 }
1418 }
1419 Py_RETURN_FALSE;
1420 }
1421 result = _bytes_tailmatch(self, subobj, start, end, +1);
1422 if (result == -1)
1423 return NULL;
1424 else
1425 return PyBool_FromLong(result);
1426}
1427
1428
1429PyDoc_STRVAR(translate__doc__,
1430"B.translate(table[, deletechars]) -> bytearray\n\
1431\n\
1432Return a copy of B, where all characters occurring in the\n\
1433optional argument deletechars are removed, and the remaining\n\
1434characters have been mapped through the given translation\n\
1435table, which must be a bytes object of length 256.");
1436
1437static PyObject *
1438bytes_translate(PyByteArrayObject *self, PyObject *args)
1439{
1440 register char *input, *output;
1441 register const char *table;
Benjamin Peterson866eba92008-11-19 22:05:53 +00001442 register Py_ssize_t i, c;
Christian Heimes44720832008-05-26 13:01:01 +00001443 PyObject *input_obj = (PyObject*)self;
1444 const char *output_start;
1445 Py_ssize_t inlen;
1446 PyObject *result;
1447 int trans_table[256];
1448 PyObject *tableobj, *delobj = NULL;
1449 Py_buffer vtable, vdel;
1450
1451 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1452 &tableobj, &delobj))
1453 return NULL;
1454
1455 if (_getbuffer(tableobj, &vtable) < 0)
1456 return NULL;
1457
1458 if (vtable.len != 256) {
1459 PyErr_SetString(PyExc_ValueError,
1460 "translation table must be 256 characters long");
Georg Brandl11a81b22009-07-22 12:03:09 +00001461 PyBuffer_Release(&vtable);
1462 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001463 }
1464
1465 if (delobj != NULL) {
1466 if (_getbuffer(delobj, &vdel) < 0) {
Georg Brandl11a81b22009-07-22 12:03:09 +00001467 PyBuffer_Release(&vtable);
1468 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001469 }
1470 }
1471 else {
1472 vdel.buf = NULL;
1473 vdel.len = 0;
1474 }
1475
1476 table = (const char *)vtable.buf;
1477 inlen = PyByteArray_GET_SIZE(input_obj);
1478 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1479 if (result == NULL)
1480 goto done;
1481 output_start = output = PyByteArray_AsString(result);
1482 input = PyByteArray_AS_STRING(input_obj);
1483
1484 if (vdel.len == 0) {
1485 /* If no deletions are required, use faster code */
1486 for (i = inlen; --i >= 0; ) {
1487 c = Py_CHARMASK(*input++);
Benjamin Peterson866eba92008-11-19 22:05:53 +00001488 *output++ = table[c];
Christian Heimes44720832008-05-26 13:01:01 +00001489 }
Christian Heimes44720832008-05-26 13:01:01 +00001490 goto done;
1491 }
Antoine Pitrou599db7f2008-12-07 00:07:51 +00001492
Christian Heimes44720832008-05-26 13:01:01 +00001493 for (i = 0; i < 256; i++)
1494 trans_table[i] = Py_CHARMASK(table[i]);
1495
1496 for (i = 0; i < vdel.len; i++)
1497 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1498
1499 for (i = inlen; --i >= 0; ) {
1500 c = Py_CHARMASK(*input++);
1501 if (trans_table[c] != -1)
1502 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1503 continue;
Christian Heimes44720832008-05-26 13:01:01 +00001504 }
1505 /* Fix the size of the resulting string */
1506 if (inlen > 0)
1507 PyByteArray_Resize(result, output - output_start);
1508
1509done:
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001510 PyBuffer_Release(&vtable);
Christian Heimes44720832008-05-26 13:01:01 +00001511 if (delobj != NULL)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001512 PyBuffer_Release(&vdel);
Christian Heimes44720832008-05-26 13:01:01 +00001513 return result;
1514}
1515
1516
/* Search directions accepted by findstring()/countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within target[0:target_len], or
   NULL.  Arguments are parenthesised so arbitrary expressions are safe. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Non-zero when pattern[0:length] occurs in target at offset.  The first
   and last bytes are compared before the middle is handed to memcmp.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1530
1531
Benjamin Peterson866eba92008-11-19 22:05:53 +00001532/* Bytes ops must return a string, create a copy */
Christian Heimes44720832008-05-26 13:01:01 +00001533Py_LOCAL(PyByteArrayObject *)
1534return_self(PyByteArrayObject *self)
1535{
Christian Heimes44720832008-05-26 13:01:01 +00001536 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1537 PyByteArray_AS_STRING(self),
1538 PyByteArray_GET_SIZE(self));
1539}
1540
1541Py_LOCAL_INLINE(Py_ssize_t)
1542countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1543{
1544 Py_ssize_t count=0;
1545 const char *start=target;
1546 const char *end=target+target_len;
1547
1548 while ( (start=findchar(start, end-start, c)) != NULL ) {
1549 count++;
1550 if (count >= maxcount)
1551 break;
1552 start += 1;
1553 }
1554 return count;
1555}
1556
1557Py_LOCAL(Py_ssize_t)
1558findstring(const char *target, Py_ssize_t target_len,
1559 const char *pattern, Py_ssize_t pattern_len,
1560 Py_ssize_t start,
1561 Py_ssize_t end,
1562 int direction)
1563{
1564 if (start < 0) {
1565 start += target_len;
1566 if (start < 0)
1567 start = 0;
1568 }
1569 if (end > target_len) {
1570 end = target_len;
1571 } else if (end < 0) {
1572 end += target_len;
1573 if (end < 0)
1574 end = 0;
1575 }
1576
1577 /* zero-length substrings always match at the first attempt */
1578 if (pattern_len == 0)
1579 return (direction > 0) ? start : end;
1580
1581 end -= pattern_len;
1582
1583 if (direction < 0) {
1584 for (; end >= start; end--)
1585 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1586 return end;
1587 } else {
1588 for (; start <= end; start++)
1589 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1590 return start;
1591 }
1592 return -1;
1593}
1594
1595Py_LOCAL_INLINE(Py_ssize_t)
1596countstring(const char *target, Py_ssize_t target_len,
1597 const char *pattern, Py_ssize_t pattern_len,
1598 Py_ssize_t start,
1599 Py_ssize_t end,
1600 int direction, Py_ssize_t maxcount)
1601{
1602 Py_ssize_t count=0;
1603
1604 if (start < 0) {
1605 start += target_len;
1606 if (start < 0)
1607 start = 0;
1608 }
1609 if (end > target_len) {
1610 end = target_len;
1611 } else if (end < 0) {
1612 end += target_len;
1613 if (end < 0)
1614 end = 0;
1615 }
1616
1617 /* zero-length substrings match everywhere */
1618 if (pattern_len == 0 || maxcount == 0) {
1619 if (target_len+1 < maxcount)
1620 return target_len+1;
1621 return maxcount;
1622 }
1623
1624 end -= pattern_len;
1625 if (direction < 0) {
1626 for (; (end >= start); end--)
1627 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1628 count++;
1629 if (--maxcount <= 0) break;
1630 end -= pattern_len-1;
1631 }
1632 } else {
1633 for (; (start <= end); start++)
1634 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1635 count++;
1636 if (--maxcount <= 0)
1637 break;
1638 start += pattern_len-1;
1639 }
1640 }
1641 return count;
1642}
1643
1644
1645/* Algorithms for different cases of string replacement */
1646
1647/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1648Py_LOCAL(PyByteArrayObject *)
1649replace_interleave(PyByteArrayObject *self,
1650 const char *to_s, Py_ssize_t to_len,
1651 Py_ssize_t maxcount)
1652{
1653 char *self_s, *result_s;
1654 Py_ssize_t self_len, result_len;
1655 Py_ssize_t count, i, product;
1656 PyByteArrayObject *result;
1657
1658 self_len = PyByteArray_GET_SIZE(self);
1659
1660 /* 1 at the end plus 1 after every character */
1661 count = self_len+1;
1662 if (maxcount < count)
1663 count = maxcount;
1664
1665 /* Check for overflow */
1666 /* result_len = count * to_len + self_len; */
1667 product = count * to_len;
1668 if (product / to_len != count) {
1669 PyErr_SetString(PyExc_OverflowError,
1670 "replace string is too long");
1671 return NULL;
1672 }
1673 result_len = product + self_len;
1674 if (result_len < 0) {
1675 PyErr_SetString(PyExc_OverflowError,
1676 "replace string is too long");
1677 return NULL;
1678 }
1679
1680 if (! (result = (PyByteArrayObject *)
1681 PyByteArray_FromStringAndSize(NULL, result_len)) )
1682 return NULL;
1683
1684 self_s = PyByteArray_AS_STRING(self);
1685 result_s = PyByteArray_AS_STRING(result);
1686
1687 /* TODO: special case single character, which doesn't need memcpy */
1688
1689 /* Lay the first one down (guaranteed this will occur) */
1690 Py_MEMCPY(result_s, to_s, to_len);
1691 result_s += to_len;
1692 count -= 1;
1693
1694 for (i=0; i<count; i++) {
1695 *result_s++ = *self_s++;
1696 Py_MEMCPY(result_s, to_s, to_len);
1697 result_s += to_len;
1698 }
1699
1700 /* Copy the rest of the original string */
1701 Py_MEMCPY(result_s, self_s, self_len-i);
1702
1703 return result;
1704}
1705
1706/* Special case for deleting a single character */
1707/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1708Py_LOCAL(PyByteArrayObject *)
1709replace_delete_single_character(PyByteArrayObject *self,
1710 char from_c, Py_ssize_t maxcount)
1711{
1712 char *self_s, *result_s;
1713 char *start, *next, *end;
1714 Py_ssize_t self_len, result_len;
1715 Py_ssize_t count;
1716 PyByteArrayObject *result;
1717
1718 self_len = PyByteArray_GET_SIZE(self);
1719 self_s = PyByteArray_AS_STRING(self);
1720
1721 count = countchar(self_s, self_len, from_c, maxcount);
1722 if (count == 0) {
1723 return return_self(self);
1724 }
1725
1726 result_len = self_len - count; /* from_len == 1 */
1727 assert(result_len>=0);
1728
1729 if ( (result = (PyByteArrayObject *)
1730 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1731 return NULL;
1732 result_s = PyByteArray_AS_STRING(result);
1733
1734 start = self_s;
1735 end = self_s + self_len;
1736 while (count-- > 0) {
1737 next = findchar(start, end-start, from_c);
1738 if (next == NULL)
1739 break;
1740 Py_MEMCPY(result_s, start, next-start);
1741 result_s += (next-start);
1742 start = next+1;
1743 }
1744 Py_MEMCPY(result_s, start, end-start);
1745
1746 return result;
1747}
1748
1749/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1750
1751Py_LOCAL(PyByteArrayObject *)
1752replace_delete_substring(PyByteArrayObject *self,
1753 const char *from_s, Py_ssize_t from_len,
1754 Py_ssize_t maxcount)
1755{
1756 char *self_s, *result_s;
1757 char *start, *next, *end;
1758 Py_ssize_t self_len, result_len;
1759 Py_ssize_t count, offset;
1760 PyByteArrayObject *result;
1761
1762 self_len = PyByteArray_GET_SIZE(self);
1763 self_s = PyByteArray_AS_STRING(self);
1764
1765 count = countstring(self_s, self_len,
1766 from_s, from_len,
1767 0, self_len, 1,
1768 maxcount);
1769
1770 if (count == 0) {
1771 /* no matches */
1772 return return_self(self);
1773 }
1774
1775 result_len = self_len - (count * from_len);
1776 assert (result_len>=0);
1777
1778 if ( (result = (PyByteArrayObject *)
1779 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1780 return NULL;
1781
1782 result_s = PyByteArray_AS_STRING(result);
1783
1784 start = self_s;
1785 end = self_s + self_len;
1786 while (count-- > 0) {
1787 offset = findstring(start, end-start,
1788 from_s, from_len,
1789 0, end-start, FORWARD);
1790 if (offset == -1)
1791 break;
1792 next = start + offset;
1793
1794 Py_MEMCPY(result_s, start, next-start);
1795
1796 result_s += (next-start);
1797 start = next+from_len;
1798 }
1799 Py_MEMCPY(result_s, start, end-start);
1800 return result;
1801}
1802
1803/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1804Py_LOCAL(PyByteArrayObject *)
1805replace_single_character_in_place(PyByteArrayObject *self,
1806 char from_c, char to_c,
1807 Py_ssize_t maxcount)
1808{
1809 char *self_s, *result_s, *start, *end, *next;
1810 Py_ssize_t self_len;
1811 PyByteArrayObject *result;
1812
1813 /* The result string will be the same size */
1814 self_s = PyByteArray_AS_STRING(self);
1815 self_len = PyByteArray_GET_SIZE(self);
1816
1817 next = findchar(self_s, self_len, from_c);
1818
1819 if (next == NULL) {
1820 /* No matches; return the original bytes */
1821 return return_self(self);
1822 }
1823
1824 /* Need to make a new bytes */
1825 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1826 if (result == NULL)
1827 return NULL;
1828 result_s = PyByteArray_AS_STRING(result);
1829 Py_MEMCPY(result_s, self_s, self_len);
1830
1831 /* change everything in-place, starting with this one */
1832 start = result_s + (next-self_s);
1833 *start = to_c;
1834 start++;
1835 end = result_s + self_len;
1836
1837 while (--maxcount > 0) {
1838 next = findchar(start, end-start, from_c);
1839 if (next == NULL)
1840 break;
1841 *next = to_c;
1842 start = next+1;
1843 }
1844
1845 return result;
1846}
1847
1848/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1849Py_LOCAL(PyByteArrayObject *)
1850replace_substring_in_place(PyByteArrayObject *self,
1851 const char *from_s, Py_ssize_t from_len,
1852 const char *to_s, Py_ssize_t to_len,
1853 Py_ssize_t maxcount)
1854{
1855 char *result_s, *start, *end;
1856 char *self_s;
1857 Py_ssize_t self_len, offset;
1858 PyByteArrayObject *result;
1859
1860 /* The result bytes will be the same size */
1861
1862 self_s = PyByteArray_AS_STRING(self);
1863 self_len = PyByteArray_GET_SIZE(self);
1864
1865 offset = findstring(self_s, self_len,
1866 from_s, from_len,
1867 0, self_len, FORWARD);
1868 if (offset == -1) {
1869 /* No matches; return the original bytes */
1870 return return_self(self);
1871 }
1872
1873 /* Need to make a new bytes */
1874 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1875 if (result == NULL)
1876 return NULL;
1877 result_s = PyByteArray_AS_STRING(result);
1878 Py_MEMCPY(result_s, self_s, self_len);
1879
1880 /* change everything in-place, starting with this one */
1881 start = result_s + offset;
1882 Py_MEMCPY(start, to_s, from_len);
1883 start += from_len;
1884 end = result_s + self_len;
1885
1886 while ( --maxcount > 0) {
1887 offset = findstring(start, end-start,
1888 from_s, from_len,
1889 0, end-start, FORWARD);
1890 if (offset==-1)
1891 break;
1892 Py_MEMCPY(start+offset, to_s, from_len);
1893 start += offset+from_len;
1894 }
1895
1896 return result;
1897}
1898
1899/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1900Py_LOCAL(PyByteArrayObject *)
1901replace_single_character(PyByteArrayObject *self,
1902 char from_c,
1903 const char *to_s, Py_ssize_t to_len,
1904 Py_ssize_t maxcount)
1905{
1906 char *self_s, *result_s;
1907 char *start, *next, *end;
1908 Py_ssize_t self_len, result_len;
1909 Py_ssize_t count, product;
1910 PyByteArrayObject *result;
1911
1912 self_s = PyByteArray_AS_STRING(self);
1913 self_len = PyByteArray_GET_SIZE(self);
1914
1915 count = countchar(self_s, self_len, from_c, maxcount);
1916 if (count == 0) {
1917 /* no matches, return unchanged */
1918 return return_self(self);
1919 }
1920
1921 /* use the difference between current and new, hence the "-1" */
1922 /* result_len = self_len + count * (to_len-1) */
1923 product = count * (to_len-1);
1924 if (product / (to_len-1) != count) {
1925 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1926 return NULL;
1927 }
1928 result_len = self_len + product;
1929 if (result_len < 0) {
1930 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1931 return NULL;
1932 }
1933
1934 if ( (result = (PyByteArrayObject *)
1935 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1936 return NULL;
1937 result_s = PyByteArray_AS_STRING(result);
1938
1939 start = self_s;
1940 end = self_s + self_len;
1941 while (count-- > 0) {
1942 next = findchar(start, end-start, from_c);
1943 if (next == NULL)
1944 break;
1945
1946 if (next == start) {
1947 /* replace with the 'to' */
1948 Py_MEMCPY(result_s, to_s, to_len);
1949 result_s += to_len;
1950 start += 1;
1951 } else {
1952 /* copy the unchanged old then the 'to' */
1953 Py_MEMCPY(result_s, start, next-start);
1954 result_s += (next-start);
1955 Py_MEMCPY(result_s, to_s, to_len);
1956 result_s += to_len;
1957 start = next+1;
1958 }
1959 }
1960 /* Copy the remainder of the remaining bytes */
1961 Py_MEMCPY(result_s, start, end-start);
1962
1963 return result;
1964}
1965
1966/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1967Py_LOCAL(PyByteArrayObject *)
1968replace_substring(PyByteArrayObject *self,
1969 const char *from_s, Py_ssize_t from_len,
1970 const char *to_s, Py_ssize_t to_len,
1971 Py_ssize_t maxcount)
1972{
1973 char *self_s, *result_s;
1974 char *start, *next, *end;
1975 Py_ssize_t self_len, result_len;
1976 Py_ssize_t count, offset, product;
1977 PyByteArrayObject *result;
1978
1979 self_s = PyByteArray_AS_STRING(self);
1980 self_len = PyByteArray_GET_SIZE(self);
1981
1982 count = countstring(self_s, self_len,
1983 from_s, from_len,
1984 0, self_len, FORWARD, maxcount);
1985 if (count == 0) {
1986 /* no matches, return unchanged */
1987 return return_self(self);
1988 }
1989
1990 /* Check for overflow */
1991 /* result_len = self_len + count * (to_len-from_len) */
1992 product = count * (to_len-from_len);
1993 if (product / (to_len-from_len) != count) {
1994 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1995 return NULL;
1996 }
1997 result_len = self_len + product;
1998 if (result_len < 0) {
1999 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
2000 return NULL;
2001 }
2002
2003 if ( (result = (PyByteArrayObject *)
2004 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
2005 return NULL;
2006 result_s = PyByteArray_AS_STRING(result);
2007
2008 start = self_s;
2009 end = self_s + self_len;
2010 while (count-- > 0) {
2011 offset = findstring(start, end-start,
2012 from_s, from_len,
2013 0, end-start, FORWARD);
2014 if (offset == -1)
2015 break;
2016 next = start+offset;
2017 if (next == start) {
2018 /* replace with the 'to' */
2019 Py_MEMCPY(result_s, to_s, to_len);
2020 result_s += to_len;
2021 start += from_len;
2022 } else {
2023 /* copy the unchanged old then the 'to' */
2024 Py_MEMCPY(result_s, start, next-start);
2025 result_s += (next-start);
2026 Py_MEMCPY(result_s, to_s, to_len);
2027 result_s += to_len;
2028 start = next+from_len;
2029 }
2030 }
2031 /* Copy the remainder of the remaining bytes */
2032 Py_MEMCPY(result_s, start, end-start);
2033
2034 return result;
2035}
2036
2037
2038Py_LOCAL(PyByteArrayObject *)
2039replace(PyByteArrayObject *self,
2040 const char *from_s, Py_ssize_t from_len,
2041 const char *to_s, Py_ssize_t to_len,
2042 Py_ssize_t maxcount)
2043{
2044 if (maxcount < 0) {
2045 maxcount = PY_SSIZE_T_MAX;
2046 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
2047 /* nothing to do; return the original bytes */
2048 return return_self(self);
2049 }
2050
2051 if (maxcount == 0 ||
2052 (from_len == 0 && to_len == 0)) {
2053 /* nothing to do; return the original bytes */
2054 return return_self(self);
2055 }
2056
2057 /* Handle zero-length special cases */
2058
2059 if (from_len == 0) {
2060 /* insert the 'to' bytes everywhere. */
2061 /* >>> "Python".replace("", ".") */
2062 /* '.P.y.t.h.o.n.' */
2063 return replace_interleave(self, to_s, to_len, maxcount);
2064 }
2065
2066 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2067 /* point for an empty self bytes to generate a non-empty bytes */
2068 /* Special case so the remaining code always gets a non-empty bytes */
2069 if (PyByteArray_GET_SIZE(self) == 0) {
2070 return return_self(self);
2071 }
2072
2073 if (to_len == 0) {
2074 /* delete all occurances of 'from' bytes */
2075 if (from_len == 1) {
2076 return replace_delete_single_character(
2077 self, from_s[0], maxcount);
2078 } else {
2079 return replace_delete_substring(self, from_s, from_len, maxcount);
2080 }
2081 }
2082
2083 /* Handle special case where both bytes have the same length */
2084
2085 if (from_len == to_len) {
2086 if (from_len == 1) {
2087 return replace_single_character_in_place(
2088 self,
2089 from_s[0],
2090 to_s[0],
2091 maxcount);
2092 } else {
2093 return replace_substring_in_place(
2094 self, from_s, from_len, to_s, to_len, maxcount);
2095 }
2096 }
2097
2098 /* Otherwise use the more generic algorithms */
2099 if (from_len == 1) {
2100 return replace_single_character(self, from_s[0],
2101 to_s, to_len, maxcount);
2102 } else {
2103 /* len('from')>=2, len('to')>=1 */
2104 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2105 }
2106}
2107
2108
2109PyDoc_STRVAR(replace__doc__,
2110"B.replace(old, new[, count]) -> bytes\n\
2111\n\
2112Return a copy of B with all occurrences of subsection\n\
2113old replaced by new. If the optional argument count is\n\
2114given, only the first count occurrences are replaced.");
2115
2116static PyObject *
2117bytes_replace(PyByteArrayObject *self, PyObject *args)
2118{
2119 Py_ssize_t count = -1;
2120 PyObject *from, *to, *res;
2121 Py_buffer vfrom, vto;
2122
2123 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2124 return NULL;
2125
2126 if (_getbuffer(from, &vfrom) < 0)
2127 return NULL;
2128 if (_getbuffer(to, &vto) < 0) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002129 PyBuffer_Release(&vfrom);
Christian Heimes44720832008-05-26 13:01:01 +00002130 return NULL;
2131 }
2132
2133 res = (PyObject *)replace((PyByteArrayObject *) self,
2134 vfrom.buf, vfrom.len,
2135 vto.buf, vto.len, count);
2136
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002137 PyBuffer_Release(&vfrom);
2138 PyBuffer_Release(&vto);
Christian Heimes44720832008-05-26 13:01:01 +00002139 return res;
2140}
2141
2142
2143/* Overallocate the initial list to reduce the number of reallocs for small
2144 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2145 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2146 text (roughly 11 words per line) and field delimited data (usually 1-10
2147 fields). For large strings the split algorithms are bandwidth limited
2148 so increasing the preallocation likely will not improve things.*/
2149
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesised so that expression arguments (e.g. a
   conditional) expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Build a bytearray from data[left:right] and append it to the list.
   Expands in the caller's scope: uses the caller's `str` and `list`
   variables and jumps to the caller's `onError` label on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (PyList_Append(list, str)) {                         \
        Py_DECREF(str);                                 \
        goto onError;                                   \
    }                                                       \
    else                                                    \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but the first MAX_PREALLOC items are stored directly
   into the preallocated slots; later ones are appended.  Also uses and
   increments the caller's `count` variable. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyByteArray_FromStringAndSize((data) + (left),       \
                                     (right) - (left));     \
    if (str == NULL)                                        \
        goto onError;                                   \
    if (count < MAX_PREALLOC) {                             \
        PyList_SET_ITEM(list, count, str);              \
    } else {                                                \
        if (PyList_Append(list, str)) {                 \
            Py_DECREF(str);                         \
            goto onError;                           \
        }                                               \
        else                                            \
            Py_DECREF(str);                         \
    }                                                       \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
2187
2188
2189Py_LOCAL_INLINE(PyObject *)
2190split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2191{
2192 register Py_ssize_t i, j, count = 0;
2193 PyObject *str;
2194 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2195
2196 if (list == NULL)
2197 return NULL;
2198
2199 i = j = 0;
2200 while ((j < len) && (maxcount-- > 0)) {
2201 for(; j < len; j++) {
2202 /* I found that using memchr makes no difference */
2203 if (s[j] == ch) {
2204 SPLIT_ADD(s, i, j);
2205 i = j = j + 1;
2206 break;
2207 }
2208 }
2209 }
2210 if (i <= len) {
2211 SPLIT_ADD(s, i, len);
2212 }
2213 FIX_PREALLOC_SIZE(list);
2214 return list;
2215
2216 onError:
2217 Py_DECREF(list);
2218 return NULL;
2219}
2220
2221
2222Py_LOCAL_INLINE(PyObject *)
2223split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2224{
2225 register Py_ssize_t i, j, count = 0;
2226 PyObject *str;
2227 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2228
2229 if (list == NULL)
2230 return NULL;
2231
2232 for (i = j = 0; i < len; ) {
2233 /* find a token */
2234 while (i < len && ISSPACE(s[i]))
2235 i++;
2236 j = i;
2237 while (i < len && !ISSPACE(s[i]))
2238 i++;
2239 if (j < i) {
2240 if (maxcount-- <= 0)
2241 break;
2242 SPLIT_ADD(s, j, i);
2243 while (i < len && ISSPACE(s[i]))
2244 i++;
2245 j = i;
2246 }
2247 }
2248 if (j < len) {
2249 SPLIT_ADD(s, j, len);
2250 }
2251 FIX_PREALLOC_SIZE(list);
2252 return list;
2253
2254 onError:
2255 Py_DECREF(list);
2256 return NULL;
2257}
2258
2259PyDoc_STRVAR(split__doc__,
2260"B.split([sep[, maxsplit]]) -> list of bytearray\n\
2261\n\
2262Return a list of the sections in B, using sep as the delimiter.\n\
2263If sep is not given, B is split on ASCII whitespace characters\n\
2264(space, tab, return, newline, formfeed, vertical tab).\n\
2265If maxsplit is given, at most maxsplit splits are done.");
2266
2267static PyObject *
2268bytes_split(PyByteArrayObject *self, PyObject *args)
2269{
2270 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2271 Py_ssize_t maxsplit = -1, count = 0;
2272 const char *s = PyByteArray_AS_STRING(self), *sub;
2273 PyObject *list, *str, *subobj = Py_None;
2274 Py_buffer vsub;
2275#ifdef USE_FAST
2276 Py_ssize_t pos;
2277#endif
2278
2279 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2280 return NULL;
2281 if (maxsplit < 0)
2282 maxsplit = PY_SSIZE_T_MAX;
2283
2284 if (subobj == Py_None)
2285 return split_whitespace(s, len, maxsplit);
2286
2287 if (_getbuffer(subobj, &vsub) < 0)
2288 return NULL;
2289 sub = vsub.buf;
2290 n = vsub.len;
2291
2292 if (n == 0) {
2293 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002294 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002295 return NULL;
2296 }
Amaury Forgeot d'Arc313bda12008-08-17 21:05:18 +00002297 if (n == 1) {
2298 list = split_char(s, len, sub[0], maxsplit);
2299 PyBuffer_Release(&vsub);
2300 return list;
2301 }
Christian Heimes44720832008-05-26 13:01:01 +00002302
2303 list = PyList_New(PREALLOC_SIZE(maxsplit));
2304 if (list == NULL) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002305 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002306 return NULL;
2307 }
2308
2309#ifdef USE_FAST
2310 i = j = 0;
2311 while (maxsplit-- > 0) {
2312 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2313 if (pos < 0)
2314 break;
2315 j = i+pos;
2316 SPLIT_ADD(s, i, j);
2317 i = j + n;
2318 }
2319#else
2320 i = j = 0;
2321 while ((j+n <= len) && (maxsplit-- > 0)) {
2322 for (; j+n <= len; j++) {
2323 if (Py_STRING_MATCH(s, j, sub, n)) {
2324 SPLIT_ADD(s, i, j);
2325 i = j = j + n;
2326 break;
2327 }
2328 }
2329 }
2330#endif
2331 SPLIT_ADD(s, i, len);
2332 FIX_PREALLOC_SIZE(list);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002333 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002334 return list;
2335
2336 onError:
2337 Py_DECREF(list);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002338 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002339 return NULL;
2340}
2341
2342/* stringlib's partition shares nullbytes in some cases.
2343 undo this, we don't want the nullbytes to be shared. */
2344static PyObject *
2345make_nullbytes_unique(PyObject *result)
2346{
2347 if (result != NULL) {
2348 int i;
2349 assert(PyTuple_Check(result));
2350 assert(PyTuple_GET_SIZE(result) == 3);
2351 for (i = 0; i < 3; i++) {
2352 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2353 PyObject *new = PyByteArray_FromStringAndSize(NULL, 0);
2354 if (new == NULL) {
2355 Py_DECREF(result);
2356 result = NULL;
2357 break;
2358 }
2359 Py_DECREF(nullbytes);
2360 PyTuple_SET_ITEM(result, i, new);
2361 }
2362 }
2363 }
2364 return result;
2365}
2366
2367PyDoc_STRVAR(partition__doc__,
2368"B.partition(sep) -> (head, sep, tail)\n\
2369\n\
2370Searches for the separator sep in B, and returns the part before it,\n\
2371the separator itself, and the part after it. If the separator is not\n\
2372found, returns B and two empty bytearray objects.");
2373
2374static PyObject *
2375bytes_partition(PyByteArrayObject *self, PyObject *sep_obj)
2376{
2377 PyObject *bytesep, *result;
2378
2379 bytesep = PyByteArray_FromObject(sep_obj);
2380 if (! bytesep)
2381 return NULL;
2382
2383 result = stringlib_partition(
2384 (PyObject*) self,
2385 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2386 bytesep,
2387 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2388 );
2389
2390 Py_DECREF(bytesep);
2391 return make_nullbytes_unique(result);
2392}
2393
2394PyDoc_STRVAR(rpartition__doc__,
Ezio Melottidabb5f72010-01-25 11:46:11 +00002395"B.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes44720832008-05-26 13:01:01 +00002396\n\
2397Searches for the separator sep in B, starting at the end of B,\n\
2398and returns the part before it, the separator itself, and the\n\
2399part after it. If the separator is not found, returns two empty\n\
2400bytearray objects and B.");
2401
2402static PyObject *
2403bytes_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2404{
2405 PyObject *bytesep, *result;
2406
2407 bytesep = PyByteArray_FromObject(sep_obj);
2408 if (! bytesep)
2409 return NULL;
2410
2411 result = stringlib_rpartition(
2412 (PyObject*) self,
2413 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2414 bytesep,
2415 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2416 );
2417
2418 Py_DECREF(bytesep);
2419 return make_nullbytes_unique(result);
2420}
2421
2422Py_LOCAL_INLINE(PyObject *)
2423rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2424{
2425 register Py_ssize_t i, j, count=0;
2426 PyObject *str;
2427 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2428
2429 if (list == NULL)
2430 return NULL;
2431
2432 i = j = len - 1;
2433 while ((i >= 0) && (maxcount-- > 0)) {
2434 for (; i >= 0; i--) {
2435 if (s[i] == ch) {
2436 SPLIT_ADD(s, i + 1, j + 1);
2437 j = i = i - 1;
2438 break;
2439 }
2440 }
2441 }
2442 if (j >= -1) {
2443 SPLIT_ADD(s, 0, j + 1);
2444 }
2445 FIX_PREALLOC_SIZE(list);
2446 if (PyList_Reverse(list) < 0)
2447 goto onError;
2448
2449 return list;
2450
2451 onError:
2452 Py_DECREF(list);
2453 return NULL;
2454}
2455
2456Py_LOCAL_INLINE(PyObject *)
2457rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2458{
2459 register Py_ssize_t i, j, count = 0;
2460 PyObject *str;
2461 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2462
2463 if (list == NULL)
2464 return NULL;
2465
2466 for (i = j = len - 1; i >= 0; ) {
2467 /* find a token */
2468 while (i >= 0 && ISSPACE(s[i]))
2469 i--;
2470 j = i;
2471 while (i >= 0 && !ISSPACE(s[i]))
2472 i--;
2473 if (j > i) {
2474 if (maxcount-- <= 0)
2475 break;
2476 SPLIT_ADD(s, i + 1, j + 1);
2477 while (i >= 0 && ISSPACE(s[i]))
2478 i--;
2479 j = i;
2480 }
2481 }
2482 if (j >= 0) {
2483 SPLIT_ADD(s, 0, j + 1);
2484 }
2485 FIX_PREALLOC_SIZE(list);
2486 if (PyList_Reverse(list) < 0)
2487 goto onError;
2488
2489 return list;
2490
2491 onError:
2492 Py_DECREF(list);
2493 return NULL;
2494}
2495
2496PyDoc_STRVAR(rsplit__doc__,
2497"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2498\n\
2499Return a list of the sections in B, using sep as the delimiter,\n\
2500starting at the end of B and working to the front.\n\
2501If sep is not given, B is split on ASCII whitespace characters\n\
2502(space, tab, return, newline, formfeed, vertical tab).\n\
2503If maxsplit is given, at most maxsplit splits are done.");
2504
2505static PyObject *
2506bytes_rsplit(PyByteArrayObject *self, PyObject *args)
2507{
2508 Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j;
2509 Py_ssize_t maxsplit = -1, count = 0;
2510 const char *s = PyByteArray_AS_STRING(self), *sub;
2511 PyObject *list, *str, *subobj = Py_None;
2512 Py_buffer vsub;
2513
2514 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2515 return NULL;
2516 if (maxsplit < 0)
2517 maxsplit = PY_SSIZE_T_MAX;
2518
2519 if (subobj == Py_None)
2520 return rsplit_whitespace(s, len, maxsplit);
2521
2522 if (_getbuffer(subobj, &vsub) < 0)
2523 return NULL;
2524 sub = vsub.buf;
2525 n = vsub.len;
2526
2527 if (n == 0) {
2528 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002529 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002530 return NULL;
2531 }
Amaury Forgeot d'Arc313bda12008-08-17 21:05:18 +00002532 else if (n == 1) {
2533 list = rsplit_char(s, len, sub[0], maxsplit);
2534 PyBuffer_Release(&vsub);
2535 return list;
2536 }
Christian Heimes44720832008-05-26 13:01:01 +00002537
2538 list = PyList_New(PREALLOC_SIZE(maxsplit));
2539 if (list == NULL) {
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002540 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002541 return NULL;
2542 }
2543
2544 j = len;
2545 i = j - n;
2546
2547 while ( (i >= 0) && (maxsplit-- > 0) ) {
2548 for (; i>=0; i--) {
2549 if (Py_STRING_MATCH(s, i, sub, n)) {
2550 SPLIT_ADD(s, i + n, j);
2551 j = i;
2552 i -= n;
2553 break;
2554 }
2555 }
2556 }
2557 SPLIT_ADD(s, 0, j);
2558 FIX_PREALLOC_SIZE(list);
2559 if (PyList_Reverse(list) < 0)
2560 goto onError;
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002561 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002562 return list;
2563
2564onError:
2565 Py_DECREF(list);
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002566 PyBuffer_Release(&vsub);
Christian Heimes44720832008-05-26 13:01:01 +00002567 return NULL;
2568}
2569
2570PyDoc_STRVAR(reverse__doc__,
2571"B.reverse() -> None\n\
2572\n\
2573Reverse the order of the values in B in place.");
2574static PyObject *
2575bytes_reverse(PyByteArrayObject *self, PyObject *unused)
2576{
2577 char swap, *head, *tail;
2578 Py_ssize_t i, j, n = Py_SIZE(self);
2579
2580 j = n / 2;
2581 head = self->ob_bytes;
2582 tail = head + n - 1;
2583 for (i = 0; i < j; i++) {
2584 swap = *head;
2585 *head++ = *tail;
2586 *tail-- = swap;
2587 }
2588
2589 Py_RETURN_NONE;
2590}
2591
2592PyDoc_STRVAR(insert__doc__,
2593"B.insert(index, int) -> None\n\
2594\n\
2595Insert a single item into the bytearray before the given index.");
2596static PyObject *
2597bytes_insert(PyByteArrayObject *self, PyObject *args)
2598{
Georg Brandl3e483f62008-07-16 22:57:41 +00002599 PyObject *value;
2600 int ival;
Christian Heimes44720832008-05-26 13:01:01 +00002601 Py_ssize_t where, n = Py_SIZE(self);
2602
Georg Brandl3e483f62008-07-16 22:57:41 +00002603 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
Christian Heimes44720832008-05-26 13:01:01 +00002604 return NULL;
2605
2606 if (n == PY_SSIZE_T_MAX) {
2607 PyErr_SetString(PyExc_OverflowError,
Mark Dickinson76e96432009-09-06 10:33:12 +00002608 "cannot add more objects to bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002609 return NULL;
2610 }
Georg Brandl3e483f62008-07-16 22:57:41 +00002611 if (!_getbytevalue(value, &ival))
Christian Heimes44720832008-05-26 13:01:01 +00002612 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002613 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2614 return NULL;
2615
2616 if (where < 0) {
2617 where += n;
2618 if (where < 0)
2619 where = 0;
2620 }
2621 if (where > n)
2622 where = n;
2623 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Georg Brandl3e483f62008-07-16 22:57:41 +00002624 self->ob_bytes[where] = ival;
Christian Heimes44720832008-05-26 13:01:01 +00002625
2626 Py_RETURN_NONE;
2627}
2628
2629PyDoc_STRVAR(append__doc__,
2630"B.append(int) -> None\n\
2631\n\
2632Append a single item to the end of B.");
2633static PyObject *
2634bytes_append(PyByteArrayObject *self, PyObject *arg)
2635{
2636 int value;
2637 Py_ssize_t n = Py_SIZE(self);
2638
2639 if (! _getbytevalue(arg, &value))
2640 return NULL;
2641 if (n == PY_SSIZE_T_MAX) {
2642 PyErr_SetString(PyExc_OverflowError,
Mark Dickinson76e96432009-09-06 10:33:12 +00002643 "cannot add more objects to bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002644 return NULL;
2645 }
2646 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2647 return NULL;
2648
2649 self->ob_bytes[n] = value;
2650
2651 Py_RETURN_NONE;
2652}
2653
2654PyDoc_STRVAR(extend__doc__,
2655"B.extend(iterable int) -> None\n\
2656\n\
2657Append all the elements from the iterator or sequence to the\n\
2658end of B.");
2659static PyObject *
2660bytes_extend(PyByteArrayObject *self, PyObject *arg)
2661{
2662 PyObject *it, *item, *bytes_obj;
2663 Py_ssize_t buf_size = 0, len = 0;
2664 int value;
2665 char *buf;
2666
2667 /* bytes_setslice code only accepts something supporting PEP 3118. */
2668 if (PyObject_CheckBuffer(arg)) {
2669 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2670 return NULL;
2671
2672 Py_RETURN_NONE;
2673 }
2674
2675 it = PyObject_GetIter(arg);
2676 if (it == NULL)
2677 return NULL;
2678
2679 /* Try to determine the length of the argument. 32 is abitrary. */
2680 buf_size = _PyObject_LengthHint(arg, 32);
Georg Brandle9b91212009-04-05 21:26:31 +00002681 if (buf_size == -1) {
2682 Py_DECREF(it);
2683 return NULL;
2684 }
Christian Heimes44720832008-05-26 13:01:01 +00002685
2686 bytes_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2687 if (bytes_obj == NULL)
2688 return NULL;
2689 buf = PyByteArray_AS_STRING(bytes_obj);
2690
2691 while ((item = PyIter_Next(it)) != NULL) {
2692 if (! _getbytevalue(item, &value)) {
2693 Py_DECREF(item);
2694 Py_DECREF(it);
2695 Py_DECREF(bytes_obj);
2696 return NULL;
2697 }
2698 buf[len++] = value;
2699 Py_DECREF(item);
2700
2701 if (len >= buf_size) {
2702 buf_size = len + (len >> 1) + 1;
2703 if (PyByteArray_Resize((PyObject *)bytes_obj, buf_size) < 0) {
2704 Py_DECREF(it);
2705 Py_DECREF(bytes_obj);
2706 return NULL;
2707 }
2708 /* Recompute the `buf' pointer, since the resizing operation may
2709 have invalidated it. */
2710 buf = PyByteArray_AS_STRING(bytes_obj);
2711 }
2712 }
2713 Py_DECREF(it);
2714
2715 /* Resize down to exact size. */
2716 if (PyByteArray_Resize((PyObject *)bytes_obj, len) < 0) {
2717 Py_DECREF(bytes_obj);
2718 return NULL;
2719 }
2720
2721 if (bytes_setslice(self, Py_SIZE(self), Py_SIZE(self), bytes_obj) == -1)
2722 return NULL;
2723 Py_DECREF(bytes_obj);
2724
2725 Py_RETURN_NONE;
2726}
2727
2728PyDoc_STRVAR(pop__doc__,
2729"B.pop([index]) -> int\n\
2730\n\
2731Remove and return a single item from B. If no index\n\
Andrew M. Kuchlingd8972642008-06-21 13:29:12 +00002732argument is given, will pop the last value.");
Christian Heimes44720832008-05-26 13:01:01 +00002733static PyObject *
2734bytes_pop(PyByteArrayObject *self, PyObject *args)
2735{
2736 int value;
2737 Py_ssize_t where = -1, n = Py_SIZE(self);
2738
2739 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2740 return NULL;
2741
2742 if (n == 0) {
2743 PyErr_SetString(PyExc_OverflowError,
Mark Dickinson76e96432009-09-06 10:33:12 +00002744 "cannot pop an empty bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002745 return NULL;
2746 }
2747 if (where < 0)
2748 where += Py_SIZE(self);
2749 if (where < 0 || where >= Py_SIZE(self)) {
2750 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2751 return NULL;
2752 }
Antoine Pitrou599db7f2008-12-07 00:07:51 +00002753 if (!_canresize(self))
2754 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002755
2756 value = self->ob_bytes[where];
2757 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2758 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2759 return NULL;
2760
Mark Dickinsonb61c0352009-09-06 10:05:28 +00002761 return PyInt_FromLong((unsigned char)value);
Christian Heimes44720832008-05-26 13:01:01 +00002762}
2763
2764PyDoc_STRVAR(remove__doc__,
2765"B.remove(int) -> None\n\
2766\n\
2767Remove the first occurance of a value in B.");
2768static PyObject *
2769bytes_remove(PyByteArrayObject *self, PyObject *arg)
2770{
2771 int value;
2772 Py_ssize_t where, n = Py_SIZE(self);
2773
2774 if (! _getbytevalue(arg, &value))
2775 return NULL;
2776
2777 for (where = 0; where < n; where++) {
2778 if (self->ob_bytes[where] == value)
2779 break;
2780 }
2781 if (where == n) {
Mark Dickinson76e96432009-09-06 10:33:12 +00002782 PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
Christian Heimes44720832008-05-26 13:01:01 +00002783 return NULL;
2784 }
Antoine Pitrou599db7f2008-12-07 00:07:51 +00002785 if (!_canresize(self))
2786 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002787
2788 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2789 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2790 return NULL;
2791
2792 Py_RETURN_NONE;
2793}
2794
2795/* XXX These two helpers could be optimized if argsize == 1 */
2796
2797static Py_ssize_t
2798lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2799 void *argptr, Py_ssize_t argsize)
2800{
2801 Py_ssize_t i = 0;
2802 while (i < mysize && memchr(argptr, myptr[i], argsize))
2803 i++;
2804 return i;
2805}
2806
2807static Py_ssize_t
2808rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2809 void *argptr, Py_ssize_t argsize)
2810{
2811 Py_ssize_t i = mysize - 1;
2812 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2813 i--;
2814 return i + 1;
2815}
2816
2817PyDoc_STRVAR(strip__doc__,
2818"B.strip([bytes]) -> bytearray\n\
2819\n\
2820Strip leading and trailing bytes contained in the argument.\n\
2821If the argument is omitted, strip ASCII whitespace.");
2822static PyObject *
2823bytes_strip(PyByteArrayObject *self, PyObject *args)
2824{
2825 Py_ssize_t left, right, mysize, argsize;
2826 void *myptr, *argptr;
2827 PyObject *arg = Py_None;
2828 Py_buffer varg;
2829 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2830 return NULL;
2831 if (arg == Py_None) {
2832 argptr = "\t\n\r\f\v ";
2833 argsize = 6;
2834 }
2835 else {
2836 if (_getbuffer(arg, &varg) < 0)
2837 return NULL;
2838 argptr = varg.buf;
2839 argsize = varg.len;
2840 }
2841 myptr = self->ob_bytes;
2842 mysize = Py_SIZE(self);
2843 left = lstrip_helper(myptr, mysize, argptr, argsize);
2844 if (left == mysize)
2845 right = left;
2846 else
2847 right = rstrip_helper(myptr, mysize, argptr, argsize);
2848 if (arg != Py_None)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002849 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00002850 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2851}
2852
2853PyDoc_STRVAR(lstrip__doc__,
2854"B.lstrip([bytes]) -> bytearray\n\
2855\n\
2856Strip leading bytes contained in the argument.\n\
2857If the argument is omitted, strip leading ASCII whitespace.");
2858static PyObject *
2859bytes_lstrip(PyByteArrayObject *self, PyObject *args)
2860{
2861 Py_ssize_t left, right, mysize, argsize;
2862 void *myptr, *argptr;
2863 PyObject *arg = Py_None;
2864 Py_buffer varg;
2865 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2866 return NULL;
2867 if (arg == Py_None) {
2868 argptr = "\t\n\r\f\v ";
2869 argsize = 6;
2870 }
2871 else {
2872 if (_getbuffer(arg, &varg) < 0)
2873 return NULL;
2874 argptr = varg.buf;
2875 argsize = varg.len;
2876 }
2877 myptr = self->ob_bytes;
2878 mysize = Py_SIZE(self);
2879 left = lstrip_helper(myptr, mysize, argptr, argsize);
2880 right = mysize;
2881 if (arg != Py_None)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002882 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00002883 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2884}
2885
2886PyDoc_STRVAR(rstrip__doc__,
2887"B.rstrip([bytes]) -> bytearray\n\
2888\n\
2889Strip trailing bytes contained in the argument.\n\
2890If the argument is omitted, strip trailing ASCII whitespace.");
2891static PyObject *
2892bytes_rstrip(PyByteArrayObject *self, PyObject *args)
2893{
2894 Py_ssize_t left, right, mysize, argsize;
2895 void *myptr, *argptr;
2896 PyObject *arg = Py_None;
2897 Py_buffer varg;
2898 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2899 return NULL;
2900 if (arg == Py_None) {
2901 argptr = "\t\n\r\f\v ";
2902 argsize = 6;
2903 }
2904 else {
2905 if (_getbuffer(arg, &varg) < 0)
2906 return NULL;
2907 argptr = varg.buf;
2908 argsize = varg.len;
2909 }
2910 myptr = self->ob_bytes;
2911 mysize = Py_SIZE(self);
2912 left = 0;
2913 right = rstrip_helper(myptr, mysize, argptr, argsize);
2914 if (arg != Py_None)
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00002915 PyBuffer_Release(&varg);
Christian Heimes44720832008-05-26 13:01:01 +00002916 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2917}
2918
2919PyDoc_STRVAR(decode_doc,
2920"B.decode([encoding[, errors]]) -> unicode object.\n\
2921\n\
2922Decodes B using the codec registered for encoding. encoding defaults\n\
2923to the default encoding. errors may be given to set a different error\n\
2924handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2925a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2926as well as any other name registered with codecs.register_error that is\n\
2927able to handle UnicodeDecodeErrors.");
2928
2929static PyObject *
2930bytes_decode(PyObject *self, PyObject *args)
2931{
2932 const char *encoding = NULL;
2933 const char *errors = NULL;
2934
2935 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2936 return NULL;
2937 if (encoding == NULL)
2938 encoding = PyUnicode_GetDefaultEncoding();
2939 return PyCodec_Decode(self, encoding, errors);
2940}
2941
2942PyDoc_STRVAR(alloc_doc,
2943"B.__alloc__() -> int\n\
2944\n\
2945Returns the number of bytes actually allocated.");
2946
2947static PyObject *
2948bytes_alloc(PyByteArrayObject *self)
2949{
2950 return PyInt_FromSsize_t(self->ob_alloc);
2951}
2952
2953PyDoc_STRVAR(join_doc,
2954"B.join(iterable_of_bytes) -> bytes\n\
2955\n\
2956Concatenates any number of bytearray objects, with B in between each pair.");
2957
2958static PyObject *
2959bytes_join(PyByteArrayObject *self, PyObject *it)
2960{
2961 PyObject *seq;
2962 Py_ssize_t mysize = Py_SIZE(self);
2963 Py_ssize_t i;
2964 Py_ssize_t n;
2965 PyObject **items;
2966 Py_ssize_t totalsize = 0;
2967 PyObject *result;
2968 char *dest;
2969
2970 seq = PySequence_Fast(it, "can only join an iterable");
2971 if (seq == NULL)
2972 return NULL;
2973 n = PySequence_Fast_GET_SIZE(seq);
2974 items = PySequence_Fast_ITEMS(seq);
2975
2976 /* Compute the total size, and check that they are all bytes */
2977 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2978 for (i = 0; i < n; i++) {
2979 PyObject *obj = items[i];
2980 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2981 PyErr_Format(PyExc_TypeError,
2982 "can only join an iterable of bytes "
2983 "(item %ld has type '%.100s')",
2984 /* XXX %ld isn't right on Win64 */
2985 (long)i, Py_TYPE(obj)->tp_name);
2986 goto error;
2987 }
2988 if (i > 0)
2989 totalsize += mysize;
2990 totalsize += Py_SIZE(obj);
2991 if (totalsize < 0) {
2992 PyErr_NoMemory();
2993 goto error;
2994 }
2995 }
2996
2997 /* Allocate the result, and copy the bytes */
2998 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2999 if (result == NULL)
3000 goto error;
3001 dest = PyByteArray_AS_STRING(result);
3002 for (i = 0; i < n; i++) {
3003 PyObject *obj = items[i];
3004 Py_ssize_t size = Py_SIZE(obj);
3005 char *buf;
3006 if (PyByteArray_Check(obj))
3007 buf = PyByteArray_AS_STRING(obj);
3008 else
3009 buf = PyBytes_AS_STRING(obj);
3010 if (i) {
3011 memcpy(dest, self->ob_bytes, mysize);
3012 dest += mysize;
3013 }
3014 memcpy(dest, buf, size);
3015 dest += size;
3016 }
3017
3018 /* Done */
3019 Py_DECREF(seq);
3020 return result;
3021
3022 /* Error handling */
3023 error:
3024 Py_DECREF(seq);
3025 return NULL;
3026}
3027
3028PyDoc_STRVAR(fromhex_doc,
3029"bytearray.fromhex(string) -> bytearray\n\
3030\n\
3031Create a bytearray object from a string of hexadecimal numbers.\n\
3032Spaces between two numbers are accepted.\n\
3033Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
3034
3035static int
3036hex_digit_to_int(Py_UNICODE c)
3037{
3038 if (c >= 128)
3039 return -1;
3040 if (ISDIGIT(c))
3041 return c - '0';
3042 else {
3043 if (ISUPPER(c))
3044 c = TOLOWER(c);
3045 if (c >= 'a' && c <= 'f')
3046 return c - 'a' + 10;
3047 }
3048 return -1;
3049}
3050
3051static PyObject *
3052bytes_fromhex(PyObject *cls, PyObject *args)
3053{
3054 PyObject *newbytes, *hexobj;
3055 char *buf;
3056 Py_UNICODE *hex;
3057 Py_ssize_t hexlen, byteslen, i, j;
3058 int top, bot;
3059
3060 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
3061 return NULL;
3062 assert(PyUnicode_Check(hexobj));
3063 hexlen = PyUnicode_GET_SIZE(hexobj);
3064 hex = PyUnicode_AS_UNICODE(hexobj);
3065 byteslen = hexlen/2; /* This overestimates if there are spaces */
3066 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
3067 if (!newbytes)
3068 return NULL;
3069 buf = PyByteArray_AS_STRING(newbytes);
3070 for (i = j = 0; i < hexlen; i += 2) {
3071 /* skip over spaces in the input */
3072 while (hex[i] == ' ')
3073 i++;
3074 if (i >= hexlen)
3075 break;
3076 top = hex_digit_to_int(hex[i]);
3077 bot = hex_digit_to_int(hex[i+1]);
3078 if (top == -1 || bot == -1) {
3079 PyErr_Format(PyExc_ValueError,
3080 "non-hexadecimal number found in "
3081 "fromhex() arg at position %zd", i);
3082 goto error;
3083 }
3084 buf[j++] = (top << 4) + bot;
3085 }
3086 if (PyByteArray_Resize(newbytes, j) < 0)
3087 goto error;
3088 return newbytes;
3089
3090 error:
3091 Py_DECREF(newbytes);
3092 return NULL;
3093}
3094
3095PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3096
3097static PyObject *
3098bytes_reduce(PyByteArrayObject *self)
3099{
3100 PyObject *latin1, *dict;
3101 if (self->ob_bytes)
3102 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3103 Py_SIZE(self), NULL);
3104 else
3105 latin1 = PyUnicode_FromString("");
3106
3107 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3108 if (dict == NULL) {
3109 PyErr_Clear();
3110 dict = Py_None;
3111 Py_INCREF(dict);
3112 }
3113
3114 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
3115}
3116
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003117PyDoc_STRVAR(sizeof_doc,
3118"B.__sizeof__() -> int\n\
3119 \n\
3120Returns the size of B in memory, in bytes");
3121static PyObject *
3122bytes_sizeof(PyByteArrayObject *self)
3123{
Georg Brandle9b91212009-04-05 21:26:31 +00003124 Py_ssize_t res;
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003125
Georg Brandle9b91212009-04-05 21:26:31 +00003126 res = sizeof(PyByteArrayObject) + self->ob_alloc * sizeof(char);
3127 return PyInt_FromSsize_t(res);
Robert Schuppenies9be2ec12008-07-10 15:24:04 +00003128}
3129
/* Sequence protocol slots of the bytearray type.  Slicing is not provided
   through the sq_slice/sq_ass_slice slots (both are 0). */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,    /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
3142
/* Mapping protocol slots of the bytearray type. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,              /* mp_length */
    (binaryfunc)bytes_subscript,        /* mp_subscript */
    (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
};
3148
/* Buffer protocol slots of the bytearray type: the four old-style buffer
   functions followed by the new-style get/release pair. */
static PyBufferProcs bytes_as_buffer = {
    (readbufferproc)bytes_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)bytes_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)bytes_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)bytes_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)bytes_getbuffer,             /* bf_getbuffer */
    (releasebufferproc)bytes_releasebuffer,     /* bf_releasebuffer */
};
3157
/* Method table of the bytearray type.  Each entry gives the Python-level
   name, the C implementation, the calling convention flags and the
   docstring.  Entries are kept in alphabetical order after the three
   double-underscore methods; the {NULL} entry terminates the table. */
static PyMethodDef
bytes_methods[] = {
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* class method: callable on the type itself */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
3222
3223PyDoc_STRVAR(bytes_doc,
3224"bytearray(iterable_of_ints) -> bytearray.\n\
3225bytearray(string, encoding[, errors]) -> bytearray.\n\
3226bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3227bytearray(memory_view) -> bytearray.\n\
3228\n\
3229Construct an mutable bytearray object from:\n\
3230 - an iterable yielding integers in range(256)\n\
3231 - a text string encoded using the specified encoding\n\
3232 - a bytes or a bytearray object\n\
3233 - any object implementing the buffer API.\n\
3234\n\
3235bytearray(int) -> bytearray.\n\
3236\n\
3237Construct a zero-initialized bytearray of the given length.");
3238
3239
3240static PyObject *bytes_iter(PyObject *seq);
3241
3242PyTypeObject PyByteArray_Type = {
3243 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3244 "bytearray",
3245 sizeof(PyByteArrayObject),
3246 0,
3247 (destructor)bytes_dealloc, /* tp_dealloc */
3248 0, /* tp_print */
3249 0, /* tp_getattr */
3250 0, /* tp_setattr */
3251 0, /* tp_compare */
3252 (reprfunc)bytes_repr, /* tp_repr */
3253 0, /* tp_as_number */
3254 &bytes_as_sequence, /* tp_as_sequence */
3255 &bytes_as_mapping, /* tp_as_mapping */
3256 0, /* tp_hash */
3257 0, /* tp_call */
3258 bytes_str, /* tp_str */
3259 PyObject_GenericGetAttr, /* tp_getattro */
3260 0, /* tp_setattro */
3261 &bytes_as_buffer, /* tp_as_buffer */
3262 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3263 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3264 bytes_doc, /* tp_doc */
3265 0, /* tp_traverse */
3266 0, /* tp_clear */
3267 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3268 0, /* tp_weaklistoffset */
3269 bytes_iter, /* tp_iter */
3270 0, /* tp_iternext */
3271 bytes_methods, /* tp_methods */
3272 0, /* tp_members */
3273 0, /* tp_getset */
3274 0, /* tp_base */
3275 0, /* tp_dict */
3276 0, /* tp_descr_get */
3277 0, /* tp_descr_set */
3278 0, /* tp_dictoffset */
3279 (initproc)bytes_init, /* tp_init */
3280 PyType_GenericAlloc, /* tp_alloc */
3281 PyType_GenericNew, /* tp_new */
3282 PyObject_Del, /* tp_free */
3283};
3284
3285/*********************** Bytes Iterator ****************************/
3286
3287typedef struct {
3288 PyObject_HEAD
3289 Py_ssize_t it_index;
3290 PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
3291} bytesiterobject;
3292
3293static void
3294bytesiter_dealloc(bytesiterobject *it)
3295{
3296 _PyObject_GC_UNTRACK(it);
3297 Py_XDECREF(it->it_seq);
3298 PyObject_GC_Del(it);
3299}
3300
3301static int
3302bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3303{
3304 Py_VISIT(it->it_seq);
3305 return 0;
3306}
3307
3308static PyObject *
3309bytesiter_next(bytesiterobject *it)
3310{
3311 PyByteArrayObject *seq;
3312 PyObject *item;
3313
3314 assert(it != NULL);
3315 seq = it->it_seq;
3316 if (seq == NULL)
3317 return NULL;
3318 assert(PyByteArray_Check(seq));
3319
3320 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
3321 item = PyInt_FromLong(
3322 (unsigned char)seq->ob_bytes[it->it_index]);
3323 if (item != NULL)
3324 ++it->it_index;
3325 return item;
3326 }
3327
3328 Py_DECREF(seq);
3329 it->it_seq = NULL;
3330 return NULL;
3331}
3332
3333static PyObject *
3334bytesiter_length_hint(bytesiterobject *it)
3335{
3336 Py_ssize_t len = 0;
3337 if (it->it_seq)
3338 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3339 return PyInt_FromSsize_t(len);
3340}
3341
3342PyDoc_STRVAR(length_hint_doc,
3343 "Private method returning an estimate of len(list(it)).");
3344
3345static PyMethodDef bytesiter_methods[] = {
3346 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3347 length_hint_doc},
3348 {NULL, NULL} /* sentinel */
3349};
3350
3351PyTypeObject PyByteArrayIter_Type = {
3352 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3353 "bytearray_iterator", /* tp_name */
3354 sizeof(bytesiterobject), /* tp_basicsize */
3355 0, /* tp_itemsize */
3356 /* methods */
3357 (destructor)bytesiter_dealloc, /* tp_dealloc */
3358 0, /* tp_print */
3359 0, /* tp_getattr */
3360 0, /* tp_setattr */
3361 0, /* tp_compare */
3362 0, /* tp_repr */
3363 0, /* tp_as_number */
3364 0, /* tp_as_sequence */
3365 0, /* tp_as_mapping */
3366 0, /* tp_hash */
3367 0, /* tp_call */
3368 0, /* tp_str */
3369 PyObject_GenericGetAttr, /* tp_getattro */
3370 0, /* tp_setattro */
3371 0, /* tp_as_buffer */
3372 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3373 0, /* tp_doc */
3374 (traverseproc)bytesiter_traverse, /* tp_traverse */
3375 0, /* tp_clear */
3376 0, /* tp_richcompare */
3377 0, /* tp_weaklistoffset */
3378 PyObject_SelfIter, /* tp_iter */
3379 (iternextfunc)bytesiter_next, /* tp_iternext */
3380 bytesiter_methods, /* tp_methods */
3381 0,
3382};
3383
3384static PyObject *
3385bytes_iter(PyObject *seq)
3386{
3387 bytesiterobject *it;
3388
3389 if (!PyByteArray_Check(seq)) {
3390 PyErr_BadInternalCall();
3391 return NULL;
3392 }
3393 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3394 if (it == NULL)
3395 return NULL;
3396 it->it_index = 0;
3397 Py_INCREF(seq);
3398 it->it_seq = (PyByteArrayObject *)seq;
3399 _PyObject_GC_TRACK(it);
3400 return (PyObject *)it;
3401}