blob: 213dbfc8911b67c218fcdc8d41ca97cc153cb05b [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
Guido van Rossumad7d8d12007-04-13 01:39:34 +000034/* Helpers */
35
36static int
37_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000038{
39 PyObject *intarg = PyNumber_Int(arg);
40 if (! intarg)
41 return 0;
42 *value = PyInt_AsLong(intarg);
43 Py_DECREF(intarg);
44 if (*value < 0 || *value >= 256) {
45 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
46 return 0;
47 }
48 return 1;
49}
50
Guido van Rossumad7d8d12007-04-13 01:39:34 +000051Py_ssize_t
52_getbuffer(PyObject *obj, void **ptr)
53{
54 PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
55
56 if (buffer == NULL ||
57 PyUnicode_Check(obj) ||
58 buffer->bf_getreadbuffer == NULL ||
59 buffer->bf_getsegcount == NULL ||
60 buffer->bf_getsegcount(obj, NULL) != 1)
61 {
62 *ptr = NULL;
63 return -1;
64 }
65
66 return buffer->bf_getreadbuffer(obj, 0, ptr);
67}
68
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069/* Direct API functions */
70
71PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000072PyBytes_FromObject(PyObject *input)
73{
74 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
75 input, NULL);
76}
77
78PyObject *
79PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080{
81 PyBytesObject *new;
82
Guido van Rossumd624f182006-04-24 13:47:05 +000083 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000084
85 new = PyObject_New(PyBytesObject, &PyBytes_Type);
86 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000087 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000088
Guido van Rossumd624f182006-04-24 13:47:05 +000089 if (size == 0)
90 new->ob_bytes = NULL;
91 else {
92 new->ob_bytes = PyMem_Malloc(size);
93 if (new->ob_bytes == NULL) {
94 Py_DECREF(new);
95 return NULL;
96 }
97 if (bytes != NULL)
98 memcpy(new->ob_bytes, bytes, size);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000099 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000100 new->ob_size = new->ob_alloc = size;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000101
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000102 return (PyObject *)new;
103}
104
105Py_ssize_t
106PyBytes_Size(PyObject *self)
107{
108 assert(self != NULL);
109 assert(PyBytes_Check(self));
110
Guido van Rossum20188312006-05-05 15:15:40 +0000111 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112}
113
114char *
115PyBytes_AsString(PyObject *self)
116{
117 assert(self != NULL);
118 assert(PyBytes_Check(self));
119
Guido van Rossum20188312006-05-05 15:15:40 +0000120 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000121}
122
123int
124PyBytes_Resize(PyObject *self, Py_ssize_t size)
125{
126 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000127 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000128
129 assert(self != NULL);
130 assert(PyBytes_Check(self));
131 assert(size >= 0);
132
Guido van Rossuma0867f72006-05-05 04:34:18 +0000133 if (size < alloc / 2) {
134 /* Major downsize; resize down to exact size */
135 alloc = size;
136 }
137 else if (size <= alloc) {
138 /* Within allocated size; quick exit */
139 ((PyBytesObject *)self)->ob_size = size;
140 return 0;
141 }
142 else if (size <= alloc * 1.125) {
143 /* Moderate upsize; overallocate similar to list_resize() */
144 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
145 }
146 else {
147 /* Major upsize; resize up to exact size */
148 alloc = size;
149 }
150
151 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000152 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000153 PyErr_NoMemory();
154 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000155 }
156
Guido van Rossumd624f182006-04-24 13:47:05 +0000157 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000158 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000159 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000160
161 return 0;
162}
163
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000164PyObject *
165PyBytes_Concat(PyObject *a, PyObject *b)
166{
167 Py_ssize_t asize, bsize, size;
168 void *aptr, *bptr;
169 PyBytesObject *result;
170
171 asize = _getbuffer(a, &aptr);
172 bsize = _getbuffer(b, &bptr);
173 if (asize < 0 || bsize < 0) {
174 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
175 a->ob_type->tp_name, b->ob_type->tp_name);
176 return NULL;
177 }
178
179 size = asize + bsize;
180 if (size < 0)
181 return PyErr_NoMemory();
182
183 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
184 if (result != NULL) {
185 memcpy(result->ob_bytes, aptr, asize);
186 memcpy(result->ob_bytes + asize, bptr, bsize);
187 }
188 return (PyObject *)result;
189}
190
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000191/* Functions stuffed into the type object */
192
193static Py_ssize_t
194bytes_length(PyBytesObject *self)
195{
196 return self->ob_size;
197}
198
199static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000200bytes_concat(PyBytesObject *self, PyObject *other)
201{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000202 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000203}
204
205static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000206bytes_iconcat(PyBytesObject *self, PyObject *other)
207{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000208 void *optr;
Guido van Rossum13e57212006-04-27 22:54:26 +0000209 Py_ssize_t osize;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000211 Py_ssize_t size;
212
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000213 osize = _getbuffer(other, &optr);
214 if (osize < 0) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000215 PyErr_Format(PyExc_TypeError,
216 "can't concat bytes to %.100s", other->ob_type->tp_name);
217 return NULL;
218 }
219
220 mysize = self->ob_size;
Guido van Rossum13e57212006-04-27 22:54:26 +0000221 size = mysize + osize;
222 if (size < 0)
223 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000224 if (size <= self->ob_alloc)
225 self->ob_size = size;
226 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000227 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000228 memcpy(self->ob_bytes + mysize, optr, osize);
Guido van Rossum13e57212006-04-27 22:54:26 +0000229 Py_INCREF(self);
230 return (PyObject *)self;
231}
232
233static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000234bytes_repeat(PyBytesObject *self, Py_ssize_t count)
235{
236 PyBytesObject *result;
237 Py_ssize_t mysize;
238 Py_ssize_t size;
239
240 if (count < 0)
241 count = 0;
242 mysize = self->ob_size;
243 size = mysize * count;
244 if (count != 0 && size / count != mysize)
245 return PyErr_NoMemory();
246 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
247 if (result != NULL && size != 0) {
248 if (mysize == 1)
249 memset(result->ob_bytes, self->ob_bytes[0], size);
250 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000251 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000252 for (i = 0; i < count; i++)
253 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
254 }
255 }
256 return (PyObject *)result;
257}
258
259static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000260bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
261{
262 Py_ssize_t mysize;
263 Py_ssize_t size;
264
265 if (count < 0)
266 count = 0;
267 mysize = self->ob_size;
268 size = mysize * count;
269 if (count != 0 && size / count != mysize)
270 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000271 if (size <= self->ob_alloc)
272 self->ob_size = size;
273 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000274 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000275
Guido van Rossum13e57212006-04-27 22:54:26 +0000276 if (mysize == 1)
277 memset(self->ob_bytes, self->ob_bytes[0], size);
278 else {
279 Py_ssize_t i;
280 for (i = 1; i < count; i++)
281 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
282 }
283
284 Py_INCREF(self);
285 return (PyObject *)self;
286}
287
288static int
289bytes_substring(PyBytesObject *self, PyBytesObject *other)
290{
291 Py_ssize_t i;
292
293 if (other->ob_size == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000294 return memchr(self->ob_bytes, other->ob_bytes[0],
Guido van Rossum13e57212006-04-27 22:54:26 +0000295 self->ob_size) != NULL;
296 }
297 if (other->ob_size == 0)
298 return 1; /* Edge case */
299 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
300 /* XXX Yeah, yeah, lots of optimizations possible... */
301 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
302 return 1;
303 }
304 return 0;
305}
306
307static int
308bytes_contains(PyBytesObject *self, PyObject *value)
309{
310 Py_ssize_t ival;
311
312 if (PyBytes_Check(value))
313 return bytes_substring(self, (PyBytesObject *)value);
314
Thomas Woutersd204a712006-08-22 13:41:17 +0000315 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000316 if (ival == -1 && PyErr_Occurred())
317 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000318 if (ival < 0 || ival >= 256) {
319 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
320 return -1;
321 }
322
323 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
324}
325
326static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000327bytes_getitem(PyBytesObject *self, Py_ssize_t i)
328{
329 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000330 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000331 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000332 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
333 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000334 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000335 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
336}
337
338static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000339bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000340{
Thomas Wouters376446d2006-12-19 08:30:14 +0000341 if (PyIndex_Check(item)) {
342 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000343
Thomas Wouters376446d2006-12-19 08:30:14 +0000344 if (i == -1 && PyErr_Occurred())
345 return NULL;
346
347 if (i < 0)
348 i += PyBytes_GET_SIZE(self);
349
350 if (i < 0 || i >= self->ob_size) {
351 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
352 return NULL;
353 }
354 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
355 }
356 else if (PySlice_Check(item)) {
357 Py_ssize_t start, stop, step, slicelength, cur, i;
358 if (PySlice_GetIndicesEx((PySliceObject *)item,
359 PyBytes_GET_SIZE(self),
360 &start, &stop, &step, &slicelength) < 0) {
361 return NULL;
362 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000363
Thomas Wouters376446d2006-12-19 08:30:14 +0000364 if (slicelength <= 0)
365 return PyBytes_FromStringAndSize("", 0);
366 else if (step == 1) {
367 return PyBytes_FromStringAndSize(self->ob_bytes + start,
368 slicelength);
369 }
370 else {
371 char *source_buf = PyBytes_AS_STRING(self);
372 char *result_buf = (char *)PyMem_Malloc(slicelength);
373 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000374
Thomas Wouters376446d2006-12-19 08:30:14 +0000375 if (result_buf == NULL)
376 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000377
Thomas Wouters376446d2006-12-19 08:30:14 +0000378 for (cur = start, i = 0; i < slicelength;
379 cur += step, i++) {
380 result_buf[i] = source_buf[cur];
381 }
382 result = PyBytes_FromStringAndSize(result_buf, slicelength);
383 PyMem_Free(result_buf);
384 return result;
385 }
386 }
387 else {
388 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
389 return NULL;
390 }
391}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000392
Guido van Rossumd624f182006-04-24 13:47:05 +0000393static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000394bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000395 PyObject *values)
396{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000397 Py_ssize_t avail, needed;
398 void *bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000399
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000400 if (values == (PyObject *)self) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000401 /* Make a copy an call this function recursively */
402 int err;
403 values = PyBytes_FromObject(values);
404 if (values == NULL)
405 return -1;
406 err = bytes_setslice(self, lo, hi, values);
407 Py_DECREF(values);
408 return err;
409 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000410 if (values == NULL) {
411 /* del b[lo:hi] */
412 bytes = NULL;
413 needed = 0;
414 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000415 else {
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000416 needed = _getbuffer(values, &bytes);
417 if (needed < 0) {
418 PyErr_Format(PyExc_TypeError,
419 "can't set bytes slice from %.100s",
420 values->ob_type->tp_name);
421 return -1;
422 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000423 }
424
425 if (lo < 0)
426 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000427 if (hi < lo)
428 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000429 if (hi > self->ob_size)
430 hi = self->ob_size;
431
432 avail = hi - lo;
433 if (avail < 0)
434 lo = hi = avail = 0;
435
436 if (avail != needed) {
437 if (avail > needed) {
438 /*
439 0 lo hi old_size
440 | |<----avail----->|<-----tomove------>|
441 | |<-needed->|<-----tomove------>|
442 0 lo new_hi new_size
443 */
444 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
445 self->ob_size - hi);
446 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000447 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000448 self->ob_size + needed - avail) < 0)
449 return -1;
450 if (avail < needed) {
451 /*
452 0 lo hi old_size
453 | |<-avail->|<-----tomove------>|
454 | |<----needed---->|<-----tomove------>|
455 0 lo new_hi new_size
456 */
457 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
458 self->ob_size - lo - needed);
459 }
460 }
461
462 if (needed > 0)
463 memcpy(self->ob_bytes + lo, bytes, needed);
464
465 return 0;
466}
467
468static int
469bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
470{
471 Py_ssize_t ival;
472
473 if (i < 0)
474 i += self->ob_size;
475
476 if (i < 0 || i >= self->ob_size) {
477 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
478 return -1;
479 }
480
481 if (value == NULL)
482 return bytes_setslice(self, i, i+1, NULL);
483
Thomas Woutersd204a712006-08-22 13:41:17 +0000484 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000485 if (ival == -1 && PyErr_Occurred())
486 return -1;
487
488 if (ival < 0 || ival >= 256) {
489 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
490 return -1;
491 }
492
493 self->ob_bytes[i] = ival;
494 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000495}
496
/* Mapping assignment slot: handles b[item] = values and, when values is
   NULL, del b[item].

   item may be an index (anything passing PyIndex_Check) or a slice; any
   other type raises TypeError.  For an index, values must convert to an
   integer in range(0, 256).  For a slice, values that is self or is not a
   bytes object is first converted with PyBytes_FromObject() and the
   function calls itself with the converted object.

   Returns 0 on success and -1 with an exception set on failure. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000497static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000498bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
499{
500 Py_ssize_t start, stop, step, slicelen, needed;
501 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000502
/* Step 1: turn item into (start, stop, step, slicelen). */
Thomas Wouters376446d2006-12-19 08:30:14 +0000503 if (PyIndex_Check(item)) {
504 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
505
506 if (i == -1 && PyErr_Occurred())
507 return -1;
508
/* A negative index is offset by the current size. */
509 if (i < 0)
510 i += PyBytes_GET_SIZE(self);
511
512 if (i < 0 || i >= self->ob_size) {
513 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
514 return -1;
515 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000516
Thomas Wouters376446d2006-12-19 08:30:14 +0000517 if (values == NULL) {
/* del b[i]: express it as the one-element slice b[i:i+1] and let the
   step == 1 code below remove it. */
518 /* Fall through to slice assignment */
519 start = i;
520 stop = i + 1;
521 step = 1;
522 slicelen = 1;
523 }
524 else {
/* b[i] = value: store a single byte and return immediately. */
525 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
526 if (ival == -1 && PyErr_Occurred())
527 return -1;
528 if (ival < 0 || ival >= 256) {
529 PyErr_SetString(PyExc_ValueError,
530 "byte must be in range(0, 256)");
531 return -1;
532 }
533 self->ob_bytes[i] = (char)ival;
534 return 0;
535 }
536 }
537 else if (PySlice_Check(item)) {
538 if (PySlice_GetIndicesEx((PySliceObject *)item,
539 PyBytes_GET_SIZE(self),
540 &start, &stop, &step, &slicelen) < 0) {
541 return -1;
542 }
543 }
544 else {
545 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
546 return -1;
547 }
548
/* Step 2: determine the replacement data (bytes, needed).  A NULL values
   means deletion, represented as zero replacement bytes. */
549 if (values == NULL) {
550 bytes = NULL;
551 needed = 0;
552 }
553 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
554 /* Make a copy and call this function recursively */
555 int err;
556 values = PyBytes_FromObject(values);
557 if (values == NULL)
558 return -1;
559 err = bytes_ass_subscript(self, item, values);
560 Py_DECREF(values);
561 return err;
562 }
563 else {
564 assert(PyBytes_Check(values));
565 bytes = ((PyBytesObject *)values)->ob_bytes;
566 needed = ((PyBytesObject *)values)->ob_size;
567 }
568 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
569 if ((step < 0 && start < stop) ||
570 (step > 0 && start > stop))
571 stop = start;
/* Step 3a: contiguous slice.  The tail is moved before the resize when
   shrinking and after it when growing; in the growing case ob_size already
   holds the new size when the second memmove computes its length. */
572 if (step == 1) {
573 if (slicelen != needed) {
574 if (slicelen > needed) {
575 /*
576 0 start stop old_size
577 | |<---slicelen--->|<-----tomove------>|
578 | |<-needed->|<-----tomove------>|
579 0 lo new_hi new_size
580 */
581 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
582 self->ob_size - stop);
583 }
584 if (PyBytes_Resize((PyObject *)self,
585 self->ob_size + needed - slicelen) < 0)
586 return -1;
587 if (slicelen < needed) {
588 /*
589 0 lo hi old_size
590 | |<-avail->|<-----tomove------>|
591 | |<----needed---->|<-----tomove------>|
592 0 lo new_hi new_size
593 */
594 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
595 self->ob_size - start - needed);
596 }
597 }
598
599 if (needed > 0)
600 memcpy(self->ob_bytes + start, bytes, needed);
601
602 return 0;
603 }
604 else {
/* Step 3b: extended slice (step != 1).  This branch is selected by
   needed == 0, so it is taken for del b[...] and also when values is an
   empty bytes object. */
605 if (needed == 0) {
606 /* Delete slice */
607 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000608
/* Rewrite a negative-step slice as the equivalent positive-step slice so
   the loop below can walk forward through the buffer. */
Thomas Wouters376446d2006-12-19 08:30:14 +0000609 if (step < 0) {
610 stop = start + 1;
611 start = stop + step * (slicelen - 1) - 1;
612 step = -step;
613 }
/* For each deleted position cur, slide the bytes that follow it (up to
   the next deleted position, or the end of the buffer) left by i, the
   number of bytes deleted so far. */
614 for (cur = start, i = 0;
615 i < slicelen; cur += step, i++) {
616 Py_ssize_t lim = step - 1;
617
618 if (cur + step >= PyBytes_GET_SIZE(self))
619 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000620
Thomas Wouters376446d2006-12-19 08:30:14 +0000621 memmove(self->ob_bytes + cur - i,
622 self->ob_bytes + cur + 1, lim);
623 }
624 /* Move the tail of the bytes, in one chunk */
625 cur = start + slicelen*step;
626 if (cur < PyBytes_GET_SIZE(self)) {
627 memmove(self->ob_bytes + cur - slicelen,
628 self->ob_bytes + cur,
629 PyBytes_GET_SIZE(self) - cur);
630 }
631 if (PyBytes_Resize((PyObject *)self,
632 PyBytes_GET_SIZE(self) - slicelen) < 0)
633 return -1;
634
635 return 0;
636 }
637 else {
638 /* Assign slice */
639 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000640
/* An extended slice cannot change the object's size, so the replacement
   must have exactly slicelen bytes. */
Thomas Wouters376446d2006-12-19 08:30:14 +0000641 if (needed != slicelen) {
642 PyErr_Format(PyExc_ValueError,
643 "attempt to assign bytes of size %zd "
644 "to extended slice of size %zd",
645 needed, slicelen);
646 return -1;
647 }
648 for (cur = start, i = 0; i < slicelen; cur += step, i++)
649 self->ob_bytes[cur] = bytes[i];
650 return 0;
651 }
652 }
653}
654
655static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000656bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
657{
Guido van Rossumd624f182006-04-24 13:47:05 +0000658 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000659 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000660 const char *encoding = NULL;
661 const char *errors = NULL;
662 Py_ssize_t count;
663 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000664 PyObject *(*iternext)(PyObject *);
665
Guido van Rossuma0867f72006-05-05 04:34:18 +0000666 if (self->ob_size != 0) {
667 /* Empty previous contents (yes, do this first of all!) */
668 if (PyBytes_Resize((PyObject *)self, 0) < 0)
669 return -1;
670 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000671
Guido van Rossumd624f182006-04-24 13:47:05 +0000672 /* Parse arguments */
673 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
674 &arg, &encoding, &errors))
675 return -1;
676
677 /* Make a quick exit if no first argument */
678 if (arg == NULL) {
679 if (encoding != NULL || errors != NULL) {
680 PyErr_SetString(PyExc_TypeError,
681 "encoding or errors without sequence argument");
682 return -1;
683 }
684 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000685 }
686
Guido van Rossumd624f182006-04-24 13:47:05 +0000687 if (PyUnicode_Check(arg)) {
688 /* Encode via the codec registry */
689 PyObject *encoded;
690 char *bytes;
691 Py_ssize_t size;
692 if (encoding == NULL)
693 encoding = PyUnicode_GetDefaultEncoding();
694 encoded = PyCodec_Encode(arg, encoding, errors);
695 if (encoded == NULL)
696 return -1;
697 if (!PyString_Check(encoded)) {
698 PyErr_Format(PyExc_TypeError,
699 "encoder did not return a string object (type=%.400s)",
700 encoded->ob_type->tp_name);
701 Py_DECREF(encoded);
702 return -1;
703 }
704 bytes = PyString_AS_STRING(encoded);
705 size = PyString_GET_SIZE(encoded);
Guido van Rossuma0867f72006-05-05 04:34:18 +0000706 if (size <= self->ob_alloc)
707 self->ob_size = size;
708 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000709 Py_DECREF(encoded);
710 return -1;
711 }
712 memcpy(self->ob_bytes, bytes, size);
713 Py_DECREF(encoded);
714 return 0;
715 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000716
Guido van Rossumd624f182006-04-24 13:47:05 +0000717 /* If it's not unicode, there can't be encoding or errors */
718 if (encoding != NULL || errors != NULL) {
719 PyErr_SetString(PyExc_TypeError,
720 "encoding or errors without a string argument");
721 return -1;
722 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000723
Guido van Rossumd624f182006-04-24 13:47:05 +0000724 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000725 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000726 if (count == -1 && PyErr_Occurred())
727 PyErr_Clear();
728 else {
729 if (count < 0) {
730 PyErr_SetString(PyExc_ValueError, "negative count");
731 return -1;
732 }
733 if (count > 0) {
734 if (PyBytes_Resize((PyObject *)self, count))
735 return -1;
736 memset(self->ob_bytes, 0, count);
737 }
738 return 0;
739 }
740
741 if (PyObject_CheckReadBuffer(arg)) {
742 const void *bytes;
743 Py_ssize_t size;
744 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
745 return -1;
746 if (PyBytes_Resize((PyObject *)self, size) < 0)
747 return -1;
748 memcpy(self->ob_bytes, bytes, size);
749 return 0;
750 }
751
752 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000753
754 /* Get the iterator */
755 it = PyObject_GetIter(arg);
756 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000757 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000758 iternext = *it->ob_type->tp_iternext;
759
760 /* Run the iterator to exhaustion */
761 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000762 PyObject *item;
763 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000764
Guido van Rossumd624f182006-04-24 13:47:05 +0000765 /* Get the next item */
766 item = iternext(it);
767 if (item == NULL) {
768 if (PyErr_Occurred()) {
769 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
770 goto error;
771 PyErr_Clear();
772 }
773 break;
774 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000775
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000777 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000778 Py_DECREF(item);
779 if (value == -1 && PyErr_Occurred())
780 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000781
Guido van Rossumd624f182006-04-24 13:47:05 +0000782 /* Range check */
783 if (value < 0 || value >= 256) {
784 PyErr_SetString(PyExc_ValueError,
785 "bytes must be in range(0, 256)");
786 goto error;
787 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000788
Guido van Rossumd624f182006-04-24 13:47:05 +0000789 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000790 if (self->ob_size < self->ob_alloc)
791 self->ob_size++;
792 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000793 goto error;
794 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000795 }
796
797 /* Clean up and return success */
798 Py_DECREF(it);
799 return 0;
800
801 error:
802 /* Error handling when it != NULL */
803 Py_DECREF(it);
804 return -1;
805}
806
Georg Brandlee91be42007-02-24 19:41:35 +0000807/* Mostly copied from string_repr, but without the
808 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000809static PyObject *
810bytes_repr(PyBytesObject *self)
811{
Georg Brandlee91be42007-02-24 19:41:35 +0000812 size_t newsize = 3 + 4 * self->ob_size;
813 PyObject *v;
814 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
815 PyErr_SetString(PyExc_OverflowError,
816 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000817 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000818 }
Georg Brandlee91be42007-02-24 19:41:35 +0000819 v = PyString_FromStringAndSize((char *)NULL, newsize);
820 if (v == NULL) {
821 return NULL;
822 }
823 else {
824 register Py_ssize_t i;
825 register char c;
826 register char *p;
827 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000828
Georg Brandlee91be42007-02-24 19:41:35 +0000829 p = PyString_AS_STRING(v);
830 *p++ = 'b';
831 *p++ = quote;
832 for (i = 0; i < self->ob_size; i++) {
833 /* There's at least enough room for a hex escape
834 and a closing quote. */
835 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
836 c = self->ob_bytes[i];
837 if (c == quote || c == '\\')
838 *p++ = '\\', *p++ = c;
839 else if (c == '\t')
840 *p++ = '\\', *p++ = 't';
841 else if (c == '\n')
842 *p++ = '\\', *p++ = 'n';
843 else if (c == '\r')
844 *p++ = '\\', *p++ = 'r';
845 else if (c == 0)
846 *p++ = '\\', *p++ = '0';
847 else if (c < ' ' || c >= 0x7f) {
848 /* For performance, we don't want to call
849 PyOS_snprintf here (extra layers of
850 function call). */
851 sprintf(p, "\\x%02x", c & 0xff);
852 p += 4;
853 }
854 else
855 *p++ = c;
856 }
857 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
858 *p++ = quote;
859 *p = '\0';
860 _PyString_Resize(
861 &v, (p - PyString_AS_STRING(v)));
862 return v;
863 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000864}
865
866static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000867bytes_str(PyBytesObject *self)
868{
869 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
870}
871
872static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000873bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000875 Py_ssize_t self_size, other_size;
876 void *self_bytes, *other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000877 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000878 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000879 int cmp;
880
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000881 /* Bytes can be compared to anything that supports the (binary) buffer
882 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000883
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000884 self_size = _getbuffer(self, &self_bytes);
885 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000886 Py_INCREF(Py_NotImplemented);
887 return Py_NotImplemented;
888 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000889
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000890 other_size = _getbuffer(other, &other_bytes);
891 if (other_size < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000892 Py_INCREF(Py_NotImplemented);
893 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000894 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000895
896 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000897 /* Shortcut: if the lengths differ, the objects differ */
898 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000899 }
900 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000901 minsize = self_size;
902 if (other_size < minsize)
903 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000904
Guido van Rossum343e97f2007-04-09 00:43:24 +0000905 cmp = memcmp(self_bytes, other_bytes, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000906 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000907
Guido van Rossumd624f182006-04-24 13:47:05 +0000908 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000909 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000910 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000911 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000912 cmp = 1;
913 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000914
Guido van Rossumd624f182006-04-24 13:47:05 +0000915 switch (op) {
916 case Py_LT: cmp = cmp < 0; break;
917 case Py_LE: cmp = cmp <= 0; break;
918 case Py_EQ: cmp = cmp == 0; break;
919 case Py_NE: cmp = cmp != 0; break;
920 case Py_GT: cmp = cmp > 0; break;
921 case Py_GE: cmp = cmp >= 0; break;
922 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000923 }
924
925 res = cmp ? Py_True : Py_False;
926 Py_INCREF(res);
927 return res;
928}
929
930static void
931bytes_dealloc(PyBytesObject *self)
932{
Guido van Rossumd624f182006-04-24 13:47:05 +0000933 if (self->ob_bytes != 0) {
934 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000935 }
936 self->ob_type->tp_free((PyObject *)self);
937}
938
Guido van Rossumd624f182006-04-24 13:47:05 +0000939static Py_ssize_t
940bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
941{
942 if (index != 0) {
943 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000944 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000945 return -1;
946 }
947 *ptr = (void *)self->ob_bytes;
948 return self->ob_size;
949}
950
951static Py_ssize_t
952bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
953{
954 if (lenp)
955 *lenp = self->ob_size;
956 return 1;
957}
958
Neal Norwitz6968b052007-02-27 19:02:19 +0000959
960
961/* -------------------------------------------------------------------- */
962/* Methods */
963
964#define STRINGLIB_CHAR char
965#define STRINGLIB_CMP memcmp
966#define STRINGLIB_LEN PyBytes_GET_SIZE
967#define STRINGLIB_NEW PyBytes_FromStringAndSize
968#define STRINGLIB_EMPTY nullbytes
969
970#include "stringlib/fastsearch.h"
971#include "stringlib/count.h"
972#include "stringlib/find.h"
973#include "stringlib/partition.h"
974
975
976/* The following Py_LOCAL_INLINE and Py_LOCAL functions
977were copied from the old char* style string object. */
978
979Py_LOCAL_INLINE(void)
980_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
981{
982 if (*end > len)
983 *end = len;
984 else if (*end < 0)
985 *end += len;
986 if (*end < 0)
987 *end = 0;
988 if (*start < 0)
989 *start += len;
990 if (*start < 0)
991 *start = 0;
992}
993
994
995Py_LOCAL_INLINE(Py_ssize_t)
996bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
997{
998 PyObject *subobj;
999 const char *sub;
1000 Py_ssize_t sub_len;
1001 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1002
1003 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1004 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1005 return -2;
1006 if (PyBytes_Check(subobj)) {
1007 sub = PyBytes_AS_STRING(subobj);
1008 sub_len = PyBytes_GET_SIZE(subobj);
1009 }
1010 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1011 /* XXX - the "expected a character buffer object" is pretty
1012 confusing for a non-expert. remap to something else ? */
1013 return -2;
1014
1015 if (dir > 0)
1016 return stringlib_find_slice(
1017 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1018 sub, sub_len, start, end);
1019 else
1020 return stringlib_rfind_slice(
1021 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1022 sub, sub_len, start, end);
1023}
1024
1025
1026PyDoc_STRVAR(find__doc__,
1027"B.find(sub [,start [,end]]) -> int\n\
1028\n\
1029Return the lowest index in B where subsection sub is found,\n\
1030such that sub is contained within s[start,end]. Optional\n\
1031arguments start and end are interpreted as in slice notation.\n\
1032\n\
1033Return -1 on failure.");
1034
1035static PyObject *
1036bytes_find(PyBytesObject *self, PyObject *args)
1037{
1038 Py_ssize_t result = bytes_find_internal(self, args, +1);
1039 if (result == -2)
1040 return NULL;
1041 return PyInt_FromSsize_t(result);
1042}
1043
1044PyDoc_STRVAR(count__doc__,
1045"B.count(sub[, start[, end]]) -> int\n\
1046\n\
1047Return the number of non-overlapping occurrences of subsection sub in\n\
1048bytes B[start:end]. Optional arguments start and end are interpreted\n\
1049as in slice notation.");
1050
1051static PyObject *
1052bytes_count(PyBytesObject *self, PyObject *args)
1053{
1054 PyObject *sub_obj;
1055 const char *str = PyBytes_AS_STRING(self), *sub;
1056 Py_ssize_t sub_len;
1057 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1058
1059 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1060 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1061 return NULL;
1062
1063 if (PyBytes_Check(sub_obj)) {
1064 sub = PyBytes_AS_STRING(sub_obj);
1065 sub_len = PyBytes_GET_SIZE(sub_obj);
1066 }
1067 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1068 return NULL;
1069
1070 _adjust_indices(&start, &end, PyString_GET_SIZE(self));
1071
1072 return PyInt_FromSsize_t(
1073 stringlib_count(str + start, end - start, sub, sub_len)
1074 );
1075}
1076
1077
1078PyDoc_STRVAR(index__doc__,
1079"B.index(sub [,start [,end]]) -> int\n\
1080\n\
1081Like B.find() but raise ValueError when the subsection is not found.");
1082
1083static PyObject *
1084bytes_index(PyBytesObject *self, PyObject *args)
1085{
1086 Py_ssize_t result = bytes_find_internal(self, args, +1);
1087 if (result == -2)
1088 return NULL;
1089 if (result == -1) {
1090 PyErr_SetString(PyExc_ValueError,
1091 "subsection not found");
1092 return NULL;
1093 }
1094 return PyInt_FromSsize_t(result);
1095}
1096
1097
1098PyDoc_STRVAR(rfind__doc__,
1099"B.rfind(sub [,start [,end]]) -> int\n\
1100\n\
1101Return the highest index in B where subsection sub is found,\n\
1102such that sub is contained within s[start,end]. Optional\n\
1103arguments start and end are interpreted as in slice notation.\n\
1104\n\
1105Return -1 on failure.");
1106
1107static PyObject *
1108bytes_rfind(PyBytesObject *self, PyObject *args)
1109{
1110 Py_ssize_t result = bytes_find_internal(self, args, -1);
1111 if (result == -2)
1112 return NULL;
1113 return PyInt_FromSsize_t(result);
1114}
1115
1116
1117PyDoc_STRVAR(rindex__doc__,
1118"B.rindex(sub [,start [,end]]) -> int\n\
1119\n\
1120Like B.rfind() but raise ValueError when the subsection is not found.");
1121
1122static PyObject *
1123bytes_rindex(PyBytesObject *self, PyObject *args)
1124{
1125 Py_ssize_t result = bytes_find_internal(self, args, -1);
1126 if (result == -2)
1127 return NULL;
1128 if (result == -1) {
1129 PyErr_SetString(PyExc_ValueError,
1130 "subsection not found");
1131 return NULL;
1132 }
1133 return PyInt_FromSsize_t(result);
1134}
1135
1136
1137/* Matches the end (direction >= 0) or start (direction < 0) of self
1138 * against substr, using the start and end arguments. Returns
1139 * -1 on error, 0 if not found and 1 if found.
1140 */
1141Py_LOCAL(int)
1142_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1143 Py_ssize_t end, int direction)
1144{
1145 Py_ssize_t len = PyBytes_GET_SIZE(self);
1146 Py_ssize_t slen;
1147 const char* sub;
1148 const char* str;
1149
1150 if (PyBytes_Check(substr)) {
1151 sub = PyBytes_AS_STRING(substr);
1152 slen = PyBytes_GET_SIZE(substr);
1153 }
1154 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1155 return -1;
1156 str = PyBytes_AS_STRING(self);
1157
1158 _adjust_indices(&start, &end, len);
1159
1160 if (direction < 0) {
1161 /* startswith */
1162 if (start+slen > len)
1163 return 0;
1164 } else {
1165 /* endswith */
1166 if (end-start < slen || start > len)
1167 return 0;
1168
1169 if (end-slen > start)
1170 start = end - slen;
1171 }
1172 if (end-start >= slen)
1173 return ! memcmp(str+start, sub, slen);
1174 return 0;
1175}
1176
1177
1178PyDoc_STRVAR(startswith__doc__,
1179"B.startswith(prefix[, start[, end]]) -> bool\n\
1180\n\
1181Return True if B starts with the specified prefix, False otherwise.\n\
1182With optional start, test B beginning at that position.\n\
1183With optional end, stop comparing B at that position.\n\
1184prefix can also be a tuple of strings to try.");
1185
1186static PyObject *
1187bytes_startswith(PyBytesObject *self, PyObject *args)
1188{
1189 Py_ssize_t start = 0;
1190 Py_ssize_t end = PY_SSIZE_T_MAX;
1191 PyObject *subobj;
1192 int result;
1193
1194 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1195 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1196 return NULL;
1197 if (PyTuple_Check(subobj)) {
1198 Py_ssize_t i;
1199 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1200 result = _bytes_tailmatch(self,
1201 PyTuple_GET_ITEM(subobj, i),
1202 start, end, -1);
1203 if (result == -1)
1204 return NULL;
1205 else if (result) {
1206 Py_RETURN_TRUE;
1207 }
1208 }
1209 Py_RETURN_FALSE;
1210 }
1211 result = _bytes_tailmatch(self, subobj, start, end, -1);
1212 if (result == -1)
1213 return NULL;
1214 else
1215 return PyBool_FromLong(result);
1216}
1217
1218PyDoc_STRVAR(endswith__doc__,
1219"B.endswith(suffix[, start[, end]]) -> bool\n\
1220\n\
1221Return True if B ends with the specified suffix, False otherwise.\n\
1222With optional start, test B beginning at that position.\n\
1223With optional end, stop comparing B at that position.\n\
1224suffix can also be a tuple of strings to try.");
1225
1226static PyObject *
1227bytes_endswith(PyBytesObject *self, PyObject *args)
1228{
1229 Py_ssize_t start = 0;
1230 Py_ssize_t end = PY_SSIZE_T_MAX;
1231 PyObject *subobj;
1232 int result;
1233
1234 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1235 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1236 return NULL;
1237 if (PyTuple_Check(subobj)) {
1238 Py_ssize_t i;
1239 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1240 result = _bytes_tailmatch(self,
1241 PyTuple_GET_ITEM(subobj, i),
1242 start, end, +1);
1243 if (result == -1)
1244 return NULL;
1245 else if (result) {
1246 Py_RETURN_TRUE;
1247 }
1248 }
1249 Py_RETURN_FALSE;
1250 }
1251 result = _bytes_tailmatch(self, subobj, start, end, +1);
1252 if (result == -1)
1253 return NULL;
1254 else
1255 return PyBool_FromLong(result);
1256}
1257
1258
1259
1260PyDoc_STRVAR(translate__doc__,
1261"B.translate(table [,deletechars]) -> bytes\n\
1262\n\
1263Return a copy of the bytes B, where all characters occurring\n\
1264in the optional argument deletechars are removed, and the\n\
1265remaining characters have been mapped through the given\n\
1266translation table, which must be a bytes of length 256.");
1267
1268static PyObject *
1269bytes_translate(PyBytesObject *self, PyObject *args)
1270{
1271 register char *input, *output;
1272 register const char *table;
1273 register Py_ssize_t i, c, changed = 0;
1274 PyObject *input_obj = (PyObject*)self;
1275 const char *table1, *output_start, *del_table=NULL;
1276 Py_ssize_t inlen, tablen, dellen = 0;
1277 PyObject *result;
1278 int trans_table[256];
1279 PyObject *tableobj, *delobj = NULL;
1280
1281 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1282 &tableobj, &delobj))
1283 return NULL;
1284
1285 if (PyBytes_Check(tableobj)) {
1286 table1 = PyBytes_AS_STRING(tableobj);
1287 tablen = PyBytes_GET_SIZE(tableobj);
1288 }
1289 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1290 return NULL;
1291
1292 if (tablen != 256) {
1293 PyErr_SetString(PyExc_ValueError,
1294 "translation table must be 256 characters long");
1295 return NULL;
1296 }
1297
1298 if (delobj != NULL) {
1299 if (PyBytes_Check(delobj)) {
1300 del_table = PyBytes_AS_STRING(delobj);
1301 dellen = PyBytes_GET_SIZE(delobj);
1302 }
1303 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1304 return NULL;
1305 }
1306 else {
1307 del_table = NULL;
1308 dellen = 0;
1309 }
1310
1311 table = table1;
1312 inlen = PyBytes_GET_SIZE(input_obj);
1313 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1314 if (result == NULL)
1315 return NULL;
1316 output_start = output = PyBytes_AsString(result);
1317 input = PyBytes_AS_STRING(input_obj);
1318
1319 if (dellen == 0) {
1320 /* If no deletions are required, use faster code */
1321 for (i = inlen; --i >= 0; ) {
1322 c = Py_CHARMASK(*input++);
1323 if (Py_CHARMASK((*output++ = table[c])) != c)
1324 changed = 1;
1325 }
1326 if (changed || !PyBytes_CheckExact(input_obj))
1327 return result;
1328 Py_DECREF(result);
1329 Py_INCREF(input_obj);
1330 return input_obj;
1331 }
1332
1333 for (i = 0; i < 256; i++)
1334 trans_table[i] = Py_CHARMASK(table[i]);
1335
1336 for (i = 0; i < dellen; i++)
1337 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1338
1339 for (i = inlen; --i >= 0; ) {
1340 c = Py_CHARMASK(*input++);
1341 if (trans_table[c] != -1)
1342 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1343 continue;
1344 changed = 1;
1345 }
1346 if (!changed && PyBytes_CheckExact(input_obj)) {
1347 Py_DECREF(result);
1348 Py_INCREF(input_obj);
1349 return input_obj;
1350 }
1351 /* Fix the size of the resulting string */
1352 if (inlen > 0)
1353 PyBytes_Resize(result, output - output_start);
1354 return result;
1355}
1356
1357
/* Search directions.  REVERSE is parenthesized so that it stays a single
   operand wherever it is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in target[0:target_len], or
   NULL when there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* True when pattern[0:length] occurs in target at offset.  The first and
   last bytes are compared directly and memcmp covers the bytes after the
   first; with length < 2 the memcmp size (length-2) would be negative.
   Arguments may be arbitrary expressions but are evaluated more than
   once, so they must not have side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                  \
    ((target)[(offset)] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1371
1372
1373/* Bytes ops must return a string. */
1374/* If the object is subclass of bytes, create a copy */
1375Py_LOCAL(PyBytesObject *)
1376return_self(PyBytesObject *self)
1377{
1378 if (PyBytes_CheckExact(self)) {
1379 Py_INCREF(self);
1380 return (PyBytesObject *)self;
1381 }
1382 return (PyBytesObject *)PyBytes_FromStringAndSize(
1383 PyBytes_AS_STRING(self),
1384 PyBytes_GET_SIZE(self));
1385}
1386
1387Py_LOCAL_INLINE(Py_ssize_t)
1388countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1389{
1390 Py_ssize_t count=0;
1391 const char *start=target;
1392 const char *end=target+target_len;
1393
1394 while ( (start=findchar(start, end-start, c)) != NULL ) {
1395 count++;
1396 if (count >= maxcount)
1397 break;
1398 start += 1;
1399 }
1400 return count;
1401}
1402
1403Py_LOCAL(Py_ssize_t)
1404findstring(const char *target, Py_ssize_t target_len,
1405 const char *pattern, Py_ssize_t pattern_len,
1406 Py_ssize_t start,
1407 Py_ssize_t end,
1408 int direction)
1409{
1410 if (start < 0) {
1411 start += target_len;
1412 if (start < 0)
1413 start = 0;
1414 }
1415 if (end > target_len) {
1416 end = target_len;
1417 } else if (end < 0) {
1418 end += target_len;
1419 if (end < 0)
1420 end = 0;
1421 }
1422
1423 /* zero-length substrings always match at the first attempt */
1424 if (pattern_len == 0)
1425 return (direction > 0) ? start : end;
1426
1427 end -= pattern_len;
1428
1429 if (direction < 0) {
1430 for (; end >= start; end--)
1431 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1432 return end;
1433 } else {
1434 for (; start <= end; start++)
1435 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1436 return start;
1437 }
1438 return -1;
1439}
1440
1441Py_LOCAL_INLINE(Py_ssize_t)
1442countstring(const char *target, Py_ssize_t target_len,
1443 const char *pattern, Py_ssize_t pattern_len,
1444 Py_ssize_t start,
1445 Py_ssize_t end,
1446 int direction, Py_ssize_t maxcount)
1447{
1448 Py_ssize_t count=0;
1449
1450 if (start < 0) {
1451 start += target_len;
1452 if (start < 0)
1453 start = 0;
1454 }
1455 if (end > target_len) {
1456 end = target_len;
1457 } else if (end < 0) {
1458 end += target_len;
1459 if (end < 0)
1460 end = 0;
1461 }
1462
1463 /* zero-length substrings match everywhere */
1464 if (pattern_len == 0 || maxcount == 0) {
1465 if (target_len+1 < maxcount)
1466 return target_len+1;
1467 return maxcount;
1468 }
1469
1470 end -= pattern_len;
1471 if (direction < 0) {
1472 for (; (end >= start); end--)
1473 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1474 count++;
1475 if (--maxcount <= 0) break;
1476 end -= pattern_len-1;
1477 }
1478 } else {
1479 for (; (start <= end); start++)
1480 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1481 count++;
1482 if (--maxcount <= 0)
1483 break;
1484 start += pattern_len-1;
1485 }
1486 }
1487 return count;
1488}
1489
1490
1491/* Algorithms for different cases of string replacement */
1492
1493/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1494Py_LOCAL(PyBytesObject *)
1495replace_interleave(PyBytesObject *self,
1496 const char *to_s, Py_ssize_t to_len,
1497 Py_ssize_t maxcount)
1498{
1499 char *self_s, *result_s;
1500 Py_ssize_t self_len, result_len;
1501 Py_ssize_t count, i, product;
1502 PyBytesObject *result;
1503
1504 self_len = PyBytes_GET_SIZE(self);
1505
1506 /* 1 at the end plus 1 after every character */
1507 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001508 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001509 count = maxcount;
1510
1511 /* Check for overflow */
1512 /* result_len = count * to_len + self_len; */
1513 product = count * to_len;
1514 if (product / to_len != count) {
1515 PyErr_SetString(PyExc_OverflowError,
1516 "replace string is too long");
1517 return NULL;
1518 }
1519 result_len = product + self_len;
1520 if (result_len < 0) {
1521 PyErr_SetString(PyExc_OverflowError,
1522 "replace string is too long");
1523 return NULL;
1524 }
1525
1526 if (! (result = (PyBytesObject *)
1527 PyBytes_FromStringAndSize(NULL, result_len)) )
1528 return NULL;
1529
1530 self_s = PyBytes_AS_STRING(self);
1531 result_s = PyBytes_AS_STRING(result);
1532
1533 /* TODO: special case single character, which doesn't need memcpy */
1534
1535 /* Lay the first one down (guaranteed this will occur) */
1536 Py_MEMCPY(result_s, to_s, to_len);
1537 result_s += to_len;
1538 count -= 1;
1539
1540 for (i=0; i<count; i++) {
1541 *result_s++ = *self_s++;
1542 Py_MEMCPY(result_s, to_s, to_len);
1543 result_s += to_len;
1544 }
1545
1546 /* Copy the rest of the original string */
1547 Py_MEMCPY(result_s, self_s, self_len-i);
1548
1549 return result;
1550}
1551
1552/* Special case for deleting a single character */
1553/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1554Py_LOCAL(PyBytesObject *)
1555replace_delete_single_character(PyBytesObject *self,
1556 char from_c, Py_ssize_t maxcount)
1557{
1558 char *self_s, *result_s;
1559 char *start, *next, *end;
1560 Py_ssize_t self_len, result_len;
1561 Py_ssize_t count;
1562 PyBytesObject *result;
1563
1564 self_len = PyBytes_GET_SIZE(self);
1565 self_s = PyBytes_AS_STRING(self);
1566
1567 count = countchar(self_s, self_len, from_c, maxcount);
1568 if (count == 0) {
1569 return return_self(self);
1570 }
1571
1572 result_len = self_len - count; /* from_len == 1 */
1573 assert(result_len>=0);
1574
1575 if ( (result = (PyBytesObject *)
1576 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1577 return NULL;
1578 result_s = PyBytes_AS_STRING(result);
1579
1580 start = self_s;
1581 end = self_s + self_len;
1582 while (count-- > 0) {
1583 next = findchar(start, end-start, from_c);
1584 if (next == NULL)
1585 break;
1586 Py_MEMCPY(result_s, start, next-start);
1587 result_s += (next-start);
1588 start = next+1;
1589 }
1590 Py_MEMCPY(result_s, start, end-start);
1591
1592 return result;
1593}
1594
1595/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1596
1597Py_LOCAL(PyBytesObject *)
1598replace_delete_substring(PyBytesObject *self,
1599 const char *from_s, Py_ssize_t from_len,
1600 Py_ssize_t maxcount)
1601{
1602 char *self_s, *result_s;
1603 char *start, *next, *end;
1604 Py_ssize_t self_len, result_len;
1605 Py_ssize_t count, offset;
1606 PyBytesObject *result;
1607
1608 self_len = PyBytes_GET_SIZE(self);
1609 self_s = PyBytes_AS_STRING(self);
1610
1611 count = countstring(self_s, self_len,
1612 from_s, from_len,
1613 0, self_len, 1,
1614 maxcount);
1615
1616 if (count == 0) {
1617 /* no matches */
1618 return return_self(self);
1619 }
1620
1621 result_len = self_len - (count * from_len);
1622 assert (result_len>=0);
1623
1624 if ( (result = (PyBytesObject *)
1625 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1626 return NULL;
1627
1628 result_s = PyBytes_AS_STRING(result);
1629
1630 start = self_s;
1631 end = self_s + self_len;
1632 while (count-- > 0) {
1633 offset = findstring(start, end-start,
1634 from_s, from_len,
1635 0, end-start, FORWARD);
1636 if (offset == -1)
1637 break;
1638 next = start + offset;
1639
1640 Py_MEMCPY(result_s, start, next-start);
1641
1642 result_s += (next-start);
1643 start = next+from_len;
1644 }
1645 Py_MEMCPY(result_s, start, end-start);
1646 return result;
1647}
1648
1649/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1650Py_LOCAL(PyBytesObject *)
1651replace_single_character_in_place(PyBytesObject *self,
1652 char from_c, char to_c,
1653 Py_ssize_t maxcount)
1654{
1655 char *self_s, *result_s, *start, *end, *next;
1656 Py_ssize_t self_len;
1657 PyBytesObject *result;
1658
1659 /* The result string will be the same size */
1660 self_s = PyBytes_AS_STRING(self);
1661 self_len = PyBytes_GET_SIZE(self);
1662
1663 next = findchar(self_s, self_len, from_c);
1664
1665 if (next == NULL) {
1666 /* No matches; return the original bytes */
1667 return return_self(self);
1668 }
1669
1670 /* Need to make a new bytes */
1671 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1672 if (result == NULL)
1673 return NULL;
1674 result_s = PyBytes_AS_STRING(result);
1675 Py_MEMCPY(result_s, self_s, self_len);
1676
1677 /* change everything in-place, starting with this one */
1678 start = result_s + (next-self_s);
1679 *start = to_c;
1680 start++;
1681 end = result_s + self_len;
1682
1683 while (--maxcount > 0) {
1684 next = findchar(start, end-start, from_c);
1685 if (next == NULL)
1686 break;
1687 *next = to_c;
1688 start = next+1;
1689 }
1690
1691 return result;
1692}
1693
1694/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1695Py_LOCAL(PyBytesObject *)
1696replace_substring_in_place(PyBytesObject *self,
1697 const char *from_s, Py_ssize_t from_len,
1698 const char *to_s, Py_ssize_t to_len,
1699 Py_ssize_t maxcount)
1700{
1701 char *result_s, *start, *end;
1702 char *self_s;
1703 Py_ssize_t self_len, offset;
1704 PyBytesObject *result;
1705
1706 /* The result bytes will be the same size */
1707
1708 self_s = PyBytes_AS_STRING(self);
1709 self_len = PyBytes_GET_SIZE(self);
1710
1711 offset = findstring(self_s, self_len,
1712 from_s, from_len,
1713 0, self_len, FORWARD);
1714 if (offset == -1) {
1715 /* No matches; return the original bytes */
1716 return return_self(self);
1717 }
1718
1719 /* Need to make a new bytes */
1720 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1721 if (result == NULL)
1722 return NULL;
1723 result_s = PyBytes_AS_STRING(result);
1724 Py_MEMCPY(result_s, self_s, self_len);
1725
1726 /* change everything in-place, starting with this one */
1727 start = result_s + offset;
1728 Py_MEMCPY(start, to_s, from_len);
1729 start += from_len;
1730 end = result_s + self_len;
1731
1732 while ( --maxcount > 0) {
1733 offset = findstring(start, end-start,
1734 from_s, from_len,
1735 0, end-start, FORWARD);
1736 if (offset==-1)
1737 break;
1738 Py_MEMCPY(start+offset, to_s, from_len);
1739 start += offset+from_len;
1740 }
1741
1742 return result;
1743}
1744
1745/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1746Py_LOCAL(PyBytesObject *)
1747replace_single_character(PyBytesObject *self,
1748 char from_c,
1749 const char *to_s, Py_ssize_t to_len,
1750 Py_ssize_t maxcount)
1751{
1752 char *self_s, *result_s;
1753 char *start, *next, *end;
1754 Py_ssize_t self_len, result_len;
1755 Py_ssize_t count, product;
1756 PyBytesObject *result;
1757
1758 self_s = PyBytes_AS_STRING(self);
1759 self_len = PyBytes_GET_SIZE(self);
1760
1761 count = countchar(self_s, self_len, from_c, maxcount);
1762 if (count == 0) {
1763 /* no matches, return unchanged */
1764 return return_self(self);
1765 }
1766
1767 /* use the difference between current and new, hence the "-1" */
1768 /* result_len = self_len + count * (to_len-1) */
1769 product = count * (to_len-1);
1770 if (product / (to_len-1) != count) {
1771 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1772 return NULL;
1773 }
1774 result_len = self_len + product;
1775 if (result_len < 0) {
1776 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1777 return NULL;
1778 }
1779
1780 if ( (result = (PyBytesObject *)
1781 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1782 return NULL;
1783 result_s = PyBytes_AS_STRING(result);
1784
1785 start = self_s;
1786 end = self_s + self_len;
1787 while (count-- > 0) {
1788 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001789 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001790 break;
1791
1792 if (next == start) {
1793 /* replace with the 'to' */
1794 Py_MEMCPY(result_s, to_s, to_len);
1795 result_s += to_len;
1796 start += 1;
1797 } else {
1798 /* copy the unchanged old then the 'to' */
1799 Py_MEMCPY(result_s, start, next-start);
1800 result_s += (next-start);
1801 Py_MEMCPY(result_s, to_s, to_len);
1802 result_s += to_len;
1803 start = next+1;
1804 }
1805 }
1806 /* Copy the remainder of the remaining bytes */
1807 Py_MEMCPY(result_s, start, end-start);
1808
1809 return result;
1810}
1811
1812/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1813Py_LOCAL(PyBytesObject *)
1814replace_substring(PyBytesObject *self,
1815 const char *from_s, Py_ssize_t from_len,
1816 const char *to_s, Py_ssize_t to_len,
1817 Py_ssize_t maxcount)
1818{
1819 char *self_s, *result_s;
1820 char *start, *next, *end;
1821 Py_ssize_t self_len, result_len;
1822 Py_ssize_t count, offset, product;
1823 PyBytesObject *result;
1824
1825 self_s = PyBytes_AS_STRING(self);
1826 self_len = PyBytes_GET_SIZE(self);
1827
1828 count = countstring(self_s, self_len,
1829 from_s, from_len,
1830 0, self_len, FORWARD, maxcount);
1831 if (count == 0) {
1832 /* no matches, return unchanged */
1833 return return_self(self);
1834 }
1835
1836 /* Check for overflow */
1837 /* result_len = self_len + count * (to_len-from_len) */
1838 product = count * (to_len-from_len);
1839 if (product / (to_len-from_len) != count) {
1840 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1841 return NULL;
1842 }
1843 result_len = self_len + product;
1844 if (result_len < 0) {
1845 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1846 return NULL;
1847 }
1848
1849 if ( (result = (PyBytesObject *)
1850 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1851 return NULL;
1852 result_s = PyBytes_AS_STRING(result);
1853
1854 start = self_s;
1855 end = self_s + self_len;
1856 while (count-- > 0) {
1857 offset = findstring(start, end-start,
1858 from_s, from_len,
1859 0, end-start, FORWARD);
1860 if (offset == -1)
1861 break;
1862 next = start+offset;
1863 if (next == start) {
1864 /* replace with the 'to' */
1865 Py_MEMCPY(result_s, to_s, to_len);
1866 result_s += to_len;
1867 start += from_len;
1868 } else {
1869 /* copy the unchanged old then the 'to' */
1870 Py_MEMCPY(result_s, start, next-start);
1871 result_s += (next-start);
1872 Py_MEMCPY(result_s, to_s, to_len);
1873 result_s += to_len;
1874 start = next+from_len;
1875 }
1876 }
1877 /* Copy the remainder of the remaining bytes */
1878 Py_MEMCPY(result_s, start, end-start);
1879
1880 return result;
1881}
1882
1883
1884Py_LOCAL(PyBytesObject *)
1885replace(PyBytesObject *self,
1886 const char *from_s, Py_ssize_t from_len,
1887 const char *to_s, Py_ssize_t to_len,
1888 Py_ssize_t maxcount)
1889{
1890 if (maxcount < 0) {
1891 maxcount = PY_SSIZE_T_MAX;
1892 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1893 /* nothing to do; return the original bytes */
1894 return return_self(self);
1895 }
1896
1897 if (maxcount == 0 ||
1898 (from_len == 0 && to_len == 0)) {
1899 /* nothing to do; return the original bytes */
1900 return return_self(self);
1901 }
1902
1903 /* Handle zero-length special cases */
1904
1905 if (from_len == 0) {
1906 /* insert the 'to' bytes everywhere. */
1907 /* >>> "Python".replace("", ".") */
1908 /* '.P.y.t.h.o.n.' */
1909 return replace_interleave(self, to_s, to_len, maxcount);
1910 }
1911
1912 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1913 /* point for an empty self bytes to generate a non-empty bytes */
1914 /* Special case so the remaining code always gets a non-empty bytes */
1915 if (PyBytes_GET_SIZE(self) == 0) {
1916 return return_self(self);
1917 }
1918
1919 if (to_len == 0) {
1920 /* delete all occurances of 'from' bytes */
1921 if (from_len == 1) {
1922 return replace_delete_single_character(
1923 self, from_s[0], maxcount);
1924 } else {
1925 return replace_delete_substring(self, from_s, from_len, maxcount);
1926 }
1927 }
1928
1929 /* Handle special case where both bytes have the same length */
1930
1931 if (from_len == to_len) {
1932 if (from_len == 1) {
1933 return replace_single_character_in_place(
1934 self,
1935 from_s[0],
1936 to_s[0],
1937 maxcount);
1938 } else {
1939 return replace_substring_in_place(
1940 self, from_s, from_len, to_s, to_len, maxcount);
1941 }
1942 }
1943
1944 /* Otherwise use the more generic algorithms */
1945 if (from_len == 1) {
1946 return replace_single_character(self, from_s[0],
1947 to_s, to_len, maxcount);
1948 } else {
1949 /* len('from')>=2, len('to')>=1 */
1950 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1951 }
1952}
1953
1954PyDoc_STRVAR(replace__doc__,
1955"B.replace (old, new[, count]) -> bytes\n\
1956\n\
1957Return a copy of bytes B with all occurrences of subsection\n\
1958old replaced by new. If the optional argument count is\n\
1959given, only the first count occurrences are replaced.");
1960
1961static PyObject *
1962bytes_replace(PyBytesObject *self, PyObject *args)
1963{
1964 Py_ssize_t count = -1;
1965 PyObject *from, *to;
1966 const char *from_s, *to_s;
1967 Py_ssize_t from_len, to_len;
1968
1969 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1970 return NULL;
1971
1972 if (PyBytes_Check(from)) {
1973 from_s = PyBytes_AS_STRING(from);
1974 from_len = PyBytes_GET_SIZE(from);
1975 }
1976 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1977 return NULL;
1978
1979 if (PyBytes_Check(to)) {
1980 to_s = PyBytes_AS_STRING(to);
1981 to_len = PyBytes_GET_SIZE(to);
1982 }
1983 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1984 return NULL;
1985
1986 return (PyObject *)replace((PyBytesObject *) self,
1987 from_s, from_len,
1988 to_s, to_len, count);
1989}
1990
1991
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that any expression can be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to the list as a new bytes object.
   Uses the caller's locals `str` and `list` and jumps to the caller's
   `onError` label on failure.  Expands to several statements, so it must
   not be used as the unbraced body of an if/else or loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as item number `count` of the list: directly into
   the slot while count < MAX_PREALLOC, by appending afterwards.  Uses the
   caller's locals `str`, `list` and `count` (which is incremented) and
   jumps to the caller's `onError` label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)
2036
2037
2038Py_LOCAL_INLINE(PyObject *)
2039split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2040{
2041 register Py_ssize_t i, j, count=0;
2042 PyObject *str;
2043 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2044
2045 if (list == NULL)
2046 return NULL;
2047
2048 i = j = 0;
2049 while ((j < len) && (maxcount-- > 0)) {
2050 for(; j<len; j++) {
2051 /* I found that using memchr makes no difference */
2052 if (s[j] == ch) {
2053 SPLIT_ADD(s, i, j);
2054 i = j = j + 1;
2055 break;
2056 }
2057 }
2058 }
2059 if (i <= len) {
2060 SPLIT_ADD(s, i, len);
2061 }
2062 FIX_PREALLOC_SIZE(list);
2063 return list;
2064
2065 onError:
2066 Py_DECREF(list);
2067 return NULL;
2068}
2069
2070PyDoc_STRVAR(split__doc__,
2071"B.split(sep [,maxsplit]) -> list of bytes\n\
2072\n\
2073Return a list of the bytes in the string B, using sep as the\n\
2074delimiter. If maxsplit is given, at most maxsplit\n\
2075splits are done.");
2076
2077static PyObject *
2078bytes_split(PyBytesObject *self, PyObject *args)
2079{
2080 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2081 Py_ssize_t maxsplit = -1, count=0;
2082 const char *s = PyBytes_AS_STRING(self), *sub;
2083 PyObject *list, *str, *subobj;
2084#ifdef USE_FAST
2085 Py_ssize_t pos;
2086#endif
2087
2088 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2089 return NULL;
2090 if (maxsplit < 0)
2091 maxsplit = PY_SSIZE_T_MAX;
2092 if (PyBytes_Check(subobj)) {
2093 sub = PyBytes_AS_STRING(subobj);
2094 n = PyBytes_GET_SIZE(subobj);
2095 }
2096 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2097 return NULL;
2098
2099 if (n == 0) {
2100 PyErr_SetString(PyExc_ValueError, "empty separator");
2101 return NULL;
2102 }
2103 else if (n == 1)
2104 return split_char(s, len, sub[0], maxsplit);
2105
2106 list = PyList_New(PREALLOC_SIZE(maxsplit));
2107 if (list == NULL)
2108 return NULL;
2109
2110#ifdef USE_FAST
2111 i = j = 0;
2112 while (maxsplit-- > 0) {
2113 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2114 if (pos < 0)
2115 break;
2116 j = i+pos;
2117 SPLIT_ADD(s, i, j);
2118 i = j + n;
2119 }
2120#else
2121 i = j = 0;
2122 while ((j+n <= len) && (maxsplit-- > 0)) {
2123 for (; j+n <= len; j++) {
2124 if (Py_STRING_MATCH(s, j, sub, n)) {
2125 SPLIT_ADD(s, i, j);
2126 i = j = j + n;
2127 break;
2128 }
2129 }
2130 }
2131#endif
2132 SPLIT_ADD(s, i, len);
2133 FIX_PREALLOC_SIZE(list);
2134 return list;
2135
2136 onError:
2137 Py_DECREF(list);
2138 return NULL;
2139}
2140
2141PyDoc_STRVAR(partition__doc__,
2142"B.partition(sep) -> (head, sep, tail)\n\
2143\n\
2144Searches for the separator sep in B, and returns the part before it,\n\
2145the separator itself, and the part after it. If the separator is not\n\
2146found, returns B and two empty bytes.");
2147
2148static PyObject *
2149bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2150{
2151 PyObject *bytesep, *result;
2152
2153 bytesep = PyBytes_FromObject(sep_obj);
2154 if (! bytesep)
2155 return NULL;
2156
2157 result = stringlib_partition(
2158 (PyObject*) self,
2159 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002160 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002161 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2162 );
2163
2164 Py_DECREF(bytesep);
2165 return result;
2166}
2167
2168PyDoc_STRVAR(rpartition__doc__,
2169"B.rpartition(sep) -> (tail, sep, head)\n\
2170\n\
2171Searches for the separator sep in B, starting at the end of B, and returns\n\
2172the part before it, the separator itself, and the part after it. If the\n\
2173separator is not found, returns two empty bytes and B.");
2174
2175static PyObject *
2176bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2177{
2178 PyObject *bytesep, *result;
2179
2180 bytesep = PyBytes_FromObject(sep_obj);
2181 if (! bytesep)
2182 return NULL;
2183
2184 result = stringlib_rpartition(
2185 (PyObject*) self,
2186 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002187 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002188 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2189 );
2190
2191 Py_DECREF(bytesep);
2192 return result;
2193}
2194
2195Py_LOCAL_INLINE(PyObject *)
2196rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2197{
2198 register Py_ssize_t i, j, count=0;
2199 PyObject *str;
2200 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2201
2202 if (list == NULL)
2203 return NULL;
2204
2205 i = j = len - 1;
2206 while ((i >= 0) && (maxcount-- > 0)) {
2207 for (; i >= 0; i--) {
2208 if (s[i] == ch) {
2209 SPLIT_ADD(s, i + 1, j + 1);
2210 j = i = i - 1;
2211 break;
2212 }
2213 }
2214 }
2215 if (j >= -1) {
2216 SPLIT_ADD(s, 0, j + 1);
2217 }
2218 FIX_PREALLOC_SIZE(list);
2219 if (PyList_Reverse(list) < 0)
2220 goto onError;
2221
2222 return list;
2223
2224 onError:
2225 Py_DECREF(list);
2226 return NULL;
2227}
2228
2229PyDoc_STRVAR(rsplit__doc__,
2230"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2231\n\
2232Return a list of the sections in the byte B, using sep as the\n\
2233delimiter, starting at the end of the bytes and working\n\
2234to the front. If maxsplit is given, at most maxsplit splits are\n\
2235done.");
2236
2237static PyObject *
2238bytes_rsplit(PyBytesObject *self, PyObject *args)
2239{
2240 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2241 Py_ssize_t maxsplit = -1, count=0;
2242 const char *s = PyBytes_AS_STRING(self), *sub;
2243 PyObject *list, *str, *subobj;
2244
2245 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2246 return NULL;
2247 if (maxsplit < 0)
2248 maxsplit = PY_SSIZE_T_MAX;
2249 if (PyBytes_Check(subobj)) {
2250 sub = PyBytes_AS_STRING(subobj);
2251 n = PyBytes_GET_SIZE(subobj);
2252 }
2253 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2254 return NULL;
2255
2256 if (n == 0) {
2257 PyErr_SetString(PyExc_ValueError, "empty separator");
2258 return NULL;
2259 }
2260 else if (n == 1)
2261 return rsplit_char(s, len, sub[0], maxsplit);
2262
2263 list = PyList_New(PREALLOC_SIZE(maxsplit));
2264 if (list == NULL)
2265 return NULL;
2266
2267 j = len;
2268 i = j - n;
2269
2270 while ( (i >= 0) && (maxsplit-- > 0) ) {
2271 for (; i>=0; i--) {
2272 if (Py_STRING_MATCH(s, i, sub, n)) {
2273 SPLIT_ADD(s, i + n, j);
2274 j = i;
2275 i -= n;
2276 break;
2277 }
2278 }
2279 }
2280 SPLIT_ADD(s, 0, j);
2281 FIX_PREALLOC_SIZE(list);
2282 if (PyList_Reverse(list) < 0)
2283 goto onError;
2284 return list;
2285
2286onError:
2287 Py_DECREF(list);
2288 return NULL;
2289}
2290
2291PyDoc_STRVAR(extend__doc__,
2292"B.extend(iterable int) -> None\n\
2293\n\
2294Append all the elements from the iterator or sequence to the\n\
2295end of the bytes.");
2296static PyObject *
2297bytes_extend(PyBytesObject *self, PyObject *arg)
2298{
2299 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2300 return NULL;
2301 Py_RETURN_NONE;
2302}
2303
2304
2305PyDoc_STRVAR(reverse__doc__,
2306"B.reverse() -> None\n\
2307\n\
2308Reverse the order of the values in bytes in place.");
2309static PyObject *
2310bytes_reverse(PyBytesObject *self, PyObject *unused)
2311{
2312 char swap, *head, *tail;
2313 Py_ssize_t i, j, n = self->ob_size;
2314
2315 j = n / 2;
2316 head = self->ob_bytes;
2317 tail = head + n - 1;
2318 for (i = 0; i < j; i++) {
2319 swap = *head;
2320 *head++ = *tail;
2321 *tail-- = swap;
2322 }
2323
2324 Py_RETURN_NONE;
2325}
2326
2327PyDoc_STRVAR(insert__doc__,
2328"B.insert(index, int) -> None\n\
2329\n\
2330Insert a single item into the bytes before the given index.");
2331static PyObject *
2332bytes_insert(PyBytesObject *self, PyObject *args)
2333{
2334 int value;
2335 Py_ssize_t where, n = self->ob_size;
2336
2337 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2338 return NULL;
2339
2340 if (n == PY_SSIZE_T_MAX) {
2341 PyErr_SetString(PyExc_OverflowError,
2342 "cannot add more objects to bytes");
2343 return NULL;
2344 }
2345 if (value < 0 || value >= 256) {
2346 PyErr_SetString(PyExc_ValueError,
2347 "byte must be in range(0, 256)");
2348 return NULL;
2349 }
2350 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2351 return NULL;
2352
2353 if (where < 0) {
2354 where += n;
2355 if (where < 0)
2356 where = 0;
2357 }
2358 if (where > n)
2359 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002360 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002361 self->ob_bytes[where] = value;
2362
2363 Py_RETURN_NONE;
2364}
2365
2366PyDoc_STRVAR(append__doc__,
2367"B.append(int) -> None\n\
2368\n\
2369Append a single item to the end of the bytes.");
2370static PyObject *
2371bytes_append(PyBytesObject *self, PyObject *arg)
2372{
2373 int value;
2374 Py_ssize_t n = self->ob_size;
2375
2376 if (! _getbytevalue(arg, &value))
2377 return NULL;
2378 if (n == PY_SSIZE_T_MAX) {
2379 PyErr_SetString(PyExc_OverflowError,
2380 "cannot add more objects to bytes");
2381 return NULL;
2382 }
2383 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2384 return NULL;
2385
2386 self->ob_bytes[n] = value;
2387
2388 Py_RETURN_NONE;
2389}
2390
2391PyDoc_STRVAR(pop__doc__,
2392"B.pop([index]) -> int\n\
2393\n\
2394Remove and return a single item from the bytes. If no index\n\
2395argument is give, will pop the last value.");
2396static PyObject *
2397bytes_pop(PyBytesObject *self, PyObject *args)
2398{
2399 int value;
2400 Py_ssize_t where = -1, n = self->ob_size;
2401
2402 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2403 return NULL;
2404
2405 if (n == 0) {
2406 PyErr_SetString(PyExc_OverflowError,
2407 "cannot pop an empty bytes");
2408 return NULL;
2409 }
2410 if (where < 0)
2411 where += self->ob_size;
2412 if (where < 0 || where >= self->ob_size) {
2413 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2414 return NULL;
2415 }
2416
2417 value = self->ob_bytes[where];
2418 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2419 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2420 return NULL;
2421
2422 return PyInt_FromLong(value);
2423}
2424
2425PyDoc_STRVAR(remove__doc__,
2426"B.remove(int) -> None\n\
2427\n\
2428Remove the first occurance of a value in bytes");
2429static PyObject *
2430bytes_remove(PyBytesObject *self, PyObject *arg)
2431{
2432 int value;
2433 Py_ssize_t where, n = self->ob_size;
2434
2435 if (! _getbytevalue(arg, &value))
2436 return NULL;
2437
2438 for (where = 0; where < n; where++) {
2439 if (self->ob_bytes[where] == value)
2440 break;
2441 }
2442 if (where == n) {
2443 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2444 return NULL;
2445 }
2446
2447 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2448 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2449 return NULL;
2450
2451 Py_RETURN_NONE;
2452}
2453
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002454/* XXX These two helpers could be optimized if argsize == 1 */
2455
2456Py_ssize_t
2457lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2458 void *argptr, Py_ssize_t argsize)
2459{
2460 Py_ssize_t i = 0;
2461 while (i < mysize && memchr(argptr, myptr[i], argsize))
2462 i++;
2463 return i;
2464}
2465
2466Py_ssize_t
2467rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2468 void *argptr, Py_ssize_t argsize)
2469{
2470 Py_ssize_t i = mysize - 1;
2471 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2472 i--;
2473 return i + 1;
2474}
2475
2476PyDoc_STRVAR(strip__doc__,
2477"B.strip(bytes) -> bytes\n\
2478\n\
2479Strip leading and trailing bytes contained in the argument.");
2480static PyObject *
2481bytes_strip(PyBytesObject *self, PyObject *arg)
2482{
2483 Py_ssize_t left, right, mysize, argsize;
2484 void *myptr, *argptr;
2485 if (arg == NULL || !PyBytes_Check(arg)) {
2486 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2487 return NULL;
2488 }
2489 myptr = self->ob_bytes;
2490 mysize = self->ob_size;
2491 argptr = ((PyBytesObject *)arg)->ob_bytes;
2492 argsize = ((PyBytesObject *)arg)->ob_size;
2493 left = lstrip_helper(myptr, mysize, argptr, argsize);
2494 right = rstrip_helper(myptr, mysize, argptr, argsize);
2495 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2496}
2497
2498PyDoc_STRVAR(lstrip__doc__,
2499"B.lstrip(bytes) -> bytes\n\
2500\n\
2501Strip leading bytes contained in the argument.");
2502static PyObject *
2503bytes_lstrip(PyBytesObject *self, PyObject *arg)
2504{
2505 Py_ssize_t left, right, mysize, argsize;
2506 void *myptr, *argptr;
2507 if (arg == NULL || !PyBytes_Check(arg)) {
2508 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2509 return NULL;
2510 }
2511 myptr = self->ob_bytes;
2512 mysize = self->ob_size;
2513 argptr = ((PyBytesObject *)arg)->ob_bytes;
2514 argsize = ((PyBytesObject *)arg)->ob_size;
2515 left = lstrip_helper(myptr, mysize, argptr, argsize);
2516 right = mysize;
2517 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2518}
2519
2520PyDoc_STRVAR(rstrip__doc__,
2521"B.rstrip(bytes) -> bytes\n\
2522\n\
2523Strip trailing bytes contained in the argument.");
2524static PyObject *
2525bytes_rstrip(PyBytesObject *self, PyObject *arg)
2526{
2527 Py_ssize_t left, right, mysize, argsize;
2528 void *myptr, *argptr;
2529 if (arg == NULL || !PyBytes_Check(arg)) {
2530 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2531 return NULL;
2532 }
2533 myptr = self->ob_bytes;
2534 mysize = self->ob_size;
2535 argptr = ((PyBytesObject *)arg)->ob_bytes;
2536 argsize = ((PyBytesObject *)arg)->ob_size;
2537 left = 0;
2538 right = rstrip_helper(myptr, mysize, argptr, argsize);
2539 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2540}
Neal Norwitz6968b052007-02-27 19:02:19 +00002541
Guido van Rossumd624f182006-04-24 13:47:05 +00002542PyDoc_STRVAR(decode_doc,
2543"B.decode([encoding[,errors]]) -> unicode obect.\n\
2544\n\
2545Decodes B using the codec registered for encoding. encoding defaults\n\
2546to the default encoding. errors may be given to set a different error\n\
2547handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2548a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2549as well as any other name registerd with codecs.register_error that is\n\
2550able to handle UnicodeDecodeErrors.");
2551
2552static PyObject *
2553bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002554{
Guido van Rossumd624f182006-04-24 13:47:05 +00002555 const char *encoding = NULL;
2556 const char *errors = NULL;
2557
2558 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2559 return NULL;
2560 if (encoding == NULL)
2561 encoding = PyUnicode_GetDefaultEncoding();
2562 return PyCodec_Decode(self, encoding, errors);
2563}
2564
Guido van Rossuma0867f72006-05-05 04:34:18 +00002565PyDoc_STRVAR(alloc_doc,
2566"B.__alloc__() -> int\n\
2567\n\
2568Returns the number of bytes actually allocated.");
2569
2570static PyObject *
2571bytes_alloc(PyBytesObject *self)
2572{
2573 return PyInt_FromSsize_t(self->ob_alloc);
2574}
2575
Guido van Rossum20188312006-05-05 15:15:40 +00002576PyDoc_STRVAR(join_doc,
2577"bytes.join(iterable_of_bytes) -> bytes\n\
2578\n\
2579Concatenates any number of bytes objects. Example:\n\
2580bytes.join([bytes('ab'), bytes('pq'), bytes('rs')]) -> bytes('abpqrs').");
2581
2582static PyObject *
2583bytes_join(PyObject *cls, PyObject *it)
2584{
2585 PyObject *seq;
2586 Py_ssize_t i;
2587 Py_ssize_t n;
2588 PyObject **items;
2589 Py_ssize_t totalsize = 0;
2590 PyObject *result;
2591 char *dest;
2592
2593 seq = PySequence_Fast(it, "can only join an iterable");
2594 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002595 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002596 n = PySequence_Fast_GET_SIZE(seq);
2597 items = PySequence_Fast_ITEMS(seq);
2598
2599 /* Compute the total size, and check that they are all bytes */
2600 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002601 PyObject *obj = items[i];
2602 if (!PyBytes_Check(obj)) {
2603 PyErr_Format(PyExc_TypeError,
2604 "can only join an iterable of bytes "
2605 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002606 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002607 (long)i, obj->ob_type->tp_name);
2608 goto error;
2609 }
2610 totalsize += PyBytes_GET_SIZE(obj);
2611 if (totalsize < 0) {
2612 PyErr_NoMemory();
2613 goto error;
2614 }
Guido van Rossum20188312006-05-05 15:15:40 +00002615 }
2616
2617 /* Allocate the result, and copy the bytes */
2618 result = PyBytes_FromStringAndSize(NULL, totalsize);
2619 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002620 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002621 dest = PyBytes_AS_STRING(result);
2622 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002623 PyObject *obj = items[i];
2624 Py_ssize_t size = PyBytes_GET_SIZE(obj);
2625 memcpy(dest, PyBytes_AS_STRING(obj), size);
2626 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002627 }
2628
2629 /* Done */
2630 Py_DECREF(seq);
2631 return result;
2632
2633 /* Error handling */
2634 error:
2635 Py_DECREF(seq);
2636 return NULL;
2637}
2638
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002639PyDoc_STRVAR(fromhex_doc,
2640"bytes.fromhex(string) -> bytes\n\
2641\n\
2642Create a bytes object from a string of hexadecimal numbers.\n\
2643Spaces between two numbers are accepted. Example:\n\
2644bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2645
/* Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15; any other value
 * of `c` (including negative or out-of-range ints) returns -1.
 *
 * Uses explicit range checks instead of <ctype.h> so the result does not
 * depend on the current locale and no ctype function is ever called with
 * an argument outside the unsigned char range.
 */
static int
hex_digit_to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
2659
2660static PyObject *
2661bytes_fromhex(PyObject *cls, PyObject *args)
2662{
2663 PyObject *newbytes;
2664 char *hex, *buf;
2665 Py_ssize_t len, byteslen, i, j;
2666 int top, bot;
2667
2668 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2669 return NULL;
2670
2671 byteslen = len / 2; /* max length if there are no spaces */
2672
2673 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2674 if (!newbytes)
2675 return NULL;
2676 buf = PyBytes_AS_STRING(newbytes);
2677
2678 for (i = j = 0; ; i += 2) {
2679 /* skip over spaces in the input */
2680 while (Py_CHARMASK(hex[i]) == ' ')
2681 i++;
2682 if (i >= len)
2683 break;
2684 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2685 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2686 if (top == -1 || bot == -1) {
2687 PyErr_Format(PyExc_ValueError,
2688 "non-hexadecimal number string '%c%c' found in "
2689 "fromhex() arg at position %zd",
2690 hex[i], hex[i+1], i);
2691 goto error;
2692 }
2693 buf[j++] = (top << 4) + bot;
2694 }
2695 if (PyBytes_Resize(newbytes, j) < 0)
2696 goto error;
2697 return newbytes;
2698
2699 error:
2700 Py_DECREF(newbytes);
2701 return NULL;
2702}
2703
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002704PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2705
2706static PyObject *
2707bytes_reduce(PyBytesObject *self)
2708{
2709 return Py_BuildValue("(O(s#))",
2710 self->ob_type,
2711 self->ob_bytes == NULL ? "" : self->ob_bytes,
2712 self->ob_size);
2713}
2714
/* Sequence slot table for the bytes type.  The two slice slots are left
   at 0; every other slot points at a bytes_* function defined elsewhere
   in this file. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002715static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002716 (lenfunc)bytes_length, /* sq_length */
2717 (binaryfunc)bytes_concat, /* sq_concat */
2718 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2719 (ssizeargfunc)bytes_getitem, /* sq_item */
2720 0, /* sq_slice */
2721 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2722 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002723 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002724 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2725 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002726};
2727
/* Mapping slot table for the bytes type: length, subscript and
   subscript assignment, in that order. */
2728static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002729 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002730 (binaryfunc)bytes_subscript,
2731 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002732};
2733
/* Buffer slot table for the bytes type.  The same function,
   bytes_getbuffer, is installed (under different casts) as the read,
   write and char-buffer entry; bytes_getsegcount supplies the segment
   count. */
2734static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002735 (readbufferproc)bytes_getbuffer,
2736 (writebufferproc)bytes_getbuffer,
2737 (segcountproc)bytes_getsegcount,
2738 /* XXX Bytes are not characters! But we need to implement
2739 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
2740 (charbufferproc)bytes_getbuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002741};
2742
/* Method table for the bytes type.  Each row is
   {python name, C function, calling-convention flags, doc string};
   "fromhex" and "join" additionally carry METH_CLASS.  The table ends
   with a {NULL} sentinel row. */
2743static PyMethodDef
2744bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002745 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2746 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2747 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2748 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2749 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2750 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2751 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2752 startswith__doc__},
2753 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2754 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2755 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2756 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2757 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2758 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2759 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2760 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2761 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2762 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2763 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2764 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002765 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2766 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2767 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002768 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002769 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002770 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2771 fromhex_doc},
Guido van Rossum20188312006-05-05 15:15:40 +00002772 {"join", (PyCFunction)bytes_join, METH_O|METH_CLASS, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002773 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002774 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002775};
2776
2777PyDoc_STRVAR(bytes_doc,
2778"bytes([iterable]) -> new array of bytes.\n\
2779\n\
2780If an argument is given it must be an iterable yielding ints in range(256).");
2781
/* Type object for "bytes".  It wires in the sequence, mapping and buffer
   slot tables and the method table defined above, plus the repr, str,
   rich-compare, init and dealloc functions.  tp_hash, tp_iter and
   tp_as_number are left at 0, and tp_flags is plain Py_TPFLAGS_DEFAULT. */
2782PyTypeObject PyBytes_Type = {
2783 PyObject_HEAD_INIT(&PyType_Type)
2784 0,
2785 "bytes",
2786 sizeof(PyBytesObject),
2787 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002788 (destructor)bytes_dealloc, /* tp_dealloc */
2789 0, /* tp_print */
2790 0, /* tp_getattr */
2791 0, /* tp_setattr */
2792 0, /* tp_compare */
2793 (reprfunc)bytes_repr, /* tp_repr */
2794 0, /* tp_as_number */
2795 &bytes_as_sequence, /* tp_as_sequence */
2796 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002797 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002798 0, /* tp_call */
2799 (reprfunc)bytes_str, /* tp_str */
2800 PyObject_GenericGetAttr, /* tp_getattro */
2801 0, /* tp_setattro */
2802 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002803 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002804 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002805 bytes_doc, /* tp_doc */
2806 0, /* tp_traverse */
2807 0, /* tp_clear */
2808 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2809 0, /* tp_weaklistoffset */
2810 0, /* tp_iter */
2811 0, /* tp_iternext */
2812 bytes_methods, /* tp_methods */
2813 0, /* tp_members */
2814 0, /* tp_getset */
2815 0, /* tp_base */
2816 0, /* tp_dict */
2817 0, /* tp_descr_get */
2818 0, /* tp_descr_set */
2819 0, /* tp_dictoffset */
2820 (initproc)bytes_init, /* tp_init */
2821 PyType_GenericAlloc, /* tp_alloc */
2822 PyType_GenericNew, /* tp_new */
2823 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002824};