blob: cb830e3cdb85eeb4f19a71ff4a81b84d567d1edf [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
Guido van Rossumad7d8d12007-04-13 01:39:34 +000034/* Helpers */
35
36static int
37_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000038{
39 PyObject *intarg = PyNumber_Int(arg);
40 if (! intarg)
41 return 0;
42 *value = PyInt_AsLong(intarg);
43 Py_DECREF(intarg);
44 if (*value < 0 || *value >= 256) {
45 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
46 return 0;
47 }
48 return 1;
49}
50
Guido van Rossumad7d8d12007-04-13 01:39:34 +000051Py_ssize_t
52_getbuffer(PyObject *obj, void **ptr)
53{
54 PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
55
56 if (buffer == NULL ||
57 PyUnicode_Check(obj) ||
58 buffer->bf_getreadbuffer == NULL ||
59 buffer->bf_getsegcount == NULL ||
60 buffer->bf_getsegcount(obj, NULL) != 1)
61 {
62 *ptr = NULL;
63 return -1;
64 }
65
66 return buffer->bf_getreadbuffer(obj, 0, ptr);
67}
68
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069/* Direct API functions */
70
71PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000072PyBytes_FromObject(PyObject *input)
73{
74 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
75 input, NULL);
76}
77
78PyObject *
79PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080{
81 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000082 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000083
Guido van Rossumd624f182006-04-24 13:47:05 +000084 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000085
86 new = PyObject_New(PyBytesObject, &PyBytes_Type);
87 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000088 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000089
Guido van Rossumf15a29f2007-05-04 00:41:39 +000090 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +000091 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000092 alloc = 0;
93 }
Guido van Rossumd624f182006-04-24 13:47:05 +000094 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +000095 alloc = size + 1;
96 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +000097 if (new->ob_bytes == NULL) {
98 Py_DECREF(new);
99 return NULL;
100 }
101 if (bytes != NULL)
102 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000103 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000104 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000105 new->ob_size = size;
106 new->ob_alloc = alloc;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000107
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108 return (PyObject *)new;
109}
110
111Py_ssize_t
112PyBytes_Size(PyObject *self)
113{
114 assert(self != NULL);
115 assert(PyBytes_Check(self));
116
Guido van Rossum20188312006-05-05 15:15:40 +0000117 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118}
119
120char *
121PyBytes_AsString(PyObject *self)
122{
123 assert(self != NULL);
124 assert(PyBytes_Check(self));
125
Guido van Rossum20188312006-05-05 15:15:40 +0000126 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127}
128
129int
130PyBytes_Resize(PyObject *self, Py_ssize_t size)
131{
132 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000133 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134
135 assert(self != NULL);
136 assert(PyBytes_Check(self));
137 assert(size >= 0);
138
Guido van Rossuma0867f72006-05-05 04:34:18 +0000139 if (size < alloc / 2) {
140 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000141 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000142 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000143 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000144 /* Within allocated size; quick exit */
145 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000146 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000147 return 0;
148 }
149 else if (size <= alloc * 1.125) {
150 /* Moderate upsize; overallocate similar to list_resize() */
151 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
152 }
153 else {
154 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000155 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000156 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000157
158 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000159 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000160 PyErr_NoMemory();
161 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000162 }
163
Guido van Rossumd624f182006-04-24 13:47:05 +0000164 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000165 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000166 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000167 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
168
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000169 return 0;
170}
171
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000172PyObject *
173PyBytes_Concat(PyObject *a, PyObject *b)
174{
175 Py_ssize_t asize, bsize, size;
176 void *aptr, *bptr;
177 PyBytesObject *result;
178
179 asize = _getbuffer(a, &aptr);
180 bsize = _getbuffer(b, &bptr);
181 if (asize < 0 || bsize < 0) {
182 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
183 a->ob_type->tp_name, b->ob_type->tp_name);
184 return NULL;
185 }
186
187 size = asize + bsize;
188 if (size < 0)
189 return PyErr_NoMemory();
190
191 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
192 if (result != NULL) {
193 memcpy(result->ob_bytes, aptr, asize);
194 memcpy(result->ob_bytes + asize, bptr, bsize);
195 }
196 return (PyObject *)result;
197}
198
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199/* Functions stuffed into the type object */
200
201static Py_ssize_t
202bytes_length(PyBytesObject *self)
203{
204 return self->ob_size;
205}
206
207static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000208bytes_concat(PyBytesObject *self, PyObject *other)
209{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000211}
212
213static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000214bytes_iconcat(PyBytesObject *self, PyObject *other)
215{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000216 void *optr;
Guido van Rossum13e57212006-04-27 22:54:26 +0000217 Py_ssize_t osize;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000218 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000219 Py_ssize_t size;
220
Guido van Rossum4355a472007-05-04 05:00:04 +0000221 /* XXX What if other == self? */
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000222 osize = _getbuffer(other, &optr);
223 if (osize < 0) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000224 PyErr_Format(PyExc_TypeError,
225 "can't concat bytes to %.100s", other->ob_type->tp_name);
226 return NULL;
227 }
228
229 mysize = self->ob_size;
Guido van Rossum13e57212006-04-27 22:54:26 +0000230 size = mysize + osize;
231 if (size < 0)
232 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000233 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000234 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000235 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
236 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000237 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000238 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000239 memcpy(self->ob_bytes + mysize, optr, osize);
Guido van Rossum13e57212006-04-27 22:54:26 +0000240 Py_INCREF(self);
241 return (PyObject *)self;
242}
243
244static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000245bytes_repeat(PyBytesObject *self, Py_ssize_t count)
246{
247 PyBytesObject *result;
248 Py_ssize_t mysize;
249 Py_ssize_t size;
250
251 if (count < 0)
252 count = 0;
253 mysize = self->ob_size;
254 size = mysize * count;
255 if (count != 0 && size / count != mysize)
256 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000257 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000258 if (result != NULL && size != 0) {
259 if (mysize == 1)
260 memset(result->ob_bytes, self->ob_bytes[0], size);
261 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000262 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000263 for (i = 0; i < count; i++)
264 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
265 }
266 }
267 return (PyObject *)result;
268}
269
270static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000271bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
272{
273 Py_ssize_t mysize;
274 Py_ssize_t size;
275
276 if (count < 0)
277 count = 0;
278 mysize = self->ob_size;
279 size = mysize * count;
280 if (count != 0 && size / count != mysize)
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000283 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000284 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
285 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000286 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000287 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000288
Guido van Rossum13e57212006-04-27 22:54:26 +0000289 if (mysize == 1)
290 memset(self->ob_bytes, self->ob_bytes[0], size);
291 else {
292 Py_ssize_t i;
293 for (i = 1; i < count; i++)
294 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
295 }
296
297 Py_INCREF(self);
298 return (PyObject *)self;
299}
300
301static int
302bytes_substring(PyBytesObject *self, PyBytesObject *other)
303{
304 Py_ssize_t i;
305
306 if (other->ob_size == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000307 return memchr(self->ob_bytes, other->ob_bytes[0],
Guido van Rossum13e57212006-04-27 22:54:26 +0000308 self->ob_size) != NULL;
309 }
310 if (other->ob_size == 0)
311 return 1; /* Edge case */
312 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
313 /* XXX Yeah, yeah, lots of optimizations possible... */
314 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
315 return 1;
316 }
317 return 0;
318}
319
320static int
321bytes_contains(PyBytesObject *self, PyObject *value)
322{
323 Py_ssize_t ival;
324
325 if (PyBytes_Check(value))
326 return bytes_substring(self, (PyBytesObject *)value);
327
Thomas Woutersd204a712006-08-22 13:41:17 +0000328 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000329 if (ival == -1 && PyErr_Occurred())
330 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000331 if (ival < 0 || ival >= 256) {
332 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
333 return -1;
334 }
335
336 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
337}
338
339static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000340bytes_getitem(PyBytesObject *self, Py_ssize_t i)
341{
342 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000343 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000344 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000345 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
346 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000347 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000348 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
349}
350
351static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000352bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000353{
Thomas Wouters376446d2006-12-19 08:30:14 +0000354 if (PyIndex_Check(item)) {
355 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000356
Thomas Wouters376446d2006-12-19 08:30:14 +0000357 if (i == -1 && PyErr_Occurred())
358 return NULL;
359
360 if (i < 0)
361 i += PyBytes_GET_SIZE(self);
362
363 if (i < 0 || i >= self->ob_size) {
364 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
365 return NULL;
366 }
367 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
368 }
369 else if (PySlice_Check(item)) {
370 Py_ssize_t start, stop, step, slicelength, cur, i;
371 if (PySlice_GetIndicesEx((PySliceObject *)item,
372 PyBytes_GET_SIZE(self),
373 &start, &stop, &step, &slicelength) < 0) {
374 return NULL;
375 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000376
Thomas Wouters376446d2006-12-19 08:30:14 +0000377 if (slicelength <= 0)
378 return PyBytes_FromStringAndSize("", 0);
379 else if (step == 1) {
380 return PyBytes_FromStringAndSize(self->ob_bytes + start,
381 slicelength);
382 }
383 else {
384 char *source_buf = PyBytes_AS_STRING(self);
385 char *result_buf = (char *)PyMem_Malloc(slicelength);
386 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000387
Thomas Wouters376446d2006-12-19 08:30:14 +0000388 if (result_buf == NULL)
389 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000390
Thomas Wouters376446d2006-12-19 08:30:14 +0000391 for (cur = start, i = 0; i < slicelength;
392 cur += step, i++) {
393 result_buf[i] = source_buf[cur];
394 }
395 result = PyBytes_FromStringAndSize(result_buf, slicelength);
396 PyMem_Free(result_buf);
397 return result;
398 }
399 }
400 else {
401 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
402 return NULL;
403 }
404}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000405
Guido van Rossumd624f182006-04-24 13:47:05 +0000406static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000407bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000408 PyObject *values)
409{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000410 Py_ssize_t avail, needed;
411 void *bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000412
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000413 if (values == (PyObject *)self) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000414 /* Make a copy an call this function recursively */
415 int err;
416 values = PyBytes_FromObject(values);
417 if (values == NULL)
418 return -1;
419 err = bytes_setslice(self, lo, hi, values);
420 Py_DECREF(values);
421 return err;
422 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000423 if (values == NULL) {
424 /* del b[lo:hi] */
425 bytes = NULL;
426 needed = 0;
427 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000428 else {
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000429 needed = _getbuffer(values, &bytes);
430 if (needed < 0) {
431 PyErr_Format(PyExc_TypeError,
432 "can't set bytes slice from %.100s",
433 values->ob_type->tp_name);
434 return -1;
435 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000436 }
437
438 if (lo < 0)
439 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000440 if (hi < lo)
441 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000442 if (hi > self->ob_size)
443 hi = self->ob_size;
444
445 avail = hi - lo;
446 if (avail < 0)
447 lo = hi = avail = 0;
448
449 if (avail != needed) {
450 if (avail > needed) {
451 /*
452 0 lo hi old_size
453 | |<----avail----->|<-----tomove------>|
454 | |<-needed->|<-----tomove------>|
455 0 lo new_hi new_size
456 */
457 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
458 self->ob_size - hi);
459 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000460 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 self->ob_size + needed - avail) < 0)
462 return -1;
463 if (avail < needed) {
464 /*
465 0 lo hi old_size
466 | |<-avail->|<-----tomove------>|
467 | |<----needed---->|<-----tomove------>|
468 0 lo new_hi new_size
469 */
470 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
471 self->ob_size - lo - needed);
472 }
473 }
474
475 if (needed > 0)
476 memcpy(self->ob_bytes + lo, bytes, needed);
477
478 return 0;
479}
480
481static int
482bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
483{
484 Py_ssize_t ival;
485
486 if (i < 0)
487 i += self->ob_size;
488
489 if (i < 0 || i >= self->ob_size) {
490 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
491 return -1;
492 }
493
494 if (value == NULL)
495 return bytes_setslice(self, i, i+1, NULL);
496
Thomas Woutersd204a712006-08-22 13:41:17 +0000497 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000498 if (ival == -1 && PyErr_Occurred())
499 return -1;
500
501 if (ival < 0 || ival >= 256) {
502 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
503 return -1;
504 }
505
506 self->ob_bytes[i] = ival;
507 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000508}
509
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000510static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000511bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
512{
513 Py_ssize_t start, stop, step, slicelen, needed;
514 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000515
Thomas Wouters376446d2006-12-19 08:30:14 +0000516 if (PyIndex_Check(item)) {
517 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
518
519 if (i == -1 && PyErr_Occurred())
520 return -1;
521
522 if (i < 0)
523 i += PyBytes_GET_SIZE(self);
524
525 if (i < 0 || i >= self->ob_size) {
526 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
527 return -1;
528 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000529
Thomas Wouters376446d2006-12-19 08:30:14 +0000530 if (values == NULL) {
531 /* Fall through to slice assignment */
532 start = i;
533 stop = i + 1;
534 step = 1;
535 slicelen = 1;
536 }
537 else {
538 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
539 if (ival == -1 && PyErr_Occurred())
540 return -1;
541 if (ival < 0 || ival >= 256) {
542 PyErr_SetString(PyExc_ValueError,
543 "byte must be in range(0, 256)");
544 return -1;
545 }
546 self->ob_bytes[i] = (char)ival;
547 return 0;
548 }
549 }
550 else if (PySlice_Check(item)) {
551 if (PySlice_GetIndicesEx((PySliceObject *)item,
552 PyBytes_GET_SIZE(self),
553 &start, &stop, &step, &slicelen) < 0) {
554 return -1;
555 }
556 }
557 else {
558 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
559 return -1;
560 }
561
562 if (values == NULL) {
563 bytes = NULL;
564 needed = 0;
565 }
566 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
567 /* Make a copy an call this function recursively */
568 int err;
569 values = PyBytes_FromObject(values);
570 if (values == NULL)
571 return -1;
572 err = bytes_ass_subscript(self, item, values);
573 Py_DECREF(values);
574 return err;
575 }
576 else {
577 assert(PyBytes_Check(values));
578 bytes = ((PyBytesObject *)values)->ob_bytes;
579 needed = ((PyBytesObject *)values)->ob_size;
580 }
581 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
582 if ((step < 0 && start < stop) ||
583 (step > 0 && start > stop))
584 stop = start;
585 if (step == 1) {
586 if (slicelen != needed) {
587 if (slicelen > needed) {
588 /*
589 0 start stop old_size
590 | |<---slicelen--->|<-----tomove------>|
591 | |<-needed->|<-----tomove------>|
592 0 lo new_hi new_size
593 */
594 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
595 self->ob_size - stop);
596 }
597 if (PyBytes_Resize((PyObject *)self,
598 self->ob_size + needed - slicelen) < 0)
599 return -1;
600 if (slicelen < needed) {
601 /*
602 0 lo hi old_size
603 | |<-avail->|<-----tomove------>|
604 | |<----needed---->|<-----tomove------>|
605 0 lo new_hi new_size
606 */
607 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
608 self->ob_size - start - needed);
609 }
610 }
611
612 if (needed > 0)
613 memcpy(self->ob_bytes + start, bytes, needed);
614
615 return 0;
616 }
617 else {
618 if (needed == 0) {
619 /* Delete slice */
620 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000621
Thomas Wouters376446d2006-12-19 08:30:14 +0000622 if (step < 0) {
623 stop = start + 1;
624 start = stop + step * (slicelen - 1) - 1;
625 step = -step;
626 }
627 for (cur = start, i = 0;
628 i < slicelen; cur += step, i++) {
629 Py_ssize_t lim = step - 1;
630
631 if (cur + step >= PyBytes_GET_SIZE(self))
632 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000633
Thomas Wouters376446d2006-12-19 08:30:14 +0000634 memmove(self->ob_bytes + cur - i,
635 self->ob_bytes + cur + 1, lim);
636 }
637 /* Move the tail of the bytes, in one chunk */
638 cur = start + slicelen*step;
639 if (cur < PyBytes_GET_SIZE(self)) {
640 memmove(self->ob_bytes + cur - slicelen,
641 self->ob_bytes + cur,
642 PyBytes_GET_SIZE(self) - cur);
643 }
644 if (PyBytes_Resize((PyObject *)self,
645 PyBytes_GET_SIZE(self) - slicelen) < 0)
646 return -1;
647
648 return 0;
649 }
650 else {
651 /* Assign slice */
652 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000653
Thomas Wouters376446d2006-12-19 08:30:14 +0000654 if (needed != slicelen) {
655 PyErr_Format(PyExc_ValueError,
656 "attempt to assign bytes of size %zd "
657 "to extended slice of size %zd",
658 needed, slicelen);
659 return -1;
660 }
661 for (cur = start, i = 0; i < slicelen; cur += step, i++)
662 self->ob_bytes[cur] = bytes[i];
663 return 0;
664 }
665 }
666}
667
668static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000669bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
670{
Guido van Rossumd624f182006-04-24 13:47:05 +0000671 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000672 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000673 const char *encoding = NULL;
674 const char *errors = NULL;
675 Py_ssize_t count;
676 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000677 PyObject *(*iternext)(PyObject *);
678
Guido van Rossuma0867f72006-05-05 04:34:18 +0000679 if (self->ob_size != 0) {
680 /* Empty previous contents (yes, do this first of all!) */
681 if (PyBytes_Resize((PyObject *)self, 0) < 0)
682 return -1;
683 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000684
Guido van Rossumd624f182006-04-24 13:47:05 +0000685 /* Parse arguments */
686 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
687 &arg, &encoding, &errors))
688 return -1;
689
690 /* Make a quick exit if no first argument */
691 if (arg == NULL) {
692 if (encoding != NULL || errors != NULL) {
693 PyErr_SetString(PyExc_TypeError,
694 "encoding or errors without sequence argument");
695 return -1;
696 }
697 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000698 }
699
Guido van Rossumd624f182006-04-24 13:47:05 +0000700 if (PyUnicode_Check(arg)) {
701 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000702 PyObject *encoded, *new;
Guido van Rossumd624f182006-04-24 13:47:05 +0000703 if (encoding == NULL)
704 encoding = PyUnicode_GetDefaultEncoding();
705 encoded = PyCodec_Encode(arg, encoding, errors);
706 if (encoded == NULL)
707 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000708 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000709 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000710 "encoder did not return a str8 or bytes object (type=%.400s)",
Guido van Rossumd624f182006-04-24 13:47:05 +0000711 encoded->ob_type->tp_name);
712 Py_DECREF(encoded);
713 return -1;
714 }
Guido van Rossum4355a472007-05-04 05:00:04 +0000715 new = bytes_iconcat(self, encoded);
716 Py_DECREF(encoded);
717 if (new == NULL)
718 return -1;
719 Py_DECREF(new);
720 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000721 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000722
Guido van Rossumd624f182006-04-24 13:47:05 +0000723 /* If it's not unicode, there can't be encoding or errors */
724 if (encoding != NULL || errors != NULL) {
725 PyErr_SetString(PyExc_TypeError,
726 "encoding or errors without a string argument");
727 return -1;
728 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000729
Guido van Rossumd624f182006-04-24 13:47:05 +0000730 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000731 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000732 if (count == -1 && PyErr_Occurred())
733 PyErr_Clear();
734 else {
735 if (count < 0) {
736 PyErr_SetString(PyExc_ValueError, "negative count");
737 return -1;
738 }
739 if (count > 0) {
740 if (PyBytes_Resize((PyObject *)self, count))
741 return -1;
742 memset(self->ob_bytes, 0, count);
743 }
744 return 0;
745 }
746
747 if (PyObject_CheckReadBuffer(arg)) {
748 const void *bytes;
749 Py_ssize_t size;
750 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
751 return -1;
752 if (PyBytes_Resize((PyObject *)self, size) < 0)
753 return -1;
754 memcpy(self->ob_bytes, bytes, size);
755 return 0;
756 }
757
758 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000759
760 /* Get the iterator */
761 it = PyObject_GetIter(arg);
762 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000763 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000764 iternext = *it->ob_type->tp_iternext;
765
766 /* Run the iterator to exhaustion */
767 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000768 PyObject *item;
769 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000770
Guido van Rossumd624f182006-04-24 13:47:05 +0000771 /* Get the next item */
772 item = iternext(it);
773 if (item == NULL) {
774 if (PyErr_Occurred()) {
775 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
776 goto error;
777 PyErr_Clear();
778 }
779 break;
780 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000781
Guido van Rossumd624f182006-04-24 13:47:05 +0000782 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000783 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000784 Py_DECREF(item);
785 if (value == -1 && PyErr_Occurred())
786 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000787
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 /* Range check */
789 if (value < 0 || value >= 256) {
790 PyErr_SetString(PyExc_ValueError,
791 "bytes must be in range(0, 256)");
792 goto error;
793 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000794
Guido van Rossumd624f182006-04-24 13:47:05 +0000795 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000796 if (self->ob_size < self->ob_alloc)
797 self->ob_size++;
798 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 goto error;
800 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000801 }
802
803 /* Clean up and return success */
804 Py_DECREF(it);
805 return 0;
806
807 error:
808 /* Error handling when it != NULL */
809 Py_DECREF(it);
810 return -1;
811}
812
Georg Brandlee91be42007-02-24 19:41:35 +0000813/* Mostly copied from string_repr, but without the
814 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000815static PyObject *
816bytes_repr(PyBytesObject *self)
817{
Georg Brandlee91be42007-02-24 19:41:35 +0000818 size_t newsize = 3 + 4 * self->ob_size;
819 PyObject *v;
820 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
821 PyErr_SetString(PyExc_OverflowError,
822 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000823 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000824 }
Georg Brandlee91be42007-02-24 19:41:35 +0000825 v = PyString_FromStringAndSize((char *)NULL, newsize);
826 if (v == NULL) {
827 return NULL;
828 }
829 else {
830 register Py_ssize_t i;
831 register char c;
832 register char *p;
833 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000834
Georg Brandlee91be42007-02-24 19:41:35 +0000835 p = PyString_AS_STRING(v);
836 *p++ = 'b';
837 *p++ = quote;
838 for (i = 0; i < self->ob_size; i++) {
839 /* There's at least enough room for a hex escape
840 and a closing quote. */
841 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
842 c = self->ob_bytes[i];
843 if (c == quote || c == '\\')
844 *p++ = '\\', *p++ = c;
845 else if (c == '\t')
846 *p++ = '\\', *p++ = 't';
847 else if (c == '\n')
848 *p++ = '\\', *p++ = 'n';
849 else if (c == '\r')
850 *p++ = '\\', *p++ = 'r';
851 else if (c == 0)
852 *p++ = '\\', *p++ = '0';
853 else if (c < ' ' || c >= 0x7f) {
854 /* For performance, we don't want to call
855 PyOS_snprintf here (extra layers of
856 function call). */
857 sprintf(p, "\\x%02x", c & 0xff);
858 p += 4;
859 }
860 else
861 *p++ = c;
862 }
863 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
864 *p++ = quote;
865 *p = '\0';
866 _PyString_Resize(
867 &v, (p - PyString_AS_STRING(v)));
868 return v;
869 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000870}
871
872static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000873bytes_str(PyBytesObject *self)
874{
875 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
876}
877
878static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000879bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000880{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000881 Py_ssize_t self_size, other_size;
882 void *self_bytes, *other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000883 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000884 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000885 int cmp;
886
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000887 /* Bytes can be compared to anything that supports the (binary) buffer
888 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000889
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000890 self_size = _getbuffer(self, &self_bytes);
891 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000892 Py_INCREF(Py_NotImplemented);
893 return Py_NotImplemented;
894 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000895
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000896 other_size = _getbuffer(other, &other_bytes);
897 if (other_size < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000898 Py_INCREF(Py_NotImplemented);
899 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000900 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000901
902 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000903 /* Shortcut: if the lengths differ, the objects differ */
904 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000905 }
906 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000907 minsize = self_size;
908 if (other_size < minsize)
909 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000910
Guido van Rossum343e97f2007-04-09 00:43:24 +0000911 cmp = memcmp(self_bytes, other_bytes, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000912 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000913
Guido van Rossumd624f182006-04-24 13:47:05 +0000914 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000915 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000916 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000917 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000918 cmp = 1;
919 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000920
Guido van Rossumd624f182006-04-24 13:47:05 +0000921 switch (op) {
922 case Py_LT: cmp = cmp < 0; break;
923 case Py_LE: cmp = cmp <= 0; break;
924 case Py_EQ: cmp = cmp == 0; break;
925 case Py_NE: cmp = cmp != 0; break;
926 case Py_GT: cmp = cmp > 0; break;
927 case Py_GE: cmp = cmp >= 0; break;
928 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000929 }
930
931 res = cmp ? Py_True : Py_False;
932 Py_INCREF(res);
933 return res;
934}
935
936static void
937bytes_dealloc(PyBytesObject *self)
938{
Guido van Rossumd624f182006-04-24 13:47:05 +0000939 if (self->ob_bytes != 0) {
940 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000941 }
942 self->ob_type->tp_free((PyObject *)self);
943}
944
Guido van Rossumd624f182006-04-24 13:47:05 +0000945static Py_ssize_t
946bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
947{
948 if (index != 0) {
949 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000950 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000951 return -1;
952 }
953 *ptr = (void *)self->ob_bytes;
954 return self->ob_size;
955}
956
957static Py_ssize_t
958bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
959{
960 if (lenp)
961 *lenp = self->ob_size;
962 return 1;
963}
964
Neal Norwitz6968b052007-02-27 19:02:19 +0000965
966
967/* -------------------------------------------------------------------- */
968/* Methods */
969
970#define STRINGLIB_CHAR char
971#define STRINGLIB_CMP memcmp
972#define STRINGLIB_LEN PyBytes_GET_SIZE
973#define STRINGLIB_NEW PyBytes_FromStringAndSize
974#define STRINGLIB_EMPTY nullbytes
975
976#include "stringlib/fastsearch.h"
977#include "stringlib/count.h"
978#include "stringlib/find.h"
979#include "stringlib/partition.h"
980
981
982/* The following Py_LOCAL_INLINE and Py_LOCAL functions
983were copied from the old char* style string object. */
984
985Py_LOCAL_INLINE(void)
986_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
987{
988 if (*end > len)
989 *end = len;
990 else if (*end < 0)
991 *end += len;
992 if (*end < 0)
993 *end = 0;
994 if (*start < 0)
995 *start += len;
996 if (*start < 0)
997 *start = 0;
998}
999
1000
1001Py_LOCAL_INLINE(Py_ssize_t)
1002bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1003{
1004 PyObject *subobj;
1005 const char *sub;
1006 Py_ssize_t sub_len;
1007 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1008
1009 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1010 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1011 return -2;
1012 if (PyBytes_Check(subobj)) {
1013 sub = PyBytes_AS_STRING(subobj);
1014 sub_len = PyBytes_GET_SIZE(subobj);
1015 }
1016 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1017 /* XXX - the "expected a character buffer object" is pretty
1018 confusing for a non-expert. remap to something else ? */
1019 return -2;
1020
1021 if (dir > 0)
1022 return stringlib_find_slice(
1023 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1024 sub, sub_len, start, end);
1025 else
1026 return stringlib_rfind_slice(
1027 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1028 sub, sub_len, start, end);
1029}
1030
1031
1032PyDoc_STRVAR(find__doc__,
1033"B.find(sub [,start [,end]]) -> int\n\
1034\n\
1035Return the lowest index in B where subsection sub is found,\n\
1036such that sub is contained within s[start,end]. Optional\n\
1037arguments start and end are interpreted as in slice notation.\n\
1038\n\
1039Return -1 on failure.");
1040
1041static PyObject *
1042bytes_find(PyBytesObject *self, PyObject *args)
1043{
1044 Py_ssize_t result = bytes_find_internal(self, args, +1);
1045 if (result == -2)
1046 return NULL;
1047 return PyInt_FromSsize_t(result);
1048}
1049
1050PyDoc_STRVAR(count__doc__,
1051"B.count(sub[, start[, end]]) -> int\n\
1052\n\
1053Return the number of non-overlapping occurrences of subsection sub in\n\
1054bytes B[start:end]. Optional arguments start and end are interpreted\n\
1055as in slice notation.");
1056
1057static PyObject *
1058bytes_count(PyBytesObject *self, PyObject *args)
1059{
1060 PyObject *sub_obj;
1061 const char *str = PyBytes_AS_STRING(self), *sub;
1062 Py_ssize_t sub_len;
1063 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1064
1065 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1066 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1067 return NULL;
1068
1069 if (PyBytes_Check(sub_obj)) {
1070 sub = PyBytes_AS_STRING(sub_obj);
1071 sub_len = PyBytes_GET_SIZE(sub_obj);
1072 }
1073 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1074 return NULL;
1075
1076 _adjust_indices(&start, &end, PyString_GET_SIZE(self));
1077
1078 return PyInt_FromSsize_t(
1079 stringlib_count(str + start, end - start, sub, sub_len)
1080 );
1081}
1082
1083
1084PyDoc_STRVAR(index__doc__,
1085"B.index(sub [,start [,end]]) -> int\n\
1086\n\
1087Like B.find() but raise ValueError when the subsection is not found.");
1088
1089static PyObject *
1090bytes_index(PyBytesObject *self, PyObject *args)
1091{
1092 Py_ssize_t result = bytes_find_internal(self, args, +1);
1093 if (result == -2)
1094 return NULL;
1095 if (result == -1) {
1096 PyErr_SetString(PyExc_ValueError,
1097 "subsection not found");
1098 return NULL;
1099 }
1100 return PyInt_FromSsize_t(result);
1101}
1102
1103
1104PyDoc_STRVAR(rfind__doc__,
1105"B.rfind(sub [,start [,end]]) -> int\n\
1106\n\
1107Return the highest index in B where subsection sub is found,\n\
1108such that sub is contained within s[start,end]. Optional\n\
1109arguments start and end are interpreted as in slice notation.\n\
1110\n\
1111Return -1 on failure.");
1112
1113static PyObject *
1114bytes_rfind(PyBytesObject *self, PyObject *args)
1115{
1116 Py_ssize_t result = bytes_find_internal(self, args, -1);
1117 if (result == -2)
1118 return NULL;
1119 return PyInt_FromSsize_t(result);
1120}
1121
1122
1123PyDoc_STRVAR(rindex__doc__,
1124"B.rindex(sub [,start [,end]]) -> int\n\
1125\n\
1126Like B.rfind() but raise ValueError when the subsection is not found.");
1127
1128static PyObject *
1129bytes_rindex(PyBytesObject *self, PyObject *args)
1130{
1131 Py_ssize_t result = bytes_find_internal(self, args, -1);
1132 if (result == -2)
1133 return NULL;
1134 if (result == -1) {
1135 PyErr_SetString(PyExc_ValueError,
1136 "subsection not found");
1137 return NULL;
1138 }
1139 return PyInt_FromSsize_t(result);
1140}
1141
1142
1143/* Matches the end (direction >= 0) or start (direction < 0) of self
1144 * against substr, using the start and end arguments. Returns
1145 * -1 on error, 0 if not found and 1 if found.
1146 */
1147Py_LOCAL(int)
1148_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1149 Py_ssize_t end, int direction)
1150{
1151 Py_ssize_t len = PyBytes_GET_SIZE(self);
1152 Py_ssize_t slen;
1153 const char* sub;
1154 const char* str;
1155
1156 if (PyBytes_Check(substr)) {
1157 sub = PyBytes_AS_STRING(substr);
1158 slen = PyBytes_GET_SIZE(substr);
1159 }
1160 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1161 return -1;
1162 str = PyBytes_AS_STRING(self);
1163
1164 _adjust_indices(&start, &end, len);
1165
1166 if (direction < 0) {
1167 /* startswith */
1168 if (start+slen > len)
1169 return 0;
1170 } else {
1171 /* endswith */
1172 if (end-start < slen || start > len)
1173 return 0;
1174
1175 if (end-slen > start)
1176 start = end - slen;
1177 }
1178 if (end-start >= slen)
1179 return ! memcmp(str+start, sub, slen);
1180 return 0;
1181}
1182
1183
1184PyDoc_STRVAR(startswith__doc__,
1185"B.startswith(prefix[, start[, end]]) -> bool\n\
1186\n\
1187Return True if B starts with the specified prefix, False otherwise.\n\
1188With optional start, test B beginning at that position.\n\
1189With optional end, stop comparing B at that position.\n\
1190prefix can also be a tuple of strings to try.");
1191
1192static PyObject *
1193bytes_startswith(PyBytesObject *self, PyObject *args)
1194{
1195 Py_ssize_t start = 0;
1196 Py_ssize_t end = PY_SSIZE_T_MAX;
1197 PyObject *subobj;
1198 int result;
1199
1200 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1201 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1202 return NULL;
1203 if (PyTuple_Check(subobj)) {
1204 Py_ssize_t i;
1205 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1206 result = _bytes_tailmatch(self,
1207 PyTuple_GET_ITEM(subobj, i),
1208 start, end, -1);
1209 if (result == -1)
1210 return NULL;
1211 else if (result) {
1212 Py_RETURN_TRUE;
1213 }
1214 }
1215 Py_RETURN_FALSE;
1216 }
1217 result = _bytes_tailmatch(self, subobj, start, end, -1);
1218 if (result == -1)
1219 return NULL;
1220 else
1221 return PyBool_FromLong(result);
1222}
1223
1224PyDoc_STRVAR(endswith__doc__,
1225"B.endswith(suffix[, start[, end]]) -> bool\n\
1226\n\
1227Return True if B ends with the specified suffix, False otherwise.\n\
1228With optional start, test B beginning at that position.\n\
1229With optional end, stop comparing B at that position.\n\
1230suffix can also be a tuple of strings to try.");
1231
1232static PyObject *
1233bytes_endswith(PyBytesObject *self, PyObject *args)
1234{
1235 Py_ssize_t start = 0;
1236 Py_ssize_t end = PY_SSIZE_T_MAX;
1237 PyObject *subobj;
1238 int result;
1239
1240 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1241 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1242 return NULL;
1243 if (PyTuple_Check(subobj)) {
1244 Py_ssize_t i;
1245 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1246 result = _bytes_tailmatch(self,
1247 PyTuple_GET_ITEM(subobj, i),
1248 start, end, +1);
1249 if (result == -1)
1250 return NULL;
1251 else if (result) {
1252 Py_RETURN_TRUE;
1253 }
1254 }
1255 Py_RETURN_FALSE;
1256 }
1257 result = _bytes_tailmatch(self, subobj, start, end, +1);
1258 if (result == -1)
1259 return NULL;
1260 else
1261 return PyBool_FromLong(result);
1262}
1263
1264
1265
1266PyDoc_STRVAR(translate__doc__,
1267"B.translate(table [,deletechars]) -> bytes\n\
1268\n\
1269Return a copy of the bytes B, where all characters occurring\n\
1270in the optional argument deletechars are removed, and the\n\
1271remaining characters have been mapped through the given\n\
1272translation table, which must be a bytes of length 256.");
1273
1274static PyObject *
1275bytes_translate(PyBytesObject *self, PyObject *args)
1276{
1277 register char *input, *output;
1278 register const char *table;
1279 register Py_ssize_t i, c, changed = 0;
1280 PyObject *input_obj = (PyObject*)self;
1281 const char *table1, *output_start, *del_table=NULL;
1282 Py_ssize_t inlen, tablen, dellen = 0;
1283 PyObject *result;
1284 int trans_table[256];
1285 PyObject *tableobj, *delobj = NULL;
1286
1287 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1288 &tableobj, &delobj))
1289 return NULL;
1290
1291 if (PyBytes_Check(tableobj)) {
1292 table1 = PyBytes_AS_STRING(tableobj);
1293 tablen = PyBytes_GET_SIZE(tableobj);
1294 }
1295 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1296 return NULL;
1297
1298 if (tablen != 256) {
1299 PyErr_SetString(PyExc_ValueError,
1300 "translation table must be 256 characters long");
1301 return NULL;
1302 }
1303
1304 if (delobj != NULL) {
1305 if (PyBytes_Check(delobj)) {
1306 del_table = PyBytes_AS_STRING(delobj);
1307 dellen = PyBytes_GET_SIZE(delobj);
1308 }
1309 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1310 return NULL;
1311 }
1312 else {
1313 del_table = NULL;
1314 dellen = 0;
1315 }
1316
1317 table = table1;
1318 inlen = PyBytes_GET_SIZE(input_obj);
1319 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1320 if (result == NULL)
1321 return NULL;
1322 output_start = output = PyBytes_AsString(result);
1323 input = PyBytes_AS_STRING(input_obj);
1324
1325 if (dellen == 0) {
1326 /* If no deletions are required, use faster code */
1327 for (i = inlen; --i >= 0; ) {
1328 c = Py_CHARMASK(*input++);
1329 if (Py_CHARMASK((*output++ = table[c])) != c)
1330 changed = 1;
1331 }
1332 if (changed || !PyBytes_CheckExact(input_obj))
1333 return result;
1334 Py_DECREF(result);
1335 Py_INCREF(input_obj);
1336 return input_obj;
1337 }
1338
1339 for (i = 0; i < 256; i++)
1340 trans_table[i] = Py_CHARMASK(table[i]);
1341
1342 for (i = 0; i < dellen; i++)
1343 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1344
1345 for (i = inlen; --i >= 0; ) {
1346 c = Py_CHARMASK(*input++);
1347 if (trans_table[c] != -1)
1348 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1349 continue;
1350 changed = 1;
1351 }
1352 if (!changed && PyBytes_CheckExact(input_obj)) {
1353 Py_DECREF(result);
1354 Py_INCREF(input_obj);
1355 return input_obj;
1356 }
1357 /* Fix the size of the resulting string */
1358 if (inlen > 0)
1359 PyBytes_Resize(result, output - output_start);
1360 return result;
1361}
1362
1363
1364#define FORWARD 1
1365#define REVERSE -1
1366
1367/* find and count characters and substrings */
1368
1369#define findchar(target, target_len, c) \
1370 ((char *)memchr((const void *)(target), c, target_len))
1371
1372/* Don't call if length < 2 */
1373#define Py_STRING_MATCH(target, offset, pattern, length) \
1374 (target[offset] == pattern[0] && \
1375 target[offset+length-1] == pattern[length-1] && \
1376 !memcmp(target+offset+1, pattern+1, length-2) )
1377
1378
1379/* Bytes ops must return a string. */
1380/* If the object is subclass of bytes, create a copy */
1381Py_LOCAL(PyBytesObject *)
1382return_self(PyBytesObject *self)
1383{
1384 if (PyBytes_CheckExact(self)) {
1385 Py_INCREF(self);
1386 return (PyBytesObject *)self;
1387 }
1388 return (PyBytesObject *)PyBytes_FromStringAndSize(
1389 PyBytes_AS_STRING(self),
1390 PyBytes_GET_SIZE(self));
1391}
1392
1393Py_LOCAL_INLINE(Py_ssize_t)
1394countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1395{
1396 Py_ssize_t count=0;
1397 const char *start=target;
1398 const char *end=target+target_len;
1399
1400 while ( (start=findchar(start, end-start, c)) != NULL ) {
1401 count++;
1402 if (count >= maxcount)
1403 break;
1404 start += 1;
1405 }
1406 return count;
1407}
1408
1409Py_LOCAL(Py_ssize_t)
1410findstring(const char *target, Py_ssize_t target_len,
1411 const char *pattern, Py_ssize_t pattern_len,
1412 Py_ssize_t start,
1413 Py_ssize_t end,
1414 int direction)
1415{
1416 if (start < 0) {
1417 start += target_len;
1418 if (start < 0)
1419 start = 0;
1420 }
1421 if (end > target_len) {
1422 end = target_len;
1423 } else if (end < 0) {
1424 end += target_len;
1425 if (end < 0)
1426 end = 0;
1427 }
1428
1429 /* zero-length substrings always match at the first attempt */
1430 if (pattern_len == 0)
1431 return (direction > 0) ? start : end;
1432
1433 end -= pattern_len;
1434
1435 if (direction < 0) {
1436 for (; end >= start; end--)
1437 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1438 return end;
1439 } else {
1440 for (; start <= end; start++)
1441 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1442 return start;
1443 }
1444 return -1;
1445}
1446
1447Py_LOCAL_INLINE(Py_ssize_t)
1448countstring(const char *target, Py_ssize_t target_len,
1449 const char *pattern, Py_ssize_t pattern_len,
1450 Py_ssize_t start,
1451 Py_ssize_t end,
1452 int direction, Py_ssize_t maxcount)
1453{
1454 Py_ssize_t count=0;
1455
1456 if (start < 0) {
1457 start += target_len;
1458 if (start < 0)
1459 start = 0;
1460 }
1461 if (end > target_len) {
1462 end = target_len;
1463 } else if (end < 0) {
1464 end += target_len;
1465 if (end < 0)
1466 end = 0;
1467 }
1468
1469 /* zero-length substrings match everywhere */
1470 if (pattern_len == 0 || maxcount == 0) {
1471 if (target_len+1 < maxcount)
1472 return target_len+1;
1473 return maxcount;
1474 }
1475
1476 end -= pattern_len;
1477 if (direction < 0) {
1478 for (; (end >= start); end--)
1479 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1480 count++;
1481 if (--maxcount <= 0) break;
1482 end -= pattern_len-1;
1483 }
1484 } else {
1485 for (; (start <= end); start++)
1486 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1487 count++;
1488 if (--maxcount <= 0)
1489 break;
1490 start += pattern_len-1;
1491 }
1492 }
1493 return count;
1494}
1495
1496
1497/* Algorithms for different cases of string replacement */
1498
1499/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1500Py_LOCAL(PyBytesObject *)
1501replace_interleave(PyBytesObject *self,
1502 const char *to_s, Py_ssize_t to_len,
1503 Py_ssize_t maxcount)
1504{
1505 char *self_s, *result_s;
1506 Py_ssize_t self_len, result_len;
1507 Py_ssize_t count, i, product;
1508 PyBytesObject *result;
1509
1510 self_len = PyBytes_GET_SIZE(self);
1511
1512 /* 1 at the end plus 1 after every character */
1513 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001514 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001515 count = maxcount;
1516
1517 /* Check for overflow */
1518 /* result_len = count * to_len + self_len; */
1519 product = count * to_len;
1520 if (product / to_len != count) {
1521 PyErr_SetString(PyExc_OverflowError,
1522 "replace string is too long");
1523 return NULL;
1524 }
1525 result_len = product + self_len;
1526 if (result_len < 0) {
1527 PyErr_SetString(PyExc_OverflowError,
1528 "replace string is too long");
1529 return NULL;
1530 }
1531
1532 if (! (result = (PyBytesObject *)
1533 PyBytes_FromStringAndSize(NULL, result_len)) )
1534 return NULL;
1535
1536 self_s = PyBytes_AS_STRING(self);
1537 result_s = PyBytes_AS_STRING(result);
1538
1539 /* TODO: special case single character, which doesn't need memcpy */
1540
1541 /* Lay the first one down (guaranteed this will occur) */
1542 Py_MEMCPY(result_s, to_s, to_len);
1543 result_s += to_len;
1544 count -= 1;
1545
1546 for (i=0; i<count; i++) {
1547 *result_s++ = *self_s++;
1548 Py_MEMCPY(result_s, to_s, to_len);
1549 result_s += to_len;
1550 }
1551
1552 /* Copy the rest of the original string */
1553 Py_MEMCPY(result_s, self_s, self_len-i);
1554
1555 return result;
1556}
1557
1558/* Special case for deleting a single character */
1559/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1560Py_LOCAL(PyBytesObject *)
1561replace_delete_single_character(PyBytesObject *self,
1562 char from_c, Py_ssize_t maxcount)
1563{
1564 char *self_s, *result_s;
1565 char *start, *next, *end;
1566 Py_ssize_t self_len, result_len;
1567 Py_ssize_t count;
1568 PyBytesObject *result;
1569
1570 self_len = PyBytes_GET_SIZE(self);
1571 self_s = PyBytes_AS_STRING(self);
1572
1573 count = countchar(self_s, self_len, from_c, maxcount);
1574 if (count == 0) {
1575 return return_self(self);
1576 }
1577
1578 result_len = self_len - count; /* from_len == 1 */
1579 assert(result_len>=0);
1580
1581 if ( (result = (PyBytesObject *)
1582 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1583 return NULL;
1584 result_s = PyBytes_AS_STRING(result);
1585
1586 start = self_s;
1587 end = self_s + self_len;
1588 while (count-- > 0) {
1589 next = findchar(start, end-start, from_c);
1590 if (next == NULL)
1591 break;
1592 Py_MEMCPY(result_s, start, next-start);
1593 result_s += (next-start);
1594 start = next+1;
1595 }
1596 Py_MEMCPY(result_s, start, end-start);
1597
1598 return result;
1599}
1600
1601/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1602
1603Py_LOCAL(PyBytesObject *)
1604replace_delete_substring(PyBytesObject *self,
1605 const char *from_s, Py_ssize_t from_len,
1606 Py_ssize_t maxcount)
1607{
1608 char *self_s, *result_s;
1609 char *start, *next, *end;
1610 Py_ssize_t self_len, result_len;
1611 Py_ssize_t count, offset;
1612 PyBytesObject *result;
1613
1614 self_len = PyBytes_GET_SIZE(self);
1615 self_s = PyBytes_AS_STRING(self);
1616
1617 count = countstring(self_s, self_len,
1618 from_s, from_len,
1619 0, self_len, 1,
1620 maxcount);
1621
1622 if (count == 0) {
1623 /* no matches */
1624 return return_self(self);
1625 }
1626
1627 result_len = self_len - (count * from_len);
1628 assert (result_len>=0);
1629
1630 if ( (result = (PyBytesObject *)
1631 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1632 return NULL;
1633
1634 result_s = PyBytes_AS_STRING(result);
1635
1636 start = self_s;
1637 end = self_s + self_len;
1638 while (count-- > 0) {
1639 offset = findstring(start, end-start,
1640 from_s, from_len,
1641 0, end-start, FORWARD);
1642 if (offset == -1)
1643 break;
1644 next = start + offset;
1645
1646 Py_MEMCPY(result_s, start, next-start);
1647
1648 result_s += (next-start);
1649 start = next+from_len;
1650 }
1651 Py_MEMCPY(result_s, start, end-start);
1652 return result;
1653}
1654
1655/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1656Py_LOCAL(PyBytesObject *)
1657replace_single_character_in_place(PyBytesObject *self,
1658 char from_c, char to_c,
1659 Py_ssize_t maxcount)
1660{
1661 char *self_s, *result_s, *start, *end, *next;
1662 Py_ssize_t self_len;
1663 PyBytesObject *result;
1664
1665 /* The result string will be the same size */
1666 self_s = PyBytes_AS_STRING(self);
1667 self_len = PyBytes_GET_SIZE(self);
1668
1669 next = findchar(self_s, self_len, from_c);
1670
1671 if (next == NULL) {
1672 /* No matches; return the original bytes */
1673 return return_self(self);
1674 }
1675
1676 /* Need to make a new bytes */
1677 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1678 if (result == NULL)
1679 return NULL;
1680 result_s = PyBytes_AS_STRING(result);
1681 Py_MEMCPY(result_s, self_s, self_len);
1682
1683 /* change everything in-place, starting with this one */
1684 start = result_s + (next-self_s);
1685 *start = to_c;
1686 start++;
1687 end = result_s + self_len;
1688
1689 while (--maxcount > 0) {
1690 next = findchar(start, end-start, from_c);
1691 if (next == NULL)
1692 break;
1693 *next = to_c;
1694 start = next+1;
1695 }
1696
1697 return result;
1698}
1699
1700/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1701Py_LOCAL(PyBytesObject *)
1702replace_substring_in_place(PyBytesObject *self,
1703 const char *from_s, Py_ssize_t from_len,
1704 const char *to_s, Py_ssize_t to_len,
1705 Py_ssize_t maxcount)
1706{
1707 char *result_s, *start, *end;
1708 char *self_s;
1709 Py_ssize_t self_len, offset;
1710 PyBytesObject *result;
1711
1712 /* The result bytes will be the same size */
1713
1714 self_s = PyBytes_AS_STRING(self);
1715 self_len = PyBytes_GET_SIZE(self);
1716
1717 offset = findstring(self_s, self_len,
1718 from_s, from_len,
1719 0, self_len, FORWARD);
1720 if (offset == -1) {
1721 /* No matches; return the original bytes */
1722 return return_self(self);
1723 }
1724
1725 /* Need to make a new bytes */
1726 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1727 if (result == NULL)
1728 return NULL;
1729 result_s = PyBytes_AS_STRING(result);
1730 Py_MEMCPY(result_s, self_s, self_len);
1731
1732 /* change everything in-place, starting with this one */
1733 start = result_s + offset;
1734 Py_MEMCPY(start, to_s, from_len);
1735 start += from_len;
1736 end = result_s + self_len;
1737
1738 while ( --maxcount > 0) {
1739 offset = findstring(start, end-start,
1740 from_s, from_len,
1741 0, end-start, FORWARD);
1742 if (offset==-1)
1743 break;
1744 Py_MEMCPY(start+offset, to_s, from_len);
1745 start += offset+from_len;
1746 }
1747
1748 return result;
1749}
1750
1751/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1752Py_LOCAL(PyBytesObject *)
1753replace_single_character(PyBytesObject *self,
1754 char from_c,
1755 const char *to_s, Py_ssize_t to_len,
1756 Py_ssize_t maxcount)
1757{
1758 char *self_s, *result_s;
1759 char *start, *next, *end;
1760 Py_ssize_t self_len, result_len;
1761 Py_ssize_t count, product;
1762 PyBytesObject *result;
1763
1764 self_s = PyBytes_AS_STRING(self);
1765 self_len = PyBytes_GET_SIZE(self);
1766
1767 count = countchar(self_s, self_len, from_c, maxcount);
1768 if (count == 0) {
1769 /* no matches, return unchanged */
1770 return return_self(self);
1771 }
1772
1773 /* use the difference between current and new, hence the "-1" */
1774 /* result_len = self_len + count * (to_len-1) */
1775 product = count * (to_len-1);
1776 if (product / (to_len-1) != count) {
1777 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1778 return NULL;
1779 }
1780 result_len = self_len + product;
1781 if (result_len < 0) {
1782 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1783 return NULL;
1784 }
1785
1786 if ( (result = (PyBytesObject *)
1787 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1788 return NULL;
1789 result_s = PyBytes_AS_STRING(result);
1790
1791 start = self_s;
1792 end = self_s + self_len;
1793 while (count-- > 0) {
1794 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001795 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001796 break;
1797
1798 if (next == start) {
1799 /* replace with the 'to' */
1800 Py_MEMCPY(result_s, to_s, to_len);
1801 result_s += to_len;
1802 start += 1;
1803 } else {
1804 /* copy the unchanged old then the 'to' */
1805 Py_MEMCPY(result_s, start, next-start);
1806 result_s += (next-start);
1807 Py_MEMCPY(result_s, to_s, to_len);
1808 result_s += to_len;
1809 start = next+1;
1810 }
1811 }
1812 /* Copy the remainder of the remaining bytes */
1813 Py_MEMCPY(result_s, start, end-start);
1814
1815 return result;
1816}
1817
1818/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1819Py_LOCAL(PyBytesObject *)
1820replace_substring(PyBytesObject *self,
1821 const char *from_s, Py_ssize_t from_len,
1822 const char *to_s, Py_ssize_t to_len,
1823 Py_ssize_t maxcount)
1824{
1825 char *self_s, *result_s;
1826 char *start, *next, *end;
1827 Py_ssize_t self_len, result_len;
1828 Py_ssize_t count, offset, product;
1829 PyBytesObject *result;
1830
1831 self_s = PyBytes_AS_STRING(self);
1832 self_len = PyBytes_GET_SIZE(self);
1833
1834 count = countstring(self_s, self_len,
1835 from_s, from_len,
1836 0, self_len, FORWARD, maxcount);
1837 if (count == 0) {
1838 /* no matches, return unchanged */
1839 return return_self(self);
1840 }
1841
1842 /* Check for overflow */
1843 /* result_len = self_len + count * (to_len-from_len) */
1844 product = count * (to_len-from_len);
1845 if (product / (to_len-from_len) != count) {
1846 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1847 return NULL;
1848 }
1849 result_len = self_len + product;
1850 if (result_len < 0) {
1851 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1852 return NULL;
1853 }
1854
1855 if ( (result = (PyBytesObject *)
1856 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1857 return NULL;
1858 result_s = PyBytes_AS_STRING(result);
1859
1860 start = self_s;
1861 end = self_s + self_len;
1862 while (count-- > 0) {
1863 offset = findstring(start, end-start,
1864 from_s, from_len,
1865 0, end-start, FORWARD);
1866 if (offset == -1)
1867 break;
1868 next = start+offset;
1869 if (next == start) {
1870 /* replace with the 'to' */
1871 Py_MEMCPY(result_s, to_s, to_len);
1872 result_s += to_len;
1873 start += from_len;
1874 } else {
1875 /* copy the unchanged old then the 'to' */
1876 Py_MEMCPY(result_s, start, next-start);
1877 result_s += (next-start);
1878 Py_MEMCPY(result_s, to_s, to_len);
1879 result_s += to_len;
1880 start = next+from_len;
1881 }
1882 }
1883 /* Copy the remainder of the remaining bytes */
1884 Py_MEMCPY(result_s, start, end-start);
1885
1886 return result;
1887}
1888
1889
1890Py_LOCAL(PyBytesObject *)
1891replace(PyBytesObject *self,
1892 const char *from_s, Py_ssize_t from_len,
1893 const char *to_s, Py_ssize_t to_len,
1894 Py_ssize_t maxcount)
1895{
1896 if (maxcount < 0) {
1897 maxcount = PY_SSIZE_T_MAX;
1898 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1899 /* nothing to do; return the original bytes */
1900 return return_self(self);
1901 }
1902
1903 if (maxcount == 0 ||
1904 (from_len == 0 && to_len == 0)) {
1905 /* nothing to do; return the original bytes */
1906 return return_self(self);
1907 }
1908
1909 /* Handle zero-length special cases */
1910
1911 if (from_len == 0) {
1912 /* insert the 'to' bytes everywhere. */
1913 /* >>> "Python".replace("", ".") */
1914 /* '.P.y.t.h.o.n.' */
1915 return replace_interleave(self, to_s, to_len, maxcount);
1916 }
1917
1918 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1919 /* point for an empty self bytes to generate a non-empty bytes */
1920 /* Special case so the remaining code always gets a non-empty bytes */
1921 if (PyBytes_GET_SIZE(self) == 0) {
1922 return return_self(self);
1923 }
1924
1925 if (to_len == 0) {
1926 /* delete all occurances of 'from' bytes */
1927 if (from_len == 1) {
1928 return replace_delete_single_character(
1929 self, from_s[0], maxcount);
1930 } else {
1931 return replace_delete_substring(self, from_s, from_len, maxcount);
1932 }
1933 }
1934
1935 /* Handle special case where both bytes have the same length */
1936
1937 if (from_len == to_len) {
1938 if (from_len == 1) {
1939 return replace_single_character_in_place(
1940 self,
1941 from_s[0],
1942 to_s[0],
1943 maxcount);
1944 } else {
1945 return replace_substring_in_place(
1946 self, from_s, from_len, to_s, to_len, maxcount);
1947 }
1948 }
1949
1950 /* Otherwise use the more generic algorithms */
1951 if (from_len == 1) {
1952 return replace_single_character(self, from_s[0],
1953 to_s, to_len, maxcount);
1954 } else {
1955 /* len('from')>=2, len('to')>=1 */
1956 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1957 }
1958}
1959
1960PyDoc_STRVAR(replace__doc__,
1961"B.replace (old, new[, count]) -> bytes\n\
1962\n\
1963Return a copy of bytes B with all occurrences of subsection\n\
1964old replaced by new. If the optional argument count is\n\
1965given, only the first count occurrences are replaced.");
1966
1967static PyObject *
1968bytes_replace(PyBytesObject *self, PyObject *args)
1969{
1970 Py_ssize_t count = -1;
1971 PyObject *from, *to;
1972 const char *from_s, *to_s;
1973 Py_ssize_t from_len, to_len;
1974
1975 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1976 return NULL;
1977
1978 if (PyBytes_Check(from)) {
1979 from_s = PyBytes_AS_STRING(from);
1980 from_len = PyBytes_GET_SIZE(from);
1981 }
1982 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1983 return NULL;
1984
1985 if (PyBytes_Check(to)) {
1986 to_s = PyBytes_AS_STRING(to);
1987 to_len = PyBytes_GET_SIZE(to);
1988 }
1989 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1990 return NULL;
1991
1992 return (PyObject *)replace((PyBytesObject *) self,
1993 from_s, from_len,
1994 to_s, to_len, count);
1995}
1996
1997
1998/* Overallocate the initial list to reduce the number of reallocs for small
1999 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2000 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2001 text (roughly 11 words per line) and field delimited data (usually 1-10
2002 fields). For large strings the split algorithms are bandwidth limited
2003 so increasing the preallocation likely will not improve things.*/
2004
/* Upper bound on the number of list slots allocated up front by the
   split functions. */
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   splits: maxsplit+1 results (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that compound
   expressions (conditionals, bitwise operators) expand correctly; it is
   evaluated more than once, so it must not have side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
2010
/* Append the slice data[left:right] to `list` as a new bytes object.
   The expanding function must provide the locals `str` and `list` and an
   `onError` label, which is the jump target on any failure.  The
   temporary reference held in `str` is dropped whether or not the append
   succeeds.  Wrapped in do { } while (0) so that the expansion is a single
   statement and can be used safely in an unbraced if/else body; callers
   terminate it with a semicolon. */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
2022
/* Add the slice data[left:right] to `list` as a new bytes object and bump
   `count`.  The expanding function must provide the locals `str`, `list`
   and `count` and an `onError` label.  While count < MAX_PREALLOC the
   object is stored straight into slot `count` of the preallocated list
   (no DECREF on that path); after that it is appended and the temporary
   reference is released.  Wrapped in do { } while (0) so that the
   expansion plus the caller's trailing semicolon form exactly one
   statement, which keeps it safe inside unbraced if/else bodies. */
#define SPLIT_ADD(data, left, right)                            \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        }                                                       \
        else {                                                  \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            Py_DECREF(str);                                     \
        }                                                       \
        count++;                                                \
    } while (0)
2039
/* Always force the list to the expected size: overwrite the list's
   ob_size with the local `count` of items actually stored.  The argument
   is parenthesized so the cast applies to the whole expression passed in. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
2042
2043
2044Py_LOCAL_INLINE(PyObject *)
2045split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2046{
2047 register Py_ssize_t i, j, count=0;
2048 PyObject *str;
2049 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2050
2051 if (list == NULL)
2052 return NULL;
2053
2054 i = j = 0;
2055 while ((j < len) && (maxcount-- > 0)) {
2056 for(; j<len; j++) {
2057 /* I found that using memchr makes no difference */
2058 if (s[j] == ch) {
2059 SPLIT_ADD(s, i, j);
2060 i = j = j + 1;
2061 break;
2062 }
2063 }
2064 }
2065 if (i <= len) {
2066 SPLIT_ADD(s, i, len);
2067 }
2068 FIX_PREALLOC_SIZE(list);
2069 return list;
2070
2071 onError:
2072 Py_DECREF(list);
2073 return NULL;
2074}
2075
2076PyDoc_STRVAR(split__doc__,
2077"B.split(sep [,maxsplit]) -> list of bytes\n\
2078\n\
2079Return a list of the bytes in the string B, using sep as the\n\
2080delimiter. If maxsplit is given, at most maxsplit\n\
2081splits are done.");
2082
2083static PyObject *
2084bytes_split(PyBytesObject *self, PyObject *args)
2085{
2086 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2087 Py_ssize_t maxsplit = -1, count=0;
2088 const char *s = PyBytes_AS_STRING(self), *sub;
2089 PyObject *list, *str, *subobj;
2090#ifdef USE_FAST
2091 Py_ssize_t pos;
2092#endif
2093
2094 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2095 return NULL;
2096 if (maxsplit < 0)
2097 maxsplit = PY_SSIZE_T_MAX;
2098 if (PyBytes_Check(subobj)) {
2099 sub = PyBytes_AS_STRING(subobj);
2100 n = PyBytes_GET_SIZE(subobj);
2101 }
2102 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2103 return NULL;
2104
2105 if (n == 0) {
2106 PyErr_SetString(PyExc_ValueError, "empty separator");
2107 return NULL;
2108 }
2109 else if (n == 1)
2110 return split_char(s, len, sub[0], maxsplit);
2111
2112 list = PyList_New(PREALLOC_SIZE(maxsplit));
2113 if (list == NULL)
2114 return NULL;
2115
2116#ifdef USE_FAST
2117 i = j = 0;
2118 while (maxsplit-- > 0) {
2119 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2120 if (pos < 0)
2121 break;
2122 j = i+pos;
2123 SPLIT_ADD(s, i, j);
2124 i = j + n;
2125 }
2126#else
2127 i = j = 0;
2128 while ((j+n <= len) && (maxsplit-- > 0)) {
2129 for (; j+n <= len; j++) {
2130 if (Py_STRING_MATCH(s, j, sub, n)) {
2131 SPLIT_ADD(s, i, j);
2132 i = j = j + n;
2133 break;
2134 }
2135 }
2136 }
2137#endif
2138 SPLIT_ADD(s, i, len);
2139 FIX_PREALLOC_SIZE(list);
2140 return list;
2141
2142 onError:
2143 Py_DECREF(list);
2144 return NULL;
2145}
2146
2147PyDoc_STRVAR(partition__doc__,
2148"B.partition(sep) -> (head, sep, tail)\n\
2149\n\
2150Searches for the separator sep in B, and returns the part before it,\n\
2151the separator itself, and the part after it. If the separator is not\n\
2152found, returns B and two empty bytes.");
2153
2154static PyObject *
2155bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2156{
2157 PyObject *bytesep, *result;
2158
2159 bytesep = PyBytes_FromObject(sep_obj);
2160 if (! bytesep)
2161 return NULL;
2162
2163 result = stringlib_partition(
2164 (PyObject*) self,
2165 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002166 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002167 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2168 );
2169
2170 Py_DECREF(bytesep);
2171 return result;
2172}
2173
2174PyDoc_STRVAR(rpartition__doc__,
2175"B.rpartition(sep) -> (tail, sep, head)\n\
2176\n\
2177Searches for the separator sep in B, starting at the end of B, and returns\n\
2178the part before it, the separator itself, and the part after it. If the\n\
2179separator is not found, returns two empty bytes and B.");
2180
2181static PyObject *
2182bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2183{
2184 PyObject *bytesep, *result;
2185
2186 bytesep = PyBytes_FromObject(sep_obj);
2187 if (! bytesep)
2188 return NULL;
2189
2190 result = stringlib_rpartition(
2191 (PyObject*) self,
2192 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002193 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002194 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2195 );
2196
2197 Py_DECREF(bytesep);
2198 return result;
2199}
2200
2201Py_LOCAL_INLINE(PyObject *)
2202rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2203{
2204 register Py_ssize_t i, j, count=0;
2205 PyObject *str;
2206 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2207
2208 if (list == NULL)
2209 return NULL;
2210
2211 i = j = len - 1;
2212 while ((i >= 0) && (maxcount-- > 0)) {
2213 for (; i >= 0; i--) {
2214 if (s[i] == ch) {
2215 SPLIT_ADD(s, i + 1, j + 1);
2216 j = i = i - 1;
2217 break;
2218 }
2219 }
2220 }
2221 if (j >= -1) {
2222 SPLIT_ADD(s, 0, j + 1);
2223 }
2224 FIX_PREALLOC_SIZE(list);
2225 if (PyList_Reverse(list) < 0)
2226 goto onError;
2227
2228 return list;
2229
2230 onError:
2231 Py_DECREF(list);
2232 return NULL;
2233}
2234
2235PyDoc_STRVAR(rsplit__doc__,
2236"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2237\n\
2238Return a list of the sections in the byte B, using sep as the\n\
2239delimiter, starting at the end of the bytes and working\n\
2240to the front. If maxsplit is given, at most maxsplit splits are\n\
2241done.");
2242
2243static PyObject *
2244bytes_rsplit(PyBytesObject *self, PyObject *args)
2245{
2246 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2247 Py_ssize_t maxsplit = -1, count=0;
2248 const char *s = PyBytes_AS_STRING(self), *sub;
2249 PyObject *list, *str, *subobj;
2250
2251 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2252 return NULL;
2253 if (maxsplit < 0)
2254 maxsplit = PY_SSIZE_T_MAX;
2255 if (PyBytes_Check(subobj)) {
2256 sub = PyBytes_AS_STRING(subobj);
2257 n = PyBytes_GET_SIZE(subobj);
2258 }
2259 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2260 return NULL;
2261
2262 if (n == 0) {
2263 PyErr_SetString(PyExc_ValueError, "empty separator");
2264 return NULL;
2265 }
2266 else if (n == 1)
2267 return rsplit_char(s, len, sub[0], maxsplit);
2268
2269 list = PyList_New(PREALLOC_SIZE(maxsplit));
2270 if (list == NULL)
2271 return NULL;
2272
2273 j = len;
2274 i = j - n;
2275
2276 while ( (i >= 0) && (maxsplit-- > 0) ) {
2277 for (; i>=0; i--) {
2278 if (Py_STRING_MATCH(s, i, sub, n)) {
2279 SPLIT_ADD(s, i + n, j);
2280 j = i;
2281 i -= n;
2282 break;
2283 }
2284 }
2285 }
2286 SPLIT_ADD(s, 0, j);
2287 FIX_PREALLOC_SIZE(list);
2288 if (PyList_Reverse(list) < 0)
2289 goto onError;
2290 return list;
2291
2292onError:
2293 Py_DECREF(list);
2294 return NULL;
2295}
2296
2297PyDoc_STRVAR(extend__doc__,
2298"B.extend(iterable int) -> None\n\
2299\n\
2300Append all the elements from the iterator or sequence to the\n\
2301end of the bytes.");
2302static PyObject *
2303bytes_extend(PyBytesObject *self, PyObject *arg)
2304{
2305 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2306 return NULL;
2307 Py_RETURN_NONE;
2308}
2309
2310
2311PyDoc_STRVAR(reverse__doc__,
2312"B.reverse() -> None\n\
2313\n\
2314Reverse the order of the values in bytes in place.");
2315static PyObject *
2316bytes_reverse(PyBytesObject *self, PyObject *unused)
2317{
2318 char swap, *head, *tail;
2319 Py_ssize_t i, j, n = self->ob_size;
2320
2321 j = n / 2;
2322 head = self->ob_bytes;
2323 tail = head + n - 1;
2324 for (i = 0; i < j; i++) {
2325 swap = *head;
2326 *head++ = *tail;
2327 *tail-- = swap;
2328 }
2329
2330 Py_RETURN_NONE;
2331}
2332
2333PyDoc_STRVAR(insert__doc__,
2334"B.insert(index, int) -> None\n\
2335\n\
2336Insert a single item into the bytes before the given index.");
2337static PyObject *
2338bytes_insert(PyBytesObject *self, PyObject *args)
2339{
2340 int value;
2341 Py_ssize_t where, n = self->ob_size;
2342
2343 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2344 return NULL;
2345
2346 if (n == PY_SSIZE_T_MAX) {
2347 PyErr_SetString(PyExc_OverflowError,
2348 "cannot add more objects to bytes");
2349 return NULL;
2350 }
2351 if (value < 0 || value >= 256) {
2352 PyErr_SetString(PyExc_ValueError,
2353 "byte must be in range(0, 256)");
2354 return NULL;
2355 }
2356 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2357 return NULL;
2358
2359 if (where < 0) {
2360 where += n;
2361 if (where < 0)
2362 where = 0;
2363 }
2364 if (where > n)
2365 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002366 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002367 self->ob_bytes[where] = value;
2368
2369 Py_RETURN_NONE;
2370}
2371
2372PyDoc_STRVAR(append__doc__,
2373"B.append(int) -> None\n\
2374\n\
2375Append a single item to the end of the bytes.");
2376static PyObject *
2377bytes_append(PyBytesObject *self, PyObject *arg)
2378{
2379 int value;
2380 Py_ssize_t n = self->ob_size;
2381
2382 if (! _getbytevalue(arg, &value))
2383 return NULL;
2384 if (n == PY_SSIZE_T_MAX) {
2385 PyErr_SetString(PyExc_OverflowError,
2386 "cannot add more objects to bytes");
2387 return NULL;
2388 }
2389 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2390 return NULL;
2391
2392 self->ob_bytes[n] = value;
2393
2394 Py_RETURN_NONE;
2395}
2396
2397PyDoc_STRVAR(pop__doc__,
2398"B.pop([index]) -> int\n\
2399\n\
2400Remove and return a single item from the bytes. If no index\n\
2401argument is give, will pop the last value.");
2402static PyObject *
2403bytes_pop(PyBytesObject *self, PyObject *args)
2404{
2405 int value;
2406 Py_ssize_t where = -1, n = self->ob_size;
2407
2408 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2409 return NULL;
2410
2411 if (n == 0) {
2412 PyErr_SetString(PyExc_OverflowError,
2413 "cannot pop an empty bytes");
2414 return NULL;
2415 }
2416 if (where < 0)
2417 where += self->ob_size;
2418 if (where < 0 || where >= self->ob_size) {
2419 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2420 return NULL;
2421 }
2422
2423 value = self->ob_bytes[where];
2424 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2425 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2426 return NULL;
2427
2428 return PyInt_FromLong(value);
2429}
2430
2431PyDoc_STRVAR(remove__doc__,
2432"B.remove(int) -> None\n\
2433\n\
2434Remove the first occurance of a value in bytes");
2435static PyObject *
2436bytes_remove(PyBytesObject *self, PyObject *arg)
2437{
2438 int value;
2439 Py_ssize_t where, n = self->ob_size;
2440
2441 if (! _getbytevalue(arg, &value))
2442 return NULL;
2443
2444 for (where = 0; where < n; where++) {
2445 if (self->ob_bytes[where] == value)
2446 break;
2447 }
2448 if (where == n) {
2449 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2450 return NULL;
2451 }
2452
2453 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2454 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2455 return NULL;
2456
2457 Py_RETURN_NONE;
2458}
2459
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002460/* XXX These two helpers could be optimized if argsize == 1 */
2461
2462Py_ssize_t
2463lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2464 void *argptr, Py_ssize_t argsize)
2465{
2466 Py_ssize_t i = 0;
2467 while (i < mysize && memchr(argptr, myptr[i], argsize))
2468 i++;
2469 return i;
2470}
2471
2472Py_ssize_t
2473rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2474 void *argptr, Py_ssize_t argsize)
2475{
2476 Py_ssize_t i = mysize - 1;
2477 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2478 i--;
2479 return i + 1;
2480}
2481
2482PyDoc_STRVAR(strip__doc__,
2483"B.strip(bytes) -> bytes\n\
2484\n\
2485Strip leading and trailing bytes contained in the argument.");
2486static PyObject *
2487bytes_strip(PyBytesObject *self, PyObject *arg)
2488{
2489 Py_ssize_t left, right, mysize, argsize;
2490 void *myptr, *argptr;
2491 if (arg == NULL || !PyBytes_Check(arg)) {
2492 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2493 return NULL;
2494 }
2495 myptr = self->ob_bytes;
2496 mysize = self->ob_size;
2497 argptr = ((PyBytesObject *)arg)->ob_bytes;
2498 argsize = ((PyBytesObject *)arg)->ob_size;
2499 left = lstrip_helper(myptr, mysize, argptr, argsize);
2500 right = rstrip_helper(myptr, mysize, argptr, argsize);
2501 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2502}
2503
2504PyDoc_STRVAR(lstrip__doc__,
2505"B.lstrip(bytes) -> bytes\n\
2506\n\
2507Strip leading bytes contained in the argument.");
2508static PyObject *
2509bytes_lstrip(PyBytesObject *self, PyObject *arg)
2510{
2511 Py_ssize_t left, right, mysize, argsize;
2512 void *myptr, *argptr;
2513 if (arg == NULL || !PyBytes_Check(arg)) {
2514 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2515 return NULL;
2516 }
2517 myptr = self->ob_bytes;
2518 mysize = self->ob_size;
2519 argptr = ((PyBytesObject *)arg)->ob_bytes;
2520 argsize = ((PyBytesObject *)arg)->ob_size;
2521 left = lstrip_helper(myptr, mysize, argptr, argsize);
2522 right = mysize;
2523 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2524}
2525
2526PyDoc_STRVAR(rstrip__doc__,
2527"B.rstrip(bytes) -> bytes\n\
2528\n\
2529Strip trailing bytes contained in the argument.");
2530static PyObject *
2531bytes_rstrip(PyBytesObject *self, PyObject *arg)
2532{
2533 Py_ssize_t left, right, mysize, argsize;
2534 void *myptr, *argptr;
2535 if (arg == NULL || !PyBytes_Check(arg)) {
2536 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2537 return NULL;
2538 }
2539 myptr = self->ob_bytes;
2540 mysize = self->ob_size;
2541 argptr = ((PyBytesObject *)arg)->ob_bytes;
2542 argsize = ((PyBytesObject *)arg)->ob_size;
2543 left = 0;
2544 right = rstrip_helper(myptr, mysize, argptr, argsize);
2545 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2546}
Neal Norwitz6968b052007-02-27 19:02:19 +00002547
Guido van Rossumd624f182006-04-24 13:47:05 +00002548PyDoc_STRVAR(decode_doc,
2549"B.decode([encoding[,errors]]) -> unicode obect.\n\
2550\n\
2551Decodes B using the codec registered for encoding. encoding defaults\n\
2552to the default encoding. errors may be given to set a different error\n\
2553handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2554a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2555as well as any other name registerd with codecs.register_error that is\n\
2556able to handle UnicodeDecodeErrors.");
2557
2558static PyObject *
2559bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002560{
Guido van Rossumd624f182006-04-24 13:47:05 +00002561 const char *encoding = NULL;
2562 const char *errors = NULL;
2563
2564 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2565 return NULL;
2566 if (encoding == NULL)
2567 encoding = PyUnicode_GetDefaultEncoding();
2568 return PyCodec_Decode(self, encoding, errors);
2569}
2570
Guido van Rossuma0867f72006-05-05 04:34:18 +00002571PyDoc_STRVAR(alloc_doc,
2572"B.__alloc__() -> int\n\
2573\n\
2574Returns the number of bytes actually allocated.");
2575
2576static PyObject *
2577bytes_alloc(PyBytesObject *self)
2578{
2579 return PyInt_FromSsize_t(self->ob_alloc);
2580}
2581
Guido van Rossum20188312006-05-05 15:15:40 +00002582PyDoc_STRVAR(join_doc,
2583"bytes.join(iterable_of_bytes) -> bytes\n\
2584\n\
2585Concatenates any number of bytes objects. Example:\n\
2586bytes.join([bytes('ab'), bytes('pq'), bytes('rs')]) -> bytes('abpqrs').");
2587
2588static PyObject *
2589bytes_join(PyObject *cls, PyObject *it)
2590{
2591 PyObject *seq;
2592 Py_ssize_t i;
2593 Py_ssize_t n;
2594 PyObject **items;
2595 Py_ssize_t totalsize = 0;
2596 PyObject *result;
2597 char *dest;
2598
2599 seq = PySequence_Fast(it, "can only join an iterable");
2600 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002601 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002602 n = PySequence_Fast_GET_SIZE(seq);
2603 items = PySequence_Fast_ITEMS(seq);
2604
2605 /* Compute the total size, and check that they are all bytes */
2606 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002607 PyObject *obj = items[i];
2608 if (!PyBytes_Check(obj)) {
2609 PyErr_Format(PyExc_TypeError,
2610 "can only join an iterable of bytes "
2611 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002612 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002613 (long)i, obj->ob_type->tp_name);
2614 goto error;
2615 }
2616 totalsize += PyBytes_GET_SIZE(obj);
2617 if (totalsize < 0) {
2618 PyErr_NoMemory();
2619 goto error;
2620 }
Guido van Rossum20188312006-05-05 15:15:40 +00002621 }
2622
2623 /* Allocate the result, and copy the bytes */
2624 result = PyBytes_FromStringAndSize(NULL, totalsize);
2625 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002626 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002627 dest = PyBytes_AS_STRING(result);
2628 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002629 PyObject *obj = items[i];
2630 Py_ssize_t size = PyBytes_GET_SIZE(obj);
2631 memcpy(dest, PyBytes_AS_STRING(obj), size);
2632 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002633 }
2634
2635 /* Done */
2636 Py_DECREF(seq);
2637 return result;
2638
2639 /* Error handling */
2640 error:
2641 Py_DECREF(seq);
2642 return NULL;
2643}
2644
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002645PyDoc_STRVAR(fromhex_doc,
2646"bytes.fromhex(string) -> bytes\n\
2647\n\
2648Create a bytes object from a string of hexadecimal numbers.\n\
2649Spaces between two numbers are accepted. Example:\n\
2650bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2651
/* Map one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
   numeric value 0..15; any other character yields -1. */
static int
hex_digit_to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    lowered = isupper(c) ? tolower(c) : c;
    return (lowered >= 'a' && lowered <= 'f') ? lowered - 'a' + 10 : -1;
}
2665
2666static PyObject *
2667bytes_fromhex(PyObject *cls, PyObject *args)
2668{
2669 PyObject *newbytes;
2670 char *hex, *buf;
2671 Py_ssize_t len, byteslen, i, j;
2672 int top, bot;
2673
2674 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2675 return NULL;
2676
2677 byteslen = len / 2; /* max length if there are no spaces */
2678
2679 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2680 if (!newbytes)
2681 return NULL;
2682 buf = PyBytes_AS_STRING(newbytes);
2683
Guido van Rossum4355a472007-05-04 05:00:04 +00002684 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002685 /* skip over spaces in the input */
2686 while (Py_CHARMASK(hex[i]) == ' ')
2687 i++;
2688 if (i >= len)
2689 break;
2690 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2691 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2692 if (top == -1 || bot == -1) {
2693 PyErr_Format(PyExc_ValueError,
2694 "non-hexadecimal number string '%c%c' found in "
2695 "fromhex() arg at position %zd",
2696 hex[i], hex[i+1], i);
2697 goto error;
2698 }
2699 buf[j++] = (top << 4) + bot;
2700 }
2701 if (PyBytes_Resize(newbytes, j) < 0)
2702 goto error;
2703 return newbytes;
2704
2705 error:
2706 Py_DECREF(newbytes);
2707 return NULL;
2708}
2709
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002710PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2711
2712static PyObject *
2713bytes_reduce(PyBytesObject *self)
2714{
2715 return Py_BuildValue("(O(s#))",
2716 self->ob_type,
2717 self->ob_bytes == NULL ? "" : self->ob_bytes,
2718 self->ob_size);
2719}
2720
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002721static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002722 (lenfunc)bytes_length, /* sq_length */
2723 (binaryfunc)bytes_concat, /* sq_concat */
2724 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2725 (ssizeargfunc)bytes_getitem, /* sq_item */
2726 0, /* sq_slice */
2727 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2728 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002729 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002730 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2731 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002732};
2733
2734static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002735 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002736 (binaryfunc)bytes_subscript,
2737 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002738};
2739
2740static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002741 (readbufferproc)bytes_getbuffer,
2742 (writebufferproc)bytes_getbuffer,
2743 (segcountproc)bytes_getsegcount,
2744 /* XXX Bytes are not characters! But we need to implement
2745 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
2746 (charbufferproc)bytes_getbuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002747};
2748
2749static PyMethodDef
2750bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002751 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2752 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2753 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2754 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2755 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2756 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2757 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2758 startswith__doc__},
2759 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2760 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2761 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2762 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2763 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2764 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2765 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2766 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2767 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2768 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2769 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2770 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002771 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2772 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2773 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002774 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002775 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002776 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2777 fromhex_doc},
Guido van Rossum20188312006-05-05 15:15:40 +00002778 {"join", (PyCFunction)bytes_join, METH_O|METH_CLASS, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002779 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002780 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002781};
2782
2783PyDoc_STRVAR(bytes_doc,
2784"bytes([iterable]) -> new array of bytes.\n\
2785\n\
2786If an argument is given it must be an iterable yielding ints in range(256).");
2787
2788PyTypeObject PyBytes_Type = {
2789 PyObject_HEAD_INIT(&PyType_Type)
2790 0,
2791 "bytes",
2792 sizeof(PyBytesObject),
2793 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002794 (destructor)bytes_dealloc, /* tp_dealloc */
2795 0, /* tp_print */
2796 0, /* tp_getattr */
2797 0, /* tp_setattr */
2798 0, /* tp_compare */
2799 (reprfunc)bytes_repr, /* tp_repr */
2800 0, /* tp_as_number */
2801 &bytes_as_sequence, /* tp_as_sequence */
2802 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002803 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002804 0, /* tp_call */
2805 (reprfunc)bytes_str, /* tp_str */
2806 PyObject_GenericGetAttr, /* tp_getattro */
2807 0, /* tp_setattro */
2808 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002809 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002810 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002811 bytes_doc, /* tp_doc */
2812 0, /* tp_traverse */
2813 0, /* tp_clear */
2814 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2815 0, /* tp_weaklistoffset */
2816 0, /* tp_iter */
2817 0, /* tp_iternext */
2818 bytes_methods, /* tp_methods */
2819 0, /* tp_members */
2820 0, /* tp_getset */
2821 0, /* tp_base */
2822 0, /* tp_dict */
2823 0, /* tp_descr_get */
2824 0, /* tp_descr_set */
2825 0, /* tp_dictoffset */
2826 (initproc)bytes_init, /* tp_init */
2827 PyType_GenericAlloc, /* tp_alloc */
2828 PyType_GenericNew, /* tp_new */
2829 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002830};