blob: 532e63777c5937561a53f5f4a6a60c05c0cf051d [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
Guido van Rossumad7d8d12007-04-13 01:39:34 +000034/* Helpers */
35
36static int
37_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000038{
39 PyObject *intarg = PyNumber_Int(arg);
40 if (! intarg)
41 return 0;
42 *value = PyInt_AsLong(intarg);
43 Py_DECREF(intarg);
44 if (*value < 0 || *value >= 256) {
45 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
46 return 0;
47 }
48 return 1;
49}
50
Guido van Rossumad7d8d12007-04-13 01:39:34 +000051Py_ssize_t
52_getbuffer(PyObject *obj, void **ptr)
53{
54 PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
55
56 if (buffer == NULL ||
57 PyUnicode_Check(obj) ||
58 buffer->bf_getreadbuffer == NULL ||
59 buffer->bf_getsegcount == NULL ||
60 buffer->bf_getsegcount(obj, NULL) != 1)
61 {
62 *ptr = NULL;
63 return -1;
64 }
65
66 return buffer->bf_getreadbuffer(obj, 0, ptr);
67}
68
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069/* Direct API functions */
70
71PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000072PyBytes_FromObject(PyObject *input)
73{
74 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
75 input, NULL);
76}
77
78PyObject *
79PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080{
81 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000082 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000083
Guido van Rossumd624f182006-04-24 13:47:05 +000084 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000085
86 new = PyObject_New(PyBytesObject, &PyBytes_Type);
87 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000088 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000089
Guido van Rossumf15a29f2007-05-04 00:41:39 +000090 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +000091 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000092 alloc = 0;
93 }
Guido van Rossumd624f182006-04-24 13:47:05 +000094 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +000095 alloc = size + 1;
96 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +000097 if (new->ob_bytes == NULL) {
98 Py_DECREF(new);
99 return NULL;
100 }
101 if (bytes != NULL)
102 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000103 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000104 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000105 new->ob_size = size;
106 new->ob_alloc = alloc;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000107
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108 return (PyObject *)new;
109}
110
111Py_ssize_t
112PyBytes_Size(PyObject *self)
113{
114 assert(self != NULL);
115 assert(PyBytes_Check(self));
116
Guido van Rossum20188312006-05-05 15:15:40 +0000117 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118}
119
120char *
121PyBytes_AsString(PyObject *self)
122{
123 assert(self != NULL);
124 assert(PyBytes_Check(self));
125
Guido van Rossum20188312006-05-05 15:15:40 +0000126 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127}
128
129int
130PyBytes_Resize(PyObject *self, Py_ssize_t size)
131{
132 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000133 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134
135 assert(self != NULL);
136 assert(PyBytes_Check(self));
137 assert(size >= 0);
138
Guido van Rossuma0867f72006-05-05 04:34:18 +0000139 if (size < alloc / 2) {
140 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000141 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000142 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000143 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000144 /* Within allocated size; quick exit */
145 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000146 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000147 return 0;
148 }
149 else if (size <= alloc * 1.125) {
150 /* Moderate upsize; overallocate similar to list_resize() */
151 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
152 }
153 else {
154 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000155 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000156 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000157
158 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000159 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000160 PyErr_NoMemory();
161 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000162 }
163
Guido van Rossumd624f182006-04-24 13:47:05 +0000164 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000165 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000166 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000167 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
168
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000169 return 0;
170}
171
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000172PyObject *
173PyBytes_Concat(PyObject *a, PyObject *b)
174{
175 Py_ssize_t asize, bsize, size;
176 void *aptr, *bptr;
177 PyBytesObject *result;
178
179 asize = _getbuffer(a, &aptr);
180 bsize = _getbuffer(b, &bptr);
181 if (asize < 0 || bsize < 0) {
182 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
183 a->ob_type->tp_name, b->ob_type->tp_name);
184 return NULL;
185 }
186
187 size = asize + bsize;
188 if (size < 0)
189 return PyErr_NoMemory();
190
191 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
192 if (result != NULL) {
193 memcpy(result->ob_bytes, aptr, asize);
194 memcpy(result->ob_bytes + asize, bptr, bsize);
195 }
196 return (PyObject *)result;
197}
198
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199/* Functions stuffed into the type object */
200
201static Py_ssize_t
202bytes_length(PyBytesObject *self)
203{
204 return self->ob_size;
205}
206
207static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000208bytes_concat(PyBytesObject *self, PyObject *other)
209{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000211}
212
213static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000214bytes_iconcat(PyBytesObject *self, PyObject *other)
215{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000216 void *optr;
Guido van Rossum13e57212006-04-27 22:54:26 +0000217 Py_ssize_t osize;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000218 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000219 Py_ssize_t size;
220
Guido van Rossum4355a472007-05-04 05:00:04 +0000221 /* XXX What if other == self? */
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000222 osize = _getbuffer(other, &optr);
223 if (osize < 0) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000224 PyErr_Format(PyExc_TypeError,
225 "can't concat bytes to %.100s", other->ob_type->tp_name);
226 return NULL;
227 }
228
229 mysize = self->ob_size;
Guido van Rossum13e57212006-04-27 22:54:26 +0000230 size = mysize + osize;
231 if (size < 0)
232 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000233 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000234 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000235 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
236 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000237 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000238 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000239 memcpy(self->ob_bytes + mysize, optr, osize);
Guido van Rossum13e57212006-04-27 22:54:26 +0000240 Py_INCREF(self);
241 return (PyObject *)self;
242}
243
244static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000245bytes_repeat(PyBytesObject *self, Py_ssize_t count)
246{
247 PyBytesObject *result;
248 Py_ssize_t mysize;
249 Py_ssize_t size;
250
251 if (count < 0)
252 count = 0;
253 mysize = self->ob_size;
254 size = mysize * count;
255 if (count != 0 && size / count != mysize)
256 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000257 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000258 if (result != NULL && size != 0) {
259 if (mysize == 1)
260 memset(result->ob_bytes, self->ob_bytes[0], size);
261 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000262 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000263 for (i = 0; i < count; i++)
264 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
265 }
266 }
267 return (PyObject *)result;
268}
269
270static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000271bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
272{
273 Py_ssize_t mysize;
274 Py_ssize_t size;
275
276 if (count < 0)
277 count = 0;
278 mysize = self->ob_size;
279 size = mysize * count;
280 if (count != 0 && size / count != mysize)
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000283 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000284 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
285 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000286 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000287 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000288
Guido van Rossum13e57212006-04-27 22:54:26 +0000289 if (mysize == 1)
290 memset(self->ob_bytes, self->ob_bytes[0], size);
291 else {
292 Py_ssize_t i;
293 for (i = 1; i < count; i++)
294 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
295 }
296
297 Py_INCREF(self);
298 return (PyObject *)self;
299}
300
301static int
302bytes_substring(PyBytesObject *self, PyBytesObject *other)
303{
304 Py_ssize_t i;
305
306 if (other->ob_size == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000307 return memchr(self->ob_bytes, other->ob_bytes[0],
Guido van Rossum13e57212006-04-27 22:54:26 +0000308 self->ob_size) != NULL;
309 }
310 if (other->ob_size == 0)
311 return 1; /* Edge case */
312 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
313 /* XXX Yeah, yeah, lots of optimizations possible... */
314 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
315 return 1;
316 }
317 return 0;
318}
319
320static int
321bytes_contains(PyBytesObject *self, PyObject *value)
322{
323 Py_ssize_t ival;
324
325 if (PyBytes_Check(value))
326 return bytes_substring(self, (PyBytesObject *)value);
327
Thomas Woutersd204a712006-08-22 13:41:17 +0000328 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000329 if (ival == -1 && PyErr_Occurred())
330 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000331 if (ival < 0 || ival >= 256) {
332 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
333 return -1;
334 }
335
336 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
337}
338
339static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000340bytes_getitem(PyBytesObject *self, Py_ssize_t i)
341{
342 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000343 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000344 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000345 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
346 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000347 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000348 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
349}
350
351static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000352bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000353{
Thomas Wouters376446d2006-12-19 08:30:14 +0000354 if (PyIndex_Check(item)) {
355 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000356
Thomas Wouters376446d2006-12-19 08:30:14 +0000357 if (i == -1 && PyErr_Occurred())
358 return NULL;
359
360 if (i < 0)
361 i += PyBytes_GET_SIZE(self);
362
363 if (i < 0 || i >= self->ob_size) {
364 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
365 return NULL;
366 }
367 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
368 }
369 else if (PySlice_Check(item)) {
370 Py_ssize_t start, stop, step, slicelength, cur, i;
371 if (PySlice_GetIndicesEx((PySliceObject *)item,
372 PyBytes_GET_SIZE(self),
373 &start, &stop, &step, &slicelength) < 0) {
374 return NULL;
375 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000376
Thomas Wouters376446d2006-12-19 08:30:14 +0000377 if (slicelength <= 0)
378 return PyBytes_FromStringAndSize("", 0);
379 else if (step == 1) {
380 return PyBytes_FromStringAndSize(self->ob_bytes + start,
381 slicelength);
382 }
383 else {
384 char *source_buf = PyBytes_AS_STRING(self);
385 char *result_buf = (char *)PyMem_Malloc(slicelength);
386 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000387
Thomas Wouters376446d2006-12-19 08:30:14 +0000388 if (result_buf == NULL)
389 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000390
Thomas Wouters376446d2006-12-19 08:30:14 +0000391 for (cur = start, i = 0; i < slicelength;
392 cur += step, i++) {
393 result_buf[i] = source_buf[cur];
394 }
395 result = PyBytes_FromStringAndSize(result_buf, slicelength);
396 PyMem_Free(result_buf);
397 return result;
398 }
399 }
400 else {
401 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
402 return NULL;
403 }
404}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000405
Guido van Rossumd624f182006-04-24 13:47:05 +0000406static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000407bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000408 PyObject *values)
409{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000410 Py_ssize_t avail, needed;
411 void *bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000412
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000413 if (values == (PyObject *)self) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000414 /* Make a copy an call this function recursively */
415 int err;
416 values = PyBytes_FromObject(values);
417 if (values == NULL)
418 return -1;
419 err = bytes_setslice(self, lo, hi, values);
420 Py_DECREF(values);
421 return err;
422 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000423 if (values == NULL) {
424 /* del b[lo:hi] */
425 bytes = NULL;
426 needed = 0;
427 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000428 else {
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000429 needed = _getbuffer(values, &bytes);
430 if (needed < 0) {
431 PyErr_Format(PyExc_TypeError,
432 "can't set bytes slice from %.100s",
433 values->ob_type->tp_name);
434 return -1;
435 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000436 }
437
438 if (lo < 0)
439 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000440 if (hi < lo)
441 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000442 if (hi > self->ob_size)
443 hi = self->ob_size;
444
445 avail = hi - lo;
446 if (avail < 0)
447 lo = hi = avail = 0;
448
449 if (avail != needed) {
450 if (avail > needed) {
451 /*
452 0 lo hi old_size
453 | |<----avail----->|<-----tomove------>|
454 | |<-needed->|<-----tomove------>|
455 0 lo new_hi new_size
456 */
457 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
458 self->ob_size - hi);
459 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000460 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 self->ob_size + needed - avail) < 0)
462 return -1;
463 if (avail < needed) {
464 /*
465 0 lo hi old_size
466 | |<-avail->|<-----tomove------>|
467 | |<----needed---->|<-----tomove------>|
468 0 lo new_hi new_size
469 */
470 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
471 self->ob_size - lo - needed);
472 }
473 }
474
475 if (needed > 0)
476 memcpy(self->ob_bytes + lo, bytes, needed);
477
478 return 0;
479}
480
481static int
482bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
483{
484 Py_ssize_t ival;
485
486 if (i < 0)
487 i += self->ob_size;
488
489 if (i < 0 || i >= self->ob_size) {
490 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
491 return -1;
492 }
493
494 if (value == NULL)
495 return bytes_setslice(self, i, i+1, NULL);
496
Thomas Woutersd204a712006-08-22 13:41:17 +0000497 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000498 if (ival == -1 && PyErr_Occurred())
499 return -1;
500
501 if (ival < 0 || ival >= 256) {
502 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
503 return -1;
504 }
505
506 self->ob_bytes[i] = ival;
507 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000508}
509
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000510static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000511bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
512{
513 Py_ssize_t start, stop, step, slicelen, needed;
514 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000515
Thomas Wouters376446d2006-12-19 08:30:14 +0000516 if (PyIndex_Check(item)) {
517 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
518
519 if (i == -1 && PyErr_Occurred())
520 return -1;
521
522 if (i < 0)
523 i += PyBytes_GET_SIZE(self);
524
525 if (i < 0 || i >= self->ob_size) {
526 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
527 return -1;
528 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000529
Thomas Wouters376446d2006-12-19 08:30:14 +0000530 if (values == NULL) {
531 /* Fall through to slice assignment */
532 start = i;
533 stop = i + 1;
534 step = 1;
535 slicelen = 1;
536 }
537 else {
538 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
539 if (ival == -1 && PyErr_Occurred())
540 return -1;
541 if (ival < 0 || ival >= 256) {
542 PyErr_SetString(PyExc_ValueError,
543 "byte must be in range(0, 256)");
544 return -1;
545 }
546 self->ob_bytes[i] = (char)ival;
547 return 0;
548 }
549 }
550 else if (PySlice_Check(item)) {
551 if (PySlice_GetIndicesEx((PySliceObject *)item,
552 PyBytes_GET_SIZE(self),
553 &start, &stop, &step, &slicelen) < 0) {
554 return -1;
555 }
556 }
557 else {
558 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
559 return -1;
560 }
561
562 if (values == NULL) {
563 bytes = NULL;
564 needed = 0;
565 }
566 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
567 /* Make a copy an call this function recursively */
568 int err;
569 values = PyBytes_FromObject(values);
570 if (values == NULL)
571 return -1;
572 err = bytes_ass_subscript(self, item, values);
573 Py_DECREF(values);
574 return err;
575 }
576 else {
577 assert(PyBytes_Check(values));
578 bytes = ((PyBytesObject *)values)->ob_bytes;
579 needed = ((PyBytesObject *)values)->ob_size;
580 }
581 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
582 if ((step < 0 && start < stop) ||
583 (step > 0 && start > stop))
584 stop = start;
585 if (step == 1) {
586 if (slicelen != needed) {
587 if (slicelen > needed) {
588 /*
589 0 start stop old_size
590 | |<---slicelen--->|<-----tomove------>|
591 | |<-needed->|<-----tomove------>|
592 0 lo new_hi new_size
593 */
594 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
595 self->ob_size - stop);
596 }
597 if (PyBytes_Resize((PyObject *)self,
598 self->ob_size + needed - slicelen) < 0)
599 return -1;
600 if (slicelen < needed) {
601 /*
602 0 lo hi old_size
603 | |<-avail->|<-----tomove------>|
604 | |<----needed---->|<-----tomove------>|
605 0 lo new_hi new_size
606 */
607 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
608 self->ob_size - start - needed);
609 }
610 }
611
612 if (needed > 0)
613 memcpy(self->ob_bytes + start, bytes, needed);
614
615 return 0;
616 }
617 else {
618 if (needed == 0) {
619 /* Delete slice */
620 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000621
Thomas Wouters376446d2006-12-19 08:30:14 +0000622 if (step < 0) {
623 stop = start + 1;
624 start = stop + step * (slicelen - 1) - 1;
625 step = -step;
626 }
627 for (cur = start, i = 0;
628 i < slicelen; cur += step, i++) {
629 Py_ssize_t lim = step - 1;
630
631 if (cur + step >= PyBytes_GET_SIZE(self))
632 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000633
Thomas Wouters376446d2006-12-19 08:30:14 +0000634 memmove(self->ob_bytes + cur - i,
635 self->ob_bytes + cur + 1, lim);
636 }
637 /* Move the tail of the bytes, in one chunk */
638 cur = start + slicelen*step;
639 if (cur < PyBytes_GET_SIZE(self)) {
640 memmove(self->ob_bytes + cur - slicelen,
641 self->ob_bytes + cur,
642 PyBytes_GET_SIZE(self) - cur);
643 }
644 if (PyBytes_Resize((PyObject *)self,
645 PyBytes_GET_SIZE(self) - slicelen) < 0)
646 return -1;
647
648 return 0;
649 }
650 else {
651 /* Assign slice */
652 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000653
Thomas Wouters376446d2006-12-19 08:30:14 +0000654 if (needed != slicelen) {
655 PyErr_Format(PyExc_ValueError,
656 "attempt to assign bytes of size %zd "
657 "to extended slice of size %zd",
658 needed, slicelen);
659 return -1;
660 }
661 for (cur = start, i = 0; i < slicelen; cur += step, i++)
662 self->ob_bytes[cur] = bytes[i];
663 return 0;
664 }
665 }
666}
667
668static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000669bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
670{
Guido van Rossumd624f182006-04-24 13:47:05 +0000671 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000672 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000673 const char *encoding = NULL;
674 const char *errors = NULL;
675 Py_ssize_t count;
676 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000677 PyObject *(*iternext)(PyObject *);
678
Guido van Rossuma0867f72006-05-05 04:34:18 +0000679 if (self->ob_size != 0) {
680 /* Empty previous contents (yes, do this first of all!) */
681 if (PyBytes_Resize((PyObject *)self, 0) < 0)
682 return -1;
683 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000684
Guido van Rossumd624f182006-04-24 13:47:05 +0000685 /* Parse arguments */
686 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
687 &arg, &encoding, &errors))
688 return -1;
689
690 /* Make a quick exit if no first argument */
691 if (arg == NULL) {
692 if (encoding != NULL || errors != NULL) {
693 PyErr_SetString(PyExc_TypeError,
694 "encoding or errors without sequence argument");
695 return -1;
696 }
697 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000698 }
699
Guido van Rossumd624f182006-04-24 13:47:05 +0000700 if (PyUnicode_Check(arg)) {
701 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000702 PyObject *encoded, *new;
Guido van Rossumd624f182006-04-24 13:47:05 +0000703 if (encoding == NULL)
704 encoding = PyUnicode_GetDefaultEncoding();
705 encoded = PyCodec_Encode(arg, encoding, errors);
706 if (encoded == NULL)
707 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000708 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000709 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000710 "encoder did not return a str8 or bytes object (type=%.400s)",
Guido van Rossumd624f182006-04-24 13:47:05 +0000711 encoded->ob_type->tp_name);
712 Py_DECREF(encoded);
713 return -1;
714 }
Guido van Rossum4355a472007-05-04 05:00:04 +0000715 new = bytes_iconcat(self, encoded);
716 Py_DECREF(encoded);
717 if (new == NULL)
718 return -1;
719 Py_DECREF(new);
720 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000721 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000722
Guido van Rossumd624f182006-04-24 13:47:05 +0000723 /* If it's not unicode, there can't be encoding or errors */
724 if (encoding != NULL || errors != NULL) {
725 PyErr_SetString(PyExc_TypeError,
726 "encoding or errors without a string argument");
727 return -1;
728 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000729
Guido van Rossumd624f182006-04-24 13:47:05 +0000730 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000731 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000732 if (count == -1 && PyErr_Occurred())
733 PyErr_Clear();
734 else {
735 if (count < 0) {
736 PyErr_SetString(PyExc_ValueError, "negative count");
737 return -1;
738 }
739 if (count > 0) {
740 if (PyBytes_Resize((PyObject *)self, count))
741 return -1;
742 memset(self->ob_bytes, 0, count);
743 }
744 return 0;
745 }
746
747 if (PyObject_CheckReadBuffer(arg)) {
748 const void *bytes;
749 Py_ssize_t size;
750 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
751 return -1;
752 if (PyBytes_Resize((PyObject *)self, size) < 0)
753 return -1;
754 memcpy(self->ob_bytes, bytes, size);
755 return 0;
756 }
757
758 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000759
760 /* Get the iterator */
761 it = PyObject_GetIter(arg);
762 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000763 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000764 iternext = *it->ob_type->tp_iternext;
765
766 /* Run the iterator to exhaustion */
767 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000768 PyObject *item;
769 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000770
Guido van Rossumd624f182006-04-24 13:47:05 +0000771 /* Get the next item */
772 item = iternext(it);
773 if (item == NULL) {
774 if (PyErr_Occurred()) {
775 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
776 goto error;
777 PyErr_Clear();
778 }
779 break;
780 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000781
Guido van Rossumd624f182006-04-24 13:47:05 +0000782 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000783 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000784 Py_DECREF(item);
785 if (value == -1 && PyErr_Occurred())
786 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000787
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 /* Range check */
789 if (value < 0 || value >= 256) {
790 PyErr_SetString(PyExc_ValueError,
791 "bytes must be in range(0, 256)");
792 goto error;
793 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000794
Guido van Rossumd624f182006-04-24 13:47:05 +0000795 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000796 if (self->ob_size < self->ob_alloc)
797 self->ob_size++;
798 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 goto error;
800 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000801 }
802
803 /* Clean up and return success */
804 Py_DECREF(it);
805 return 0;
806
807 error:
808 /* Error handling when it != NULL */
809 Py_DECREF(it);
810 return -1;
811}
812
Georg Brandlee91be42007-02-24 19:41:35 +0000813/* Mostly copied from string_repr, but without the
814 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000815static PyObject *
816bytes_repr(PyBytesObject *self)
817{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000818 static const char *hexdigits = "0123456789abcdef";
Georg Brandlee91be42007-02-24 19:41:35 +0000819 size_t newsize = 3 + 4 * self->ob_size;
820 PyObject *v;
821 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
822 PyErr_SetString(PyExc_OverflowError,
823 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000824 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000825 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000826 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000827 if (v == NULL) {
828 return NULL;
829 }
830 else {
831 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000832 register Py_UNICODE c;
833 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000834 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000835
Walter Dörwald1ab83302007-05-18 17:15:44 +0000836 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000837 *p++ = 'b';
838 *p++ = quote;
839 for (i = 0; i < self->ob_size; i++) {
840 /* There's at least enough room for a hex escape
841 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000842 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000843 c = self->ob_bytes[i];
844 if (c == quote || c == '\\')
845 *p++ = '\\', *p++ = c;
846 else if (c == '\t')
847 *p++ = '\\', *p++ = 't';
848 else if (c == '\n')
849 *p++ = '\\', *p++ = 'n';
850 else if (c == '\r')
851 *p++ = '\\', *p++ = 'r';
852 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000853 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000854 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000855 *p++ = '\\';
856 *p++ = 'x';
857 *p++ = hexdigits[(c & 0xf0) >> 4];
858 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000859 }
860 else
861 *p++ = c;
862 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000863 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000864 *p++ = quote;
865 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000866 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
867 Py_DECREF(v);
868 return NULL;
869 }
Georg Brandlee91be42007-02-24 19:41:35 +0000870 return v;
871 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000872}
873
874static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000875bytes_str(PyBytesObject *self)
876{
877 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
878}
879
880static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000881bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000882{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000883 Py_ssize_t self_size, other_size;
884 void *self_bytes, *other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000885 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000886 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000887 int cmp;
888
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000889 /* Bytes can be compared to anything that supports the (binary) buffer
890 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000891
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000892 self_size = _getbuffer(self, &self_bytes);
893 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000894 Py_INCREF(Py_NotImplemented);
895 return Py_NotImplemented;
896 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000897
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000898 other_size = _getbuffer(other, &other_bytes);
899 if (other_size < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000900 Py_INCREF(Py_NotImplemented);
901 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000902 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000903
904 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000905 /* Shortcut: if the lengths differ, the objects differ */
906 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000907 }
908 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000909 minsize = self_size;
910 if (other_size < minsize)
911 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000912
Guido van Rossum343e97f2007-04-09 00:43:24 +0000913 cmp = memcmp(self_bytes, other_bytes, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000914 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000915
Guido van Rossumd624f182006-04-24 13:47:05 +0000916 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000917 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000918 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000919 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000920 cmp = 1;
921 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000922
Guido van Rossumd624f182006-04-24 13:47:05 +0000923 switch (op) {
924 case Py_LT: cmp = cmp < 0; break;
925 case Py_LE: cmp = cmp <= 0; break;
926 case Py_EQ: cmp = cmp == 0; break;
927 case Py_NE: cmp = cmp != 0; break;
928 case Py_GT: cmp = cmp > 0; break;
929 case Py_GE: cmp = cmp >= 0; break;
930 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000931 }
932
933 res = cmp ? Py_True : Py_False;
934 Py_INCREF(res);
935 return res;
936}
937
938static void
939bytes_dealloc(PyBytesObject *self)
940{
Guido van Rossumd624f182006-04-24 13:47:05 +0000941 if (self->ob_bytes != 0) {
942 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000943 }
944 self->ob_type->tp_free((PyObject *)self);
945}
946
Guido van Rossumd624f182006-04-24 13:47:05 +0000947static Py_ssize_t
948bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
949{
950 if (index != 0) {
951 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000952 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000953 return -1;
954 }
Guido van Rossum63eac152007-05-09 23:36:14 +0000955 if (self->ob_bytes == NULL)
956 *ptr = "";
957 else
958 *ptr = self->ob_bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000959 return self->ob_size;
960}
961
962static Py_ssize_t
963bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
964{
965 if (lenp)
966 *lenp = self->ob_size;
967 return 1;
968}
969
Neal Norwitz6968b052007-02-27 19:02:19 +0000970
971
972/* -------------------------------------------------------------------- */
973/* Methods */
974
975#define STRINGLIB_CHAR char
976#define STRINGLIB_CMP memcmp
977#define STRINGLIB_LEN PyBytes_GET_SIZE
978#define STRINGLIB_NEW PyBytes_FromStringAndSize
979#define STRINGLIB_EMPTY nullbytes
980
981#include "stringlib/fastsearch.h"
982#include "stringlib/count.h"
983#include "stringlib/find.h"
984#include "stringlib/partition.h"
985
986
987/* The following Py_LOCAL_INLINE and Py_LOCAL functions
988were copied from the old char* style string object. */
989
990Py_LOCAL_INLINE(void)
991_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
992{
993 if (*end > len)
994 *end = len;
995 else if (*end < 0)
996 *end += len;
997 if (*end < 0)
998 *end = 0;
999 if (*start < 0)
1000 *start += len;
1001 if (*start < 0)
1002 *start = 0;
1003}
1004
1005
1006Py_LOCAL_INLINE(Py_ssize_t)
1007bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1008{
1009 PyObject *subobj;
1010 const char *sub;
1011 Py_ssize_t sub_len;
1012 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1013
1014 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1015 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1016 return -2;
1017 if (PyBytes_Check(subobj)) {
1018 sub = PyBytes_AS_STRING(subobj);
1019 sub_len = PyBytes_GET_SIZE(subobj);
1020 }
1021 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1022 /* XXX - the "expected a character buffer object" is pretty
1023 confusing for a non-expert. remap to something else ? */
1024 return -2;
1025
1026 if (dir > 0)
1027 return stringlib_find_slice(
1028 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1029 sub, sub_len, start, end);
1030 else
1031 return stringlib_rfind_slice(
1032 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1033 sub, sub_len, start, end);
1034}
1035
1036
1037PyDoc_STRVAR(find__doc__,
1038"B.find(sub [,start [,end]]) -> int\n\
1039\n\
1040Return the lowest index in B where subsection sub is found,\n\
1041such that sub is contained within s[start,end]. Optional\n\
1042arguments start and end are interpreted as in slice notation.\n\
1043\n\
1044Return -1 on failure.");
1045
1046static PyObject *
1047bytes_find(PyBytesObject *self, PyObject *args)
1048{
1049 Py_ssize_t result = bytes_find_internal(self, args, +1);
1050 if (result == -2)
1051 return NULL;
1052 return PyInt_FromSsize_t(result);
1053}
1054
1055PyDoc_STRVAR(count__doc__,
1056"B.count(sub[, start[, end]]) -> int\n\
1057\n\
1058Return the number of non-overlapping occurrences of subsection sub in\n\
1059bytes B[start:end]. Optional arguments start and end are interpreted\n\
1060as in slice notation.");
1061
1062static PyObject *
1063bytes_count(PyBytesObject *self, PyObject *args)
1064{
1065 PyObject *sub_obj;
1066 const char *str = PyBytes_AS_STRING(self), *sub;
1067 Py_ssize_t sub_len;
1068 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1069
1070 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1071 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1072 return NULL;
1073
1074 if (PyBytes_Check(sub_obj)) {
1075 sub = PyBytes_AS_STRING(sub_obj);
1076 sub_len = PyBytes_GET_SIZE(sub_obj);
1077 }
1078 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1079 return NULL;
1080
Martin v. Löwis5b222132007-06-10 09:51:05 +00001081 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001082
1083 return PyInt_FromSsize_t(
1084 stringlib_count(str + start, end - start, sub, sub_len)
1085 );
1086}
1087
1088
1089PyDoc_STRVAR(index__doc__,
1090"B.index(sub [,start [,end]]) -> int\n\
1091\n\
1092Like B.find() but raise ValueError when the subsection is not found.");
1093
1094static PyObject *
1095bytes_index(PyBytesObject *self, PyObject *args)
1096{
1097 Py_ssize_t result = bytes_find_internal(self, args, +1);
1098 if (result == -2)
1099 return NULL;
1100 if (result == -1) {
1101 PyErr_SetString(PyExc_ValueError,
1102 "subsection not found");
1103 return NULL;
1104 }
1105 return PyInt_FromSsize_t(result);
1106}
1107
1108
1109PyDoc_STRVAR(rfind__doc__,
1110"B.rfind(sub [,start [,end]]) -> int\n\
1111\n\
1112Return the highest index in B where subsection sub is found,\n\
1113such that sub is contained within s[start,end]. Optional\n\
1114arguments start and end are interpreted as in slice notation.\n\
1115\n\
1116Return -1 on failure.");
1117
1118static PyObject *
1119bytes_rfind(PyBytesObject *self, PyObject *args)
1120{
1121 Py_ssize_t result = bytes_find_internal(self, args, -1);
1122 if (result == -2)
1123 return NULL;
1124 return PyInt_FromSsize_t(result);
1125}
1126
1127
1128PyDoc_STRVAR(rindex__doc__,
1129"B.rindex(sub [,start [,end]]) -> int\n\
1130\n\
1131Like B.rfind() but raise ValueError when the subsection is not found.");
1132
1133static PyObject *
1134bytes_rindex(PyBytesObject *self, PyObject *args)
1135{
1136 Py_ssize_t result = bytes_find_internal(self, args, -1);
1137 if (result == -2)
1138 return NULL;
1139 if (result == -1) {
1140 PyErr_SetString(PyExc_ValueError,
1141 "subsection not found");
1142 return NULL;
1143 }
1144 return PyInt_FromSsize_t(result);
1145}
1146
1147
1148/* Matches the end (direction >= 0) or start (direction < 0) of self
1149 * against substr, using the start and end arguments. Returns
1150 * -1 on error, 0 if not found and 1 if found.
1151 */
1152Py_LOCAL(int)
1153_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1154 Py_ssize_t end, int direction)
1155{
1156 Py_ssize_t len = PyBytes_GET_SIZE(self);
1157 Py_ssize_t slen;
1158 const char* sub;
1159 const char* str;
1160
1161 if (PyBytes_Check(substr)) {
1162 sub = PyBytes_AS_STRING(substr);
1163 slen = PyBytes_GET_SIZE(substr);
1164 }
1165 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1166 return -1;
1167 str = PyBytes_AS_STRING(self);
1168
1169 _adjust_indices(&start, &end, len);
1170
1171 if (direction < 0) {
1172 /* startswith */
1173 if (start+slen > len)
1174 return 0;
1175 } else {
1176 /* endswith */
1177 if (end-start < slen || start > len)
1178 return 0;
1179
1180 if (end-slen > start)
1181 start = end - slen;
1182 }
1183 if (end-start >= slen)
1184 return ! memcmp(str+start, sub, slen);
1185 return 0;
1186}
1187
1188
1189PyDoc_STRVAR(startswith__doc__,
1190"B.startswith(prefix[, start[, end]]) -> bool\n\
1191\n\
1192Return True if B starts with the specified prefix, False otherwise.\n\
1193With optional start, test B beginning at that position.\n\
1194With optional end, stop comparing B at that position.\n\
1195prefix can also be a tuple of strings to try.");
1196
1197static PyObject *
1198bytes_startswith(PyBytesObject *self, PyObject *args)
1199{
1200 Py_ssize_t start = 0;
1201 Py_ssize_t end = PY_SSIZE_T_MAX;
1202 PyObject *subobj;
1203 int result;
1204
1205 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1206 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1207 return NULL;
1208 if (PyTuple_Check(subobj)) {
1209 Py_ssize_t i;
1210 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1211 result = _bytes_tailmatch(self,
1212 PyTuple_GET_ITEM(subobj, i),
1213 start, end, -1);
1214 if (result == -1)
1215 return NULL;
1216 else if (result) {
1217 Py_RETURN_TRUE;
1218 }
1219 }
1220 Py_RETURN_FALSE;
1221 }
1222 result = _bytes_tailmatch(self, subobj, start, end, -1);
1223 if (result == -1)
1224 return NULL;
1225 else
1226 return PyBool_FromLong(result);
1227}
1228
1229PyDoc_STRVAR(endswith__doc__,
1230"B.endswith(suffix[, start[, end]]) -> bool\n\
1231\n\
1232Return True if B ends with the specified suffix, False otherwise.\n\
1233With optional start, test B beginning at that position.\n\
1234With optional end, stop comparing B at that position.\n\
1235suffix can also be a tuple of strings to try.");
1236
1237static PyObject *
1238bytes_endswith(PyBytesObject *self, PyObject *args)
1239{
1240 Py_ssize_t start = 0;
1241 Py_ssize_t end = PY_SSIZE_T_MAX;
1242 PyObject *subobj;
1243 int result;
1244
1245 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1246 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1247 return NULL;
1248 if (PyTuple_Check(subobj)) {
1249 Py_ssize_t i;
1250 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1251 result = _bytes_tailmatch(self,
1252 PyTuple_GET_ITEM(subobj, i),
1253 start, end, +1);
1254 if (result == -1)
1255 return NULL;
1256 else if (result) {
1257 Py_RETURN_TRUE;
1258 }
1259 }
1260 Py_RETURN_FALSE;
1261 }
1262 result = _bytes_tailmatch(self, subobj, start, end, +1);
1263 if (result == -1)
1264 return NULL;
1265 else
1266 return PyBool_FromLong(result);
1267}
1268
1269
1270
1271PyDoc_STRVAR(translate__doc__,
1272"B.translate(table [,deletechars]) -> bytes\n\
1273\n\
1274Return a copy of the bytes B, where all characters occurring\n\
1275in the optional argument deletechars are removed, and the\n\
1276remaining characters have been mapped through the given\n\
1277translation table, which must be a bytes of length 256.");
1278
1279static PyObject *
1280bytes_translate(PyBytesObject *self, PyObject *args)
1281{
1282 register char *input, *output;
1283 register const char *table;
1284 register Py_ssize_t i, c, changed = 0;
1285 PyObject *input_obj = (PyObject*)self;
1286 const char *table1, *output_start, *del_table=NULL;
1287 Py_ssize_t inlen, tablen, dellen = 0;
1288 PyObject *result;
1289 int trans_table[256];
1290 PyObject *tableobj, *delobj = NULL;
1291
1292 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1293 &tableobj, &delobj))
1294 return NULL;
1295
1296 if (PyBytes_Check(tableobj)) {
1297 table1 = PyBytes_AS_STRING(tableobj);
1298 tablen = PyBytes_GET_SIZE(tableobj);
1299 }
1300 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1301 return NULL;
1302
1303 if (tablen != 256) {
1304 PyErr_SetString(PyExc_ValueError,
1305 "translation table must be 256 characters long");
1306 return NULL;
1307 }
1308
1309 if (delobj != NULL) {
1310 if (PyBytes_Check(delobj)) {
1311 del_table = PyBytes_AS_STRING(delobj);
1312 dellen = PyBytes_GET_SIZE(delobj);
1313 }
1314 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1315 return NULL;
1316 }
1317 else {
1318 del_table = NULL;
1319 dellen = 0;
1320 }
1321
1322 table = table1;
1323 inlen = PyBytes_GET_SIZE(input_obj);
1324 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1325 if (result == NULL)
1326 return NULL;
1327 output_start = output = PyBytes_AsString(result);
1328 input = PyBytes_AS_STRING(input_obj);
1329
1330 if (dellen == 0) {
1331 /* If no deletions are required, use faster code */
1332 for (i = inlen; --i >= 0; ) {
1333 c = Py_CHARMASK(*input++);
1334 if (Py_CHARMASK((*output++ = table[c])) != c)
1335 changed = 1;
1336 }
1337 if (changed || !PyBytes_CheckExact(input_obj))
1338 return result;
1339 Py_DECREF(result);
1340 Py_INCREF(input_obj);
1341 return input_obj;
1342 }
1343
1344 for (i = 0; i < 256; i++)
1345 trans_table[i] = Py_CHARMASK(table[i]);
1346
1347 for (i = 0; i < dellen; i++)
1348 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1349
1350 for (i = inlen; --i >= 0; ) {
1351 c = Py_CHARMASK(*input++);
1352 if (trans_table[c] != -1)
1353 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1354 continue;
1355 changed = 1;
1356 }
1357 if (!changed && PyBytes_CheckExact(input_obj)) {
1358 Py_DECREF(result);
1359 Py_INCREF(input_obj);
1360 return input_obj;
1361 }
1362 /* Fix the size of the resulting string */
1363 if (inlen > 0)
1364 PyBytes_Resize(result, output - output_start);
1365 return result;
1366}
1367
1368
/* Search directions accepted by findstring() / countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes
   of target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the `length` bytes of target starting at offset equal
   pattern.  The first and last bytes are compared before falling back to
   memcmp() on the middle.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                    \
    ((target)[(offset)] == (pattern)[0] &&                                  \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&              \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1382
1383
1384/* Bytes ops must return a string. */
1385/* If the object is subclass of bytes, create a copy */
1386Py_LOCAL(PyBytesObject *)
1387return_self(PyBytesObject *self)
1388{
1389 if (PyBytes_CheckExact(self)) {
1390 Py_INCREF(self);
1391 return (PyBytesObject *)self;
1392 }
1393 return (PyBytesObject *)PyBytes_FromStringAndSize(
1394 PyBytes_AS_STRING(self),
1395 PyBytes_GET_SIZE(self));
1396}
1397
1398Py_LOCAL_INLINE(Py_ssize_t)
1399countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1400{
1401 Py_ssize_t count=0;
1402 const char *start=target;
1403 const char *end=target+target_len;
1404
1405 while ( (start=findchar(start, end-start, c)) != NULL ) {
1406 count++;
1407 if (count >= maxcount)
1408 break;
1409 start += 1;
1410 }
1411 return count;
1412}
1413
1414Py_LOCAL(Py_ssize_t)
1415findstring(const char *target, Py_ssize_t target_len,
1416 const char *pattern, Py_ssize_t pattern_len,
1417 Py_ssize_t start,
1418 Py_ssize_t end,
1419 int direction)
1420{
1421 if (start < 0) {
1422 start += target_len;
1423 if (start < 0)
1424 start = 0;
1425 }
1426 if (end > target_len) {
1427 end = target_len;
1428 } else if (end < 0) {
1429 end += target_len;
1430 if (end < 0)
1431 end = 0;
1432 }
1433
1434 /* zero-length substrings always match at the first attempt */
1435 if (pattern_len == 0)
1436 return (direction > 0) ? start : end;
1437
1438 end -= pattern_len;
1439
1440 if (direction < 0) {
1441 for (; end >= start; end--)
1442 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1443 return end;
1444 } else {
1445 for (; start <= end; start++)
1446 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1447 return start;
1448 }
1449 return -1;
1450}
1451
1452Py_LOCAL_INLINE(Py_ssize_t)
1453countstring(const char *target, Py_ssize_t target_len,
1454 const char *pattern, Py_ssize_t pattern_len,
1455 Py_ssize_t start,
1456 Py_ssize_t end,
1457 int direction, Py_ssize_t maxcount)
1458{
1459 Py_ssize_t count=0;
1460
1461 if (start < 0) {
1462 start += target_len;
1463 if (start < 0)
1464 start = 0;
1465 }
1466 if (end > target_len) {
1467 end = target_len;
1468 } else if (end < 0) {
1469 end += target_len;
1470 if (end < 0)
1471 end = 0;
1472 }
1473
1474 /* zero-length substrings match everywhere */
1475 if (pattern_len == 0 || maxcount == 0) {
1476 if (target_len+1 < maxcount)
1477 return target_len+1;
1478 return maxcount;
1479 }
1480
1481 end -= pattern_len;
1482 if (direction < 0) {
1483 for (; (end >= start); end--)
1484 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1485 count++;
1486 if (--maxcount <= 0) break;
1487 end -= pattern_len-1;
1488 }
1489 } else {
1490 for (; (start <= end); start++)
1491 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1492 count++;
1493 if (--maxcount <= 0)
1494 break;
1495 start += pattern_len-1;
1496 }
1497 }
1498 return count;
1499}
1500
1501
1502/* Algorithms for different cases of string replacement */
1503
1504/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1505Py_LOCAL(PyBytesObject *)
1506replace_interleave(PyBytesObject *self,
1507 const char *to_s, Py_ssize_t to_len,
1508 Py_ssize_t maxcount)
1509{
1510 char *self_s, *result_s;
1511 Py_ssize_t self_len, result_len;
1512 Py_ssize_t count, i, product;
1513 PyBytesObject *result;
1514
1515 self_len = PyBytes_GET_SIZE(self);
1516
1517 /* 1 at the end plus 1 after every character */
1518 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001519 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001520 count = maxcount;
1521
1522 /* Check for overflow */
1523 /* result_len = count * to_len + self_len; */
1524 product = count * to_len;
1525 if (product / to_len != count) {
1526 PyErr_SetString(PyExc_OverflowError,
1527 "replace string is too long");
1528 return NULL;
1529 }
1530 result_len = product + self_len;
1531 if (result_len < 0) {
1532 PyErr_SetString(PyExc_OverflowError,
1533 "replace string is too long");
1534 return NULL;
1535 }
1536
1537 if (! (result = (PyBytesObject *)
1538 PyBytes_FromStringAndSize(NULL, result_len)) )
1539 return NULL;
1540
1541 self_s = PyBytes_AS_STRING(self);
1542 result_s = PyBytes_AS_STRING(result);
1543
1544 /* TODO: special case single character, which doesn't need memcpy */
1545
1546 /* Lay the first one down (guaranteed this will occur) */
1547 Py_MEMCPY(result_s, to_s, to_len);
1548 result_s += to_len;
1549 count -= 1;
1550
1551 for (i=0; i<count; i++) {
1552 *result_s++ = *self_s++;
1553 Py_MEMCPY(result_s, to_s, to_len);
1554 result_s += to_len;
1555 }
1556
1557 /* Copy the rest of the original string */
1558 Py_MEMCPY(result_s, self_s, self_len-i);
1559
1560 return result;
1561}
1562
1563/* Special case for deleting a single character */
1564/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1565Py_LOCAL(PyBytesObject *)
1566replace_delete_single_character(PyBytesObject *self,
1567 char from_c, Py_ssize_t maxcount)
1568{
1569 char *self_s, *result_s;
1570 char *start, *next, *end;
1571 Py_ssize_t self_len, result_len;
1572 Py_ssize_t count;
1573 PyBytesObject *result;
1574
1575 self_len = PyBytes_GET_SIZE(self);
1576 self_s = PyBytes_AS_STRING(self);
1577
1578 count = countchar(self_s, self_len, from_c, maxcount);
1579 if (count == 0) {
1580 return return_self(self);
1581 }
1582
1583 result_len = self_len - count; /* from_len == 1 */
1584 assert(result_len>=0);
1585
1586 if ( (result = (PyBytesObject *)
1587 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1588 return NULL;
1589 result_s = PyBytes_AS_STRING(result);
1590
1591 start = self_s;
1592 end = self_s + self_len;
1593 while (count-- > 0) {
1594 next = findchar(start, end-start, from_c);
1595 if (next == NULL)
1596 break;
1597 Py_MEMCPY(result_s, start, next-start);
1598 result_s += (next-start);
1599 start = next+1;
1600 }
1601 Py_MEMCPY(result_s, start, end-start);
1602
1603 return result;
1604}
1605
1606/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1607
1608Py_LOCAL(PyBytesObject *)
1609replace_delete_substring(PyBytesObject *self,
1610 const char *from_s, Py_ssize_t from_len,
1611 Py_ssize_t maxcount)
1612{
1613 char *self_s, *result_s;
1614 char *start, *next, *end;
1615 Py_ssize_t self_len, result_len;
1616 Py_ssize_t count, offset;
1617 PyBytesObject *result;
1618
1619 self_len = PyBytes_GET_SIZE(self);
1620 self_s = PyBytes_AS_STRING(self);
1621
1622 count = countstring(self_s, self_len,
1623 from_s, from_len,
1624 0, self_len, 1,
1625 maxcount);
1626
1627 if (count == 0) {
1628 /* no matches */
1629 return return_self(self);
1630 }
1631
1632 result_len = self_len - (count * from_len);
1633 assert (result_len>=0);
1634
1635 if ( (result = (PyBytesObject *)
1636 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1637 return NULL;
1638
1639 result_s = PyBytes_AS_STRING(result);
1640
1641 start = self_s;
1642 end = self_s + self_len;
1643 while (count-- > 0) {
1644 offset = findstring(start, end-start,
1645 from_s, from_len,
1646 0, end-start, FORWARD);
1647 if (offset == -1)
1648 break;
1649 next = start + offset;
1650
1651 Py_MEMCPY(result_s, start, next-start);
1652
1653 result_s += (next-start);
1654 start = next+from_len;
1655 }
1656 Py_MEMCPY(result_s, start, end-start);
1657 return result;
1658}
1659
1660/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1661Py_LOCAL(PyBytesObject *)
1662replace_single_character_in_place(PyBytesObject *self,
1663 char from_c, char to_c,
1664 Py_ssize_t maxcount)
1665{
1666 char *self_s, *result_s, *start, *end, *next;
1667 Py_ssize_t self_len;
1668 PyBytesObject *result;
1669
1670 /* The result string will be the same size */
1671 self_s = PyBytes_AS_STRING(self);
1672 self_len = PyBytes_GET_SIZE(self);
1673
1674 next = findchar(self_s, self_len, from_c);
1675
1676 if (next == NULL) {
1677 /* No matches; return the original bytes */
1678 return return_self(self);
1679 }
1680
1681 /* Need to make a new bytes */
1682 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1683 if (result == NULL)
1684 return NULL;
1685 result_s = PyBytes_AS_STRING(result);
1686 Py_MEMCPY(result_s, self_s, self_len);
1687
1688 /* change everything in-place, starting with this one */
1689 start = result_s + (next-self_s);
1690 *start = to_c;
1691 start++;
1692 end = result_s + self_len;
1693
1694 while (--maxcount > 0) {
1695 next = findchar(start, end-start, from_c);
1696 if (next == NULL)
1697 break;
1698 *next = to_c;
1699 start = next+1;
1700 }
1701
1702 return result;
1703}
1704
1705/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1706Py_LOCAL(PyBytesObject *)
1707replace_substring_in_place(PyBytesObject *self,
1708 const char *from_s, Py_ssize_t from_len,
1709 const char *to_s, Py_ssize_t to_len,
1710 Py_ssize_t maxcount)
1711{
1712 char *result_s, *start, *end;
1713 char *self_s;
1714 Py_ssize_t self_len, offset;
1715 PyBytesObject *result;
1716
1717 /* The result bytes will be the same size */
1718
1719 self_s = PyBytes_AS_STRING(self);
1720 self_len = PyBytes_GET_SIZE(self);
1721
1722 offset = findstring(self_s, self_len,
1723 from_s, from_len,
1724 0, self_len, FORWARD);
1725 if (offset == -1) {
1726 /* No matches; return the original bytes */
1727 return return_self(self);
1728 }
1729
1730 /* Need to make a new bytes */
1731 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1732 if (result == NULL)
1733 return NULL;
1734 result_s = PyBytes_AS_STRING(result);
1735 Py_MEMCPY(result_s, self_s, self_len);
1736
1737 /* change everything in-place, starting with this one */
1738 start = result_s + offset;
1739 Py_MEMCPY(start, to_s, from_len);
1740 start += from_len;
1741 end = result_s + self_len;
1742
1743 while ( --maxcount > 0) {
1744 offset = findstring(start, end-start,
1745 from_s, from_len,
1746 0, end-start, FORWARD);
1747 if (offset==-1)
1748 break;
1749 Py_MEMCPY(start+offset, to_s, from_len);
1750 start += offset+from_len;
1751 }
1752
1753 return result;
1754}
1755
1756/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1757Py_LOCAL(PyBytesObject *)
1758replace_single_character(PyBytesObject *self,
1759 char from_c,
1760 const char *to_s, Py_ssize_t to_len,
1761 Py_ssize_t maxcount)
1762{
1763 char *self_s, *result_s;
1764 char *start, *next, *end;
1765 Py_ssize_t self_len, result_len;
1766 Py_ssize_t count, product;
1767 PyBytesObject *result;
1768
1769 self_s = PyBytes_AS_STRING(self);
1770 self_len = PyBytes_GET_SIZE(self);
1771
1772 count = countchar(self_s, self_len, from_c, maxcount);
1773 if (count == 0) {
1774 /* no matches, return unchanged */
1775 return return_self(self);
1776 }
1777
1778 /* use the difference between current and new, hence the "-1" */
1779 /* result_len = self_len + count * (to_len-1) */
1780 product = count * (to_len-1);
1781 if (product / (to_len-1) != count) {
1782 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1783 return NULL;
1784 }
1785 result_len = self_len + product;
1786 if (result_len < 0) {
1787 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1788 return NULL;
1789 }
1790
1791 if ( (result = (PyBytesObject *)
1792 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1793 return NULL;
1794 result_s = PyBytes_AS_STRING(result);
1795
1796 start = self_s;
1797 end = self_s + self_len;
1798 while (count-- > 0) {
1799 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001800 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001801 break;
1802
1803 if (next == start) {
1804 /* replace with the 'to' */
1805 Py_MEMCPY(result_s, to_s, to_len);
1806 result_s += to_len;
1807 start += 1;
1808 } else {
1809 /* copy the unchanged old then the 'to' */
1810 Py_MEMCPY(result_s, start, next-start);
1811 result_s += (next-start);
1812 Py_MEMCPY(result_s, to_s, to_len);
1813 result_s += to_len;
1814 start = next+1;
1815 }
1816 }
1817 /* Copy the remainder of the remaining bytes */
1818 Py_MEMCPY(result_s, start, end-start);
1819
1820 return result;
1821}
1822
1823/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1824Py_LOCAL(PyBytesObject *)
1825replace_substring(PyBytesObject *self,
1826 const char *from_s, Py_ssize_t from_len,
1827 const char *to_s, Py_ssize_t to_len,
1828 Py_ssize_t maxcount)
1829{
1830 char *self_s, *result_s;
1831 char *start, *next, *end;
1832 Py_ssize_t self_len, result_len;
1833 Py_ssize_t count, offset, product;
1834 PyBytesObject *result;
1835
1836 self_s = PyBytes_AS_STRING(self);
1837 self_len = PyBytes_GET_SIZE(self);
1838
1839 count = countstring(self_s, self_len,
1840 from_s, from_len,
1841 0, self_len, FORWARD, maxcount);
1842 if (count == 0) {
1843 /* no matches, return unchanged */
1844 return return_self(self);
1845 }
1846
1847 /* Check for overflow */
1848 /* result_len = self_len + count * (to_len-from_len) */
1849 product = count * (to_len-from_len);
1850 if (product / (to_len-from_len) != count) {
1851 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1852 return NULL;
1853 }
1854 result_len = self_len + product;
1855 if (result_len < 0) {
1856 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1857 return NULL;
1858 }
1859
1860 if ( (result = (PyBytesObject *)
1861 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1862 return NULL;
1863 result_s = PyBytes_AS_STRING(result);
1864
1865 start = self_s;
1866 end = self_s + self_len;
1867 while (count-- > 0) {
1868 offset = findstring(start, end-start,
1869 from_s, from_len,
1870 0, end-start, FORWARD);
1871 if (offset == -1)
1872 break;
1873 next = start+offset;
1874 if (next == start) {
1875 /* replace with the 'to' */
1876 Py_MEMCPY(result_s, to_s, to_len);
1877 result_s += to_len;
1878 start += from_len;
1879 } else {
1880 /* copy the unchanged old then the 'to' */
1881 Py_MEMCPY(result_s, start, next-start);
1882 result_s += (next-start);
1883 Py_MEMCPY(result_s, to_s, to_len);
1884 result_s += to_len;
1885 start = next+from_len;
1886 }
1887 }
1888 /* Copy the remainder of the remaining bytes */
1889 Py_MEMCPY(result_s, start, end-start);
1890
1891 return result;
1892}
1893
1894
1895Py_LOCAL(PyBytesObject *)
1896replace(PyBytesObject *self,
1897 const char *from_s, Py_ssize_t from_len,
1898 const char *to_s, Py_ssize_t to_len,
1899 Py_ssize_t maxcount)
1900{
1901 if (maxcount < 0) {
1902 maxcount = PY_SSIZE_T_MAX;
1903 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1904 /* nothing to do; return the original bytes */
1905 return return_self(self);
1906 }
1907
1908 if (maxcount == 0 ||
1909 (from_len == 0 && to_len == 0)) {
1910 /* nothing to do; return the original bytes */
1911 return return_self(self);
1912 }
1913
1914 /* Handle zero-length special cases */
1915
1916 if (from_len == 0) {
1917 /* insert the 'to' bytes everywhere. */
1918 /* >>> "Python".replace("", ".") */
1919 /* '.P.y.t.h.o.n.' */
1920 return replace_interleave(self, to_s, to_len, maxcount);
1921 }
1922
1923 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1924 /* point for an empty self bytes to generate a non-empty bytes */
1925 /* Special case so the remaining code always gets a non-empty bytes */
1926 if (PyBytes_GET_SIZE(self) == 0) {
1927 return return_self(self);
1928 }
1929
1930 if (to_len == 0) {
1931 /* delete all occurances of 'from' bytes */
1932 if (from_len == 1) {
1933 return replace_delete_single_character(
1934 self, from_s[0], maxcount);
1935 } else {
1936 return replace_delete_substring(self, from_s, from_len, maxcount);
1937 }
1938 }
1939
1940 /* Handle special case where both bytes have the same length */
1941
1942 if (from_len == to_len) {
1943 if (from_len == 1) {
1944 return replace_single_character_in_place(
1945 self,
1946 from_s[0],
1947 to_s[0],
1948 maxcount);
1949 } else {
1950 return replace_substring_in_place(
1951 self, from_s, from_len, to_s, to_len, maxcount);
1952 }
1953 }
1954
1955 /* Otherwise use the more generic algorithms */
1956 if (from_len == 1) {
1957 return replace_single_character(self, from_s[0],
1958 to_s, to_len, maxcount);
1959 } else {
1960 /* len('from')>=2, len('to')>=1 */
1961 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1962 }
1963}
1964
1965PyDoc_STRVAR(replace__doc__,
1966"B.replace (old, new[, count]) -> bytes\n\
1967\n\
1968Return a copy of bytes B with all occurrences of subsection\n\
1969old replaced by new. If the optional argument count is\n\
1970given, only the first count occurrences are replaced.");
1971
1972static PyObject *
1973bytes_replace(PyBytesObject *self, PyObject *args)
1974{
1975 Py_ssize_t count = -1;
1976 PyObject *from, *to;
1977 const char *from_s, *to_s;
1978 Py_ssize_t from_len, to_len;
1979
1980 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1981 return NULL;
1982
1983 if (PyBytes_Check(from)) {
1984 from_s = PyBytes_AS_STRING(from);
1985 from_len = PyBytes_GET_SIZE(from);
1986 }
1987 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1988 return NULL;
1989
1990 if (PyBytes_Check(to)) {
1991 to_s = PyBytes_AS_STRING(to);
1992 to_len = PyBytes_GET_SIZE(to);
1993 }
1994 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1995 return NULL;
1996
1997 return (PyObject *)replace((PyBytesObject *) self,
1998 from_s, from_len,
1999 to_s, to_len, count);
2000}
2001
2002
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16. Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields). For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit + 1 pieces (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The parameter is parenthesized so that expression arguments such as
   `a << b` expand with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* The SPLIT_* macros below rely on names supplied by the function that
   expands them:
     PyObject *str, *list   -- scratch item and the list being built
     Py_ssize_t count       -- (SPLIT_ADD only) number of items stored so far
     onError:               -- label jumped to when an allocation or an
                               append fails */

/* Append the slice data[left:right] to `list` using PyList_Append.
   NOTE: this expands to several statements, so it must not be used as the
   unbraced body of an if/else or a loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store the slice data[left:right] as item number `count` of `list`.
   The first MAX_PREALLOC items are written straight into the preallocated
   slots (PyList_SET_ITEM takes over the reference); later items are
   appended, after which our own reference is dropped.  `count` is
   incremented afterwards. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
2047
2048
2049Py_LOCAL_INLINE(PyObject *)
2050split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2051{
2052 register Py_ssize_t i, j, count=0;
2053 PyObject *str;
2054 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2055
2056 if (list == NULL)
2057 return NULL;
2058
2059 i = j = 0;
2060 while ((j < len) && (maxcount-- > 0)) {
2061 for(; j<len; j++) {
2062 /* I found that using memchr makes no difference */
2063 if (s[j] == ch) {
2064 SPLIT_ADD(s, i, j);
2065 i = j = j + 1;
2066 break;
2067 }
2068 }
2069 }
2070 if (i <= len) {
2071 SPLIT_ADD(s, i, len);
2072 }
2073 FIX_PREALLOC_SIZE(list);
2074 return list;
2075
2076 onError:
2077 Py_DECREF(list);
2078 return NULL;
2079}
2080
2081PyDoc_STRVAR(split__doc__,
2082"B.split(sep [,maxsplit]) -> list of bytes\n\
2083\n\
2084Return a list of the bytes in the string B, using sep as the\n\
2085delimiter. If maxsplit is given, at most maxsplit\n\
2086splits are done.");
2087
2088static PyObject *
2089bytes_split(PyBytesObject *self, PyObject *args)
2090{
2091 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2092 Py_ssize_t maxsplit = -1, count=0;
2093 const char *s = PyBytes_AS_STRING(self), *sub;
2094 PyObject *list, *str, *subobj;
2095#ifdef USE_FAST
2096 Py_ssize_t pos;
2097#endif
2098
2099 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2100 return NULL;
2101 if (maxsplit < 0)
2102 maxsplit = PY_SSIZE_T_MAX;
2103 if (PyBytes_Check(subobj)) {
2104 sub = PyBytes_AS_STRING(subobj);
2105 n = PyBytes_GET_SIZE(subobj);
2106 }
2107 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2108 return NULL;
2109
2110 if (n == 0) {
2111 PyErr_SetString(PyExc_ValueError, "empty separator");
2112 return NULL;
2113 }
2114 else if (n == 1)
2115 return split_char(s, len, sub[0], maxsplit);
2116
2117 list = PyList_New(PREALLOC_SIZE(maxsplit));
2118 if (list == NULL)
2119 return NULL;
2120
2121#ifdef USE_FAST
2122 i = j = 0;
2123 while (maxsplit-- > 0) {
2124 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2125 if (pos < 0)
2126 break;
2127 j = i+pos;
2128 SPLIT_ADD(s, i, j);
2129 i = j + n;
2130 }
2131#else
2132 i = j = 0;
2133 while ((j+n <= len) && (maxsplit-- > 0)) {
2134 for (; j+n <= len; j++) {
2135 if (Py_STRING_MATCH(s, j, sub, n)) {
2136 SPLIT_ADD(s, i, j);
2137 i = j = j + n;
2138 break;
2139 }
2140 }
2141 }
2142#endif
2143 SPLIT_ADD(s, i, len);
2144 FIX_PREALLOC_SIZE(list);
2145 return list;
2146
2147 onError:
2148 Py_DECREF(list);
2149 return NULL;
2150}
2151
2152PyDoc_STRVAR(partition__doc__,
2153"B.partition(sep) -> (head, sep, tail)\n\
2154\n\
2155Searches for the separator sep in B, and returns the part before it,\n\
2156the separator itself, and the part after it. If the separator is not\n\
2157found, returns B and two empty bytes.");
2158
2159static PyObject *
2160bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2161{
2162 PyObject *bytesep, *result;
2163
2164 bytesep = PyBytes_FromObject(sep_obj);
2165 if (! bytesep)
2166 return NULL;
2167
2168 result = stringlib_partition(
2169 (PyObject*) self,
2170 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002171 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002172 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2173 );
2174
2175 Py_DECREF(bytesep);
2176 return result;
2177}
2178
2179PyDoc_STRVAR(rpartition__doc__,
2180"B.rpartition(sep) -> (tail, sep, head)\n\
2181\n\
2182Searches for the separator sep in B, starting at the end of B, and returns\n\
2183the part before it, the separator itself, and the part after it. If the\n\
2184separator is not found, returns two empty bytes and B.");
2185
2186static PyObject *
2187bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2188{
2189 PyObject *bytesep, *result;
2190
2191 bytesep = PyBytes_FromObject(sep_obj);
2192 if (! bytesep)
2193 return NULL;
2194
2195 result = stringlib_rpartition(
2196 (PyObject*) self,
2197 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002198 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002199 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2200 );
2201
2202 Py_DECREF(bytesep);
2203 return result;
2204}
2205
2206Py_LOCAL_INLINE(PyObject *)
2207rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2208{
2209 register Py_ssize_t i, j, count=0;
2210 PyObject *str;
2211 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2212
2213 if (list == NULL)
2214 return NULL;
2215
2216 i = j = len - 1;
2217 while ((i >= 0) && (maxcount-- > 0)) {
2218 for (; i >= 0; i--) {
2219 if (s[i] == ch) {
2220 SPLIT_ADD(s, i + 1, j + 1);
2221 j = i = i - 1;
2222 break;
2223 }
2224 }
2225 }
2226 if (j >= -1) {
2227 SPLIT_ADD(s, 0, j + 1);
2228 }
2229 FIX_PREALLOC_SIZE(list);
2230 if (PyList_Reverse(list) < 0)
2231 goto onError;
2232
2233 return list;
2234
2235 onError:
2236 Py_DECREF(list);
2237 return NULL;
2238}
2239
2240PyDoc_STRVAR(rsplit__doc__,
2241"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2242\n\
2243Return a list of the sections in the byte B, using sep as the\n\
2244delimiter, starting at the end of the bytes and working\n\
2245to the front. If maxsplit is given, at most maxsplit splits are\n\
2246done.");
2247
2248static PyObject *
2249bytes_rsplit(PyBytesObject *self, PyObject *args)
2250{
2251 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2252 Py_ssize_t maxsplit = -1, count=0;
2253 const char *s = PyBytes_AS_STRING(self), *sub;
2254 PyObject *list, *str, *subobj;
2255
2256 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2257 return NULL;
2258 if (maxsplit < 0)
2259 maxsplit = PY_SSIZE_T_MAX;
2260 if (PyBytes_Check(subobj)) {
2261 sub = PyBytes_AS_STRING(subobj);
2262 n = PyBytes_GET_SIZE(subobj);
2263 }
2264 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2265 return NULL;
2266
2267 if (n == 0) {
2268 PyErr_SetString(PyExc_ValueError, "empty separator");
2269 return NULL;
2270 }
2271 else if (n == 1)
2272 return rsplit_char(s, len, sub[0], maxsplit);
2273
2274 list = PyList_New(PREALLOC_SIZE(maxsplit));
2275 if (list == NULL)
2276 return NULL;
2277
2278 j = len;
2279 i = j - n;
2280
2281 while ( (i >= 0) && (maxsplit-- > 0) ) {
2282 for (; i>=0; i--) {
2283 if (Py_STRING_MATCH(s, i, sub, n)) {
2284 SPLIT_ADD(s, i + n, j);
2285 j = i;
2286 i -= n;
2287 break;
2288 }
2289 }
2290 }
2291 SPLIT_ADD(s, 0, j);
2292 FIX_PREALLOC_SIZE(list);
2293 if (PyList_Reverse(list) < 0)
2294 goto onError;
2295 return list;
2296
2297onError:
2298 Py_DECREF(list);
2299 return NULL;
2300}
2301
2302PyDoc_STRVAR(extend__doc__,
2303"B.extend(iterable int) -> None\n\
2304\n\
2305Append all the elements from the iterator or sequence to the\n\
2306end of the bytes.");
2307static PyObject *
2308bytes_extend(PyBytesObject *self, PyObject *arg)
2309{
2310 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2311 return NULL;
2312 Py_RETURN_NONE;
2313}
2314
2315
2316PyDoc_STRVAR(reverse__doc__,
2317"B.reverse() -> None\n\
2318\n\
2319Reverse the order of the values in bytes in place.");
2320static PyObject *
2321bytes_reverse(PyBytesObject *self, PyObject *unused)
2322{
2323 char swap, *head, *tail;
2324 Py_ssize_t i, j, n = self->ob_size;
2325
2326 j = n / 2;
2327 head = self->ob_bytes;
2328 tail = head + n - 1;
2329 for (i = 0; i < j; i++) {
2330 swap = *head;
2331 *head++ = *tail;
2332 *tail-- = swap;
2333 }
2334
2335 Py_RETURN_NONE;
2336}
2337
2338PyDoc_STRVAR(insert__doc__,
2339"B.insert(index, int) -> None\n\
2340\n\
2341Insert a single item into the bytes before the given index.");
2342static PyObject *
2343bytes_insert(PyBytesObject *self, PyObject *args)
2344{
2345 int value;
2346 Py_ssize_t where, n = self->ob_size;
2347
2348 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2349 return NULL;
2350
2351 if (n == PY_SSIZE_T_MAX) {
2352 PyErr_SetString(PyExc_OverflowError,
2353 "cannot add more objects to bytes");
2354 return NULL;
2355 }
2356 if (value < 0 || value >= 256) {
2357 PyErr_SetString(PyExc_ValueError,
2358 "byte must be in range(0, 256)");
2359 return NULL;
2360 }
2361 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2362 return NULL;
2363
2364 if (where < 0) {
2365 where += n;
2366 if (where < 0)
2367 where = 0;
2368 }
2369 if (where > n)
2370 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002371 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002372 self->ob_bytes[where] = value;
2373
2374 Py_RETURN_NONE;
2375}
2376
2377PyDoc_STRVAR(append__doc__,
2378"B.append(int) -> None\n\
2379\n\
2380Append a single item to the end of the bytes.");
2381static PyObject *
2382bytes_append(PyBytesObject *self, PyObject *arg)
2383{
2384 int value;
2385 Py_ssize_t n = self->ob_size;
2386
2387 if (! _getbytevalue(arg, &value))
2388 return NULL;
2389 if (n == PY_SSIZE_T_MAX) {
2390 PyErr_SetString(PyExc_OverflowError,
2391 "cannot add more objects to bytes");
2392 return NULL;
2393 }
2394 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2395 return NULL;
2396
2397 self->ob_bytes[n] = value;
2398
2399 Py_RETURN_NONE;
2400}
2401
2402PyDoc_STRVAR(pop__doc__,
2403"B.pop([index]) -> int\n\
2404\n\
2405Remove and return a single item from the bytes. If no index\n\
2406argument is give, will pop the last value.");
2407static PyObject *
2408bytes_pop(PyBytesObject *self, PyObject *args)
2409{
2410 int value;
2411 Py_ssize_t where = -1, n = self->ob_size;
2412
2413 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2414 return NULL;
2415
2416 if (n == 0) {
2417 PyErr_SetString(PyExc_OverflowError,
2418 "cannot pop an empty bytes");
2419 return NULL;
2420 }
2421 if (where < 0)
2422 where += self->ob_size;
2423 if (where < 0 || where >= self->ob_size) {
2424 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2425 return NULL;
2426 }
2427
2428 value = self->ob_bytes[where];
2429 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2430 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2431 return NULL;
2432
2433 return PyInt_FromLong(value);
2434}
2435
2436PyDoc_STRVAR(remove__doc__,
2437"B.remove(int) -> None\n\
2438\n\
2439Remove the first occurance of a value in bytes");
2440static PyObject *
2441bytes_remove(PyBytesObject *self, PyObject *arg)
2442{
2443 int value;
2444 Py_ssize_t where, n = self->ob_size;
2445
2446 if (! _getbytevalue(arg, &value))
2447 return NULL;
2448
2449 for (where = 0; where < n; where++) {
2450 if (self->ob_bytes[where] == value)
2451 break;
2452 }
2453 if (where == n) {
2454 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2455 return NULL;
2456 }
2457
2458 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2459 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2460 return NULL;
2461
2462 Py_RETURN_NONE;
2463}
2464
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002465/* XXX These two helpers could be optimized if argsize == 1 */
2466
2467Py_ssize_t
2468lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2469 void *argptr, Py_ssize_t argsize)
2470{
2471 Py_ssize_t i = 0;
2472 while (i < mysize && memchr(argptr, myptr[i], argsize))
2473 i++;
2474 return i;
2475}
2476
2477Py_ssize_t
2478rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2479 void *argptr, Py_ssize_t argsize)
2480{
2481 Py_ssize_t i = mysize - 1;
2482 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2483 i--;
2484 return i + 1;
2485}
2486
2487PyDoc_STRVAR(strip__doc__,
2488"B.strip(bytes) -> bytes\n\
2489\n\
2490Strip leading and trailing bytes contained in the argument.");
2491static PyObject *
2492bytes_strip(PyBytesObject *self, PyObject *arg)
2493{
2494 Py_ssize_t left, right, mysize, argsize;
2495 void *myptr, *argptr;
2496 if (arg == NULL || !PyBytes_Check(arg)) {
2497 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2498 return NULL;
2499 }
2500 myptr = self->ob_bytes;
2501 mysize = self->ob_size;
2502 argptr = ((PyBytesObject *)arg)->ob_bytes;
2503 argsize = ((PyBytesObject *)arg)->ob_size;
2504 left = lstrip_helper(myptr, mysize, argptr, argsize);
2505 right = rstrip_helper(myptr, mysize, argptr, argsize);
2506 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2507}
2508
2509PyDoc_STRVAR(lstrip__doc__,
2510"B.lstrip(bytes) -> bytes\n\
2511\n\
2512Strip leading bytes contained in the argument.");
2513static PyObject *
2514bytes_lstrip(PyBytesObject *self, PyObject *arg)
2515{
2516 Py_ssize_t left, right, mysize, argsize;
2517 void *myptr, *argptr;
2518 if (arg == NULL || !PyBytes_Check(arg)) {
2519 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2520 return NULL;
2521 }
2522 myptr = self->ob_bytes;
2523 mysize = self->ob_size;
2524 argptr = ((PyBytesObject *)arg)->ob_bytes;
2525 argsize = ((PyBytesObject *)arg)->ob_size;
2526 left = lstrip_helper(myptr, mysize, argptr, argsize);
2527 right = mysize;
2528 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2529}
2530
2531PyDoc_STRVAR(rstrip__doc__,
2532"B.rstrip(bytes) -> bytes\n\
2533\n\
2534Strip trailing bytes contained in the argument.");
2535static PyObject *
2536bytes_rstrip(PyBytesObject *self, PyObject *arg)
2537{
2538 Py_ssize_t left, right, mysize, argsize;
2539 void *myptr, *argptr;
2540 if (arg == NULL || !PyBytes_Check(arg)) {
2541 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2542 return NULL;
2543 }
2544 myptr = self->ob_bytes;
2545 mysize = self->ob_size;
2546 argptr = ((PyBytesObject *)arg)->ob_bytes;
2547 argsize = ((PyBytesObject *)arg)->ob_size;
2548 left = 0;
2549 right = rstrip_helper(myptr, mysize, argptr, argsize);
2550 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2551}
Neal Norwitz6968b052007-02-27 19:02:19 +00002552
Guido van Rossumd624f182006-04-24 13:47:05 +00002553PyDoc_STRVAR(decode_doc,
2554"B.decode([encoding[,errors]]) -> unicode obect.\n\
2555\n\
2556Decodes B using the codec registered for encoding. encoding defaults\n\
2557to the default encoding. errors may be given to set a different error\n\
2558handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2559a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2560as well as any other name registerd with codecs.register_error that is\n\
2561able to handle UnicodeDecodeErrors.");
2562
2563static PyObject *
2564bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002565{
Guido van Rossumd624f182006-04-24 13:47:05 +00002566 const char *encoding = NULL;
2567 const char *errors = NULL;
2568
2569 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2570 return NULL;
2571 if (encoding == NULL)
2572 encoding = PyUnicode_GetDefaultEncoding();
2573 return PyCodec_Decode(self, encoding, errors);
2574}
2575
Guido van Rossuma0867f72006-05-05 04:34:18 +00002576PyDoc_STRVAR(alloc_doc,
2577"B.__alloc__() -> int\n\
2578\n\
2579Returns the number of bytes actually allocated.");
2580
2581static PyObject *
2582bytes_alloc(PyBytesObject *self)
2583{
2584 return PyInt_FromSsize_t(self->ob_alloc);
2585}
2586
Guido van Rossum20188312006-05-05 15:15:40 +00002587PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002588"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002589\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002590Concatenates any number of bytes objects, with B in between each pair.\n\
2591Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002592
2593static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002594bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002595{
2596 PyObject *seq;
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002597 Py_ssize_t mysize = self->ob_size;
Guido van Rossum20188312006-05-05 15:15:40 +00002598 Py_ssize_t i;
2599 Py_ssize_t n;
2600 PyObject **items;
2601 Py_ssize_t totalsize = 0;
2602 PyObject *result;
2603 char *dest;
2604
2605 seq = PySequence_Fast(it, "can only join an iterable");
2606 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002607 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002608 n = PySequence_Fast_GET_SIZE(seq);
2609 items = PySequence_Fast_ITEMS(seq);
2610
2611 /* Compute the total size, and check that they are all bytes */
2612 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002613 PyObject *obj = items[i];
2614 if (!PyBytes_Check(obj)) {
2615 PyErr_Format(PyExc_TypeError,
2616 "can only join an iterable of bytes "
2617 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002618 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002619 (long)i, obj->ob_type->tp_name);
2620 goto error;
2621 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002622 if (i > 0)
2623 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002624 totalsize += PyBytes_GET_SIZE(obj);
2625 if (totalsize < 0) {
2626 PyErr_NoMemory();
2627 goto error;
2628 }
Guido van Rossum20188312006-05-05 15:15:40 +00002629 }
2630
2631 /* Allocate the result, and copy the bytes */
2632 result = PyBytes_FromStringAndSize(NULL, totalsize);
2633 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002634 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002635 dest = PyBytes_AS_STRING(result);
2636 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002637 PyObject *obj = items[i];
2638 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002639 if (i > 0) {
2640 memcpy(dest, self->ob_bytes, mysize);
2641 dest += mysize;
2642 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002643 memcpy(dest, PyBytes_AS_STRING(obj), size);
2644 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002645 }
2646
2647 /* Done */
2648 Py_DECREF(seq);
2649 return result;
2650
2651 /* Error handling */
2652 error:
2653 Py_DECREF(seq);
2654 return NULL;
2655}
2656
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002657PyDoc_STRVAR(fromhex_doc,
2658"bytes.fromhex(string) -> bytes\n\
2659\n\
2660Create a bytes object from a string of hexadecimal numbers.\n\
2661Spaces between two numbers are accepted. Example:\n\
2662bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2663
/* Map one hexadecimal digit character to its value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else.  Plain range comparisons are used instead of the <ctype.h>
 * classifiers so that the result cannot vary with the C library's locale
 * handling.
 */
static int
hex_digit_to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
2677
2678static PyObject *
2679bytes_fromhex(PyObject *cls, PyObject *args)
2680{
2681 PyObject *newbytes;
2682 char *hex, *buf;
2683 Py_ssize_t len, byteslen, i, j;
2684 int top, bot;
2685
2686 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2687 return NULL;
2688
2689 byteslen = len / 2; /* max length if there are no spaces */
2690
2691 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2692 if (!newbytes)
2693 return NULL;
2694 buf = PyBytes_AS_STRING(newbytes);
2695
Guido van Rossum4355a472007-05-04 05:00:04 +00002696 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002697 /* skip over spaces in the input */
2698 while (Py_CHARMASK(hex[i]) == ' ')
2699 i++;
2700 if (i >= len)
2701 break;
2702 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2703 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2704 if (top == -1 || bot == -1) {
2705 PyErr_Format(PyExc_ValueError,
2706 "non-hexadecimal number string '%c%c' found in "
2707 "fromhex() arg at position %zd",
2708 hex[i], hex[i+1], i);
2709 goto error;
2710 }
2711 buf[j++] = (top << 4) + bot;
2712 }
2713 if (PyBytes_Resize(newbytes, j) < 0)
2714 goto error;
2715 return newbytes;
2716
2717 error:
2718 Py_DECREF(newbytes);
2719 return NULL;
2720}
2721
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002722PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2723
2724static PyObject *
2725bytes_reduce(PyBytesObject *self)
2726{
2727 return Py_BuildValue("(O(s#))",
2728 self->ob_type,
2729 self->ob_bytes == NULL ? "" : self->ob_bytes,
2730 self->ob_size);
2731}
2732
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002733static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002734 (lenfunc)bytes_length, /* sq_length */
2735 (binaryfunc)bytes_concat, /* sq_concat */
2736 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2737 (ssizeargfunc)bytes_getitem, /* sq_item */
2738 0, /* sq_slice */
2739 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2740 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002741 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002742 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2743 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002744};
2745
2746static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002747 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002748 (binaryfunc)bytes_subscript,
2749 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002750};
2751
2752static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002753 (readbufferproc)bytes_getbuffer,
2754 (writebufferproc)bytes_getbuffer,
2755 (segcountproc)bytes_getsegcount,
2756 /* XXX Bytes are not characters! But we need to implement
2757 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
2758 (charbufferproc)bytes_getbuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002759};
2760
2761static PyMethodDef
2762bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002763 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2764 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2765 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2766 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2767 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2768 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2769 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2770 startswith__doc__},
2771 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2772 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2773 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2774 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2775 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2776 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2777 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2778 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2779 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2780 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2781 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2782 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002783 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2784 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2785 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002786 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002787 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002788 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2789 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002790 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002791 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002792 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002793};
2794
2795PyDoc_STRVAR(bytes_doc,
2796"bytes([iterable]) -> new array of bytes.\n\
2797\n\
2798If an argument is given it must be an iterable yielding ints in range(256).");
2799
2800PyTypeObject PyBytes_Type = {
2801 PyObject_HEAD_INIT(&PyType_Type)
2802 0,
2803 "bytes",
2804 sizeof(PyBytesObject),
2805 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002806 (destructor)bytes_dealloc, /* tp_dealloc */
2807 0, /* tp_print */
2808 0, /* tp_getattr */
2809 0, /* tp_setattr */
2810 0, /* tp_compare */
2811 (reprfunc)bytes_repr, /* tp_repr */
2812 0, /* tp_as_number */
2813 &bytes_as_sequence, /* tp_as_sequence */
2814 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002815 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002816 0, /* tp_call */
2817 (reprfunc)bytes_str, /* tp_str */
2818 PyObject_GenericGetAttr, /* tp_getattro */
2819 0, /* tp_setattro */
2820 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002821 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002822 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002823 bytes_doc, /* tp_doc */
2824 0, /* tp_traverse */
2825 0, /* tp_clear */
2826 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2827 0, /* tp_weaklistoffset */
2828 0, /* tp_iter */
2829 0, /* tp_iternext */
2830 bytes_methods, /* tp_methods */
2831 0, /* tp_members */
2832 0, /* tp_getset */
2833 0, /* tp_base */
2834 0, /* tp_dict */
2835 0, /* tp_descr_get */
2836 0, /* tp_descr_set */
2837 0, /* tp_dictoffset */
2838 (initproc)bytes_init, /* tp_init */
2839 PyType_GenericAlloc, /* tp_alloc */
2840 PyType_GenericNew, /* tp_new */
2841 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002842};