blob: 2cdaf377680ba8576e47ae3232d33385ef43157d [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
Guido van Rossumad7d8d12007-04-13 01:39:34 +000034/* Helpers */
35
36static int
37_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000038{
39 PyObject *intarg = PyNumber_Int(arg);
40 if (! intarg)
41 return 0;
42 *value = PyInt_AsLong(intarg);
43 Py_DECREF(intarg);
44 if (*value < 0 || *value >= 256) {
45 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
46 return 0;
47 }
48 return 1;
49}
50
Guido van Rossumad7d8d12007-04-13 01:39:34 +000051Py_ssize_t
52_getbuffer(PyObject *obj, void **ptr)
53{
54 PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
55
56 if (buffer == NULL ||
57 PyUnicode_Check(obj) ||
58 buffer->bf_getreadbuffer == NULL ||
59 buffer->bf_getsegcount == NULL ||
60 buffer->bf_getsegcount(obj, NULL) != 1)
61 {
62 *ptr = NULL;
63 return -1;
64 }
65
66 return buffer->bf_getreadbuffer(obj, 0, ptr);
67}
68
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069/* Direct API functions */
70
71PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000072PyBytes_FromObject(PyObject *input)
73{
74 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
75 input, NULL);
76}
77
78PyObject *
79PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080{
81 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000082 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000083
Guido van Rossumd624f182006-04-24 13:47:05 +000084 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000085
86 new = PyObject_New(PyBytesObject, &PyBytes_Type);
87 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000088 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000089
Guido van Rossumf15a29f2007-05-04 00:41:39 +000090 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +000091 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000092 alloc = 0;
93 }
Guido van Rossumd624f182006-04-24 13:47:05 +000094 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +000095 alloc = size + 1;
96 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +000097 if (new->ob_bytes == NULL) {
98 Py_DECREF(new);
99 return NULL;
100 }
101 if (bytes != NULL)
102 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000103 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000104 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000105 new->ob_size = size;
106 new->ob_alloc = alloc;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000107
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108 return (PyObject *)new;
109}
110
111Py_ssize_t
112PyBytes_Size(PyObject *self)
113{
114 assert(self != NULL);
115 assert(PyBytes_Check(self));
116
Guido van Rossum20188312006-05-05 15:15:40 +0000117 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118}
119
120char *
121PyBytes_AsString(PyObject *self)
122{
123 assert(self != NULL);
124 assert(PyBytes_Check(self));
125
Guido van Rossum20188312006-05-05 15:15:40 +0000126 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127}
128
129int
130PyBytes_Resize(PyObject *self, Py_ssize_t size)
131{
132 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000133 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134
135 assert(self != NULL);
136 assert(PyBytes_Check(self));
137 assert(size >= 0);
138
Guido van Rossuma0867f72006-05-05 04:34:18 +0000139 if (size < alloc / 2) {
140 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000141 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000142 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000143 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000144 /* Within allocated size; quick exit */
145 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000146 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000147 return 0;
148 }
149 else if (size <= alloc * 1.125) {
150 /* Moderate upsize; overallocate similar to list_resize() */
151 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
152 }
153 else {
154 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000155 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000156 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000157
158 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000159 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000160 PyErr_NoMemory();
161 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000162 }
163
Guido van Rossumd624f182006-04-24 13:47:05 +0000164 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000165 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000166 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000167 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
168
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000169 return 0;
170}
171
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000172PyObject *
173PyBytes_Concat(PyObject *a, PyObject *b)
174{
175 Py_ssize_t asize, bsize, size;
176 void *aptr, *bptr;
177 PyBytesObject *result;
178
179 asize = _getbuffer(a, &aptr);
180 bsize = _getbuffer(b, &bptr);
181 if (asize < 0 || bsize < 0) {
182 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
183 a->ob_type->tp_name, b->ob_type->tp_name);
184 return NULL;
185 }
186
187 size = asize + bsize;
188 if (size < 0)
189 return PyErr_NoMemory();
190
191 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
192 if (result != NULL) {
193 memcpy(result->ob_bytes, aptr, asize);
194 memcpy(result->ob_bytes + asize, bptr, bsize);
195 }
196 return (PyObject *)result;
197}
198
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199/* Functions stuffed into the type object */
200
201static Py_ssize_t
202bytes_length(PyBytesObject *self)
203{
204 return self->ob_size;
205}
206
207static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000208bytes_concat(PyBytesObject *self, PyObject *other)
209{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000211}
212
213static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000214bytes_iconcat(PyBytesObject *self, PyObject *other)
215{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000216 void *optr;
Guido van Rossum13e57212006-04-27 22:54:26 +0000217 Py_ssize_t osize;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000218 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000219 Py_ssize_t size;
220
Guido van Rossum4355a472007-05-04 05:00:04 +0000221 /* XXX What if other == self? */
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000222 osize = _getbuffer(other, &optr);
223 if (osize < 0) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000224 PyErr_Format(PyExc_TypeError,
225 "can't concat bytes to %.100s", other->ob_type->tp_name);
226 return NULL;
227 }
228
229 mysize = self->ob_size;
Guido van Rossum13e57212006-04-27 22:54:26 +0000230 size = mysize + osize;
231 if (size < 0)
232 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000233 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000234 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000235 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
236 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000237 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000238 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000239 memcpy(self->ob_bytes + mysize, optr, osize);
Guido van Rossum13e57212006-04-27 22:54:26 +0000240 Py_INCREF(self);
241 return (PyObject *)self;
242}
243
244static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000245bytes_repeat(PyBytesObject *self, Py_ssize_t count)
246{
247 PyBytesObject *result;
248 Py_ssize_t mysize;
249 Py_ssize_t size;
250
251 if (count < 0)
252 count = 0;
253 mysize = self->ob_size;
254 size = mysize * count;
255 if (count != 0 && size / count != mysize)
256 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000257 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000258 if (result != NULL && size != 0) {
259 if (mysize == 1)
260 memset(result->ob_bytes, self->ob_bytes[0], size);
261 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000262 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000263 for (i = 0; i < count; i++)
264 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
265 }
266 }
267 return (PyObject *)result;
268}
269
270static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000271bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
272{
273 Py_ssize_t mysize;
274 Py_ssize_t size;
275
276 if (count < 0)
277 count = 0;
278 mysize = self->ob_size;
279 size = mysize * count;
280 if (count != 0 && size / count != mysize)
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000283 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000284 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
285 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000286 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000287 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000288
Guido van Rossum13e57212006-04-27 22:54:26 +0000289 if (mysize == 1)
290 memset(self->ob_bytes, self->ob_bytes[0], size);
291 else {
292 Py_ssize_t i;
293 for (i = 1; i < count; i++)
294 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
295 }
296
297 Py_INCREF(self);
298 return (PyObject *)self;
299}
300
301static int
302bytes_substring(PyBytesObject *self, PyBytesObject *other)
303{
304 Py_ssize_t i;
305
306 if (other->ob_size == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000307 return memchr(self->ob_bytes, other->ob_bytes[0],
Guido van Rossum13e57212006-04-27 22:54:26 +0000308 self->ob_size) != NULL;
309 }
310 if (other->ob_size == 0)
311 return 1; /* Edge case */
312 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
313 /* XXX Yeah, yeah, lots of optimizations possible... */
314 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
315 return 1;
316 }
317 return 0;
318}
319
320static int
321bytes_contains(PyBytesObject *self, PyObject *value)
322{
323 Py_ssize_t ival;
324
325 if (PyBytes_Check(value))
326 return bytes_substring(self, (PyBytesObject *)value);
327
Thomas Woutersd204a712006-08-22 13:41:17 +0000328 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000329 if (ival == -1 && PyErr_Occurred())
330 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000331 if (ival < 0 || ival >= 256) {
332 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
333 return -1;
334 }
335
336 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
337}
338
339static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000340bytes_getitem(PyBytesObject *self, Py_ssize_t i)
341{
342 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000343 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000344 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000345 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
346 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000347 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000348 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
349}
350
351static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000352bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000353{
Thomas Wouters376446d2006-12-19 08:30:14 +0000354 if (PyIndex_Check(item)) {
355 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000356
Thomas Wouters376446d2006-12-19 08:30:14 +0000357 if (i == -1 && PyErr_Occurred())
358 return NULL;
359
360 if (i < 0)
361 i += PyBytes_GET_SIZE(self);
362
363 if (i < 0 || i >= self->ob_size) {
364 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
365 return NULL;
366 }
367 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
368 }
369 else if (PySlice_Check(item)) {
370 Py_ssize_t start, stop, step, slicelength, cur, i;
371 if (PySlice_GetIndicesEx((PySliceObject *)item,
372 PyBytes_GET_SIZE(self),
373 &start, &stop, &step, &slicelength) < 0) {
374 return NULL;
375 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000376
Thomas Wouters376446d2006-12-19 08:30:14 +0000377 if (slicelength <= 0)
378 return PyBytes_FromStringAndSize("", 0);
379 else if (step == 1) {
380 return PyBytes_FromStringAndSize(self->ob_bytes + start,
381 slicelength);
382 }
383 else {
384 char *source_buf = PyBytes_AS_STRING(self);
385 char *result_buf = (char *)PyMem_Malloc(slicelength);
386 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000387
Thomas Wouters376446d2006-12-19 08:30:14 +0000388 if (result_buf == NULL)
389 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000390
Thomas Wouters376446d2006-12-19 08:30:14 +0000391 for (cur = start, i = 0; i < slicelength;
392 cur += step, i++) {
393 result_buf[i] = source_buf[cur];
394 }
395 result = PyBytes_FromStringAndSize(result_buf, slicelength);
396 PyMem_Free(result_buf);
397 return result;
398 }
399 }
400 else {
401 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
402 return NULL;
403 }
404}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000405
Guido van Rossumd624f182006-04-24 13:47:05 +0000406static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000407bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000408 PyObject *values)
409{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000410 Py_ssize_t avail, needed;
411 void *bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000412
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000413 if (values == (PyObject *)self) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000414 /* Make a copy an call this function recursively */
415 int err;
416 values = PyBytes_FromObject(values);
417 if (values == NULL)
418 return -1;
419 err = bytes_setslice(self, lo, hi, values);
420 Py_DECREF(values);
421 return err;
422 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000423 if (values == NULL) {
424 /* del b[lo:hi] */
425 bytes = NULL;
426 needed = 0;
427 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000428 else {
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000429 needed = _getbuffer(values, &bytes);
430 if (needed < 0) {
431 PyErr_Format(PyExc_TypeError,
432 "can't set bytes slice from %.100s",
433 values->ob_type->tp_name);
434 return -1;
435 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000436 }
437
438 if (lo < 0)
439 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000440 if (hi < lo)
441 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000442 if (hi > self->ob_size)
443 hi = self->ob_size;
444
445 avail = hi - lo;
446 if (avail < 0)
447 lo = hi = avail = 0;
448
449 if (avail != needed) {
450 if (avail > needed) {
451 /*
452 0 lo hi old_size
453 | |<----avail----->|<-----tomove------>|
454 | |<-needed->|<-----tomove------>|
455 0 lo new_hi new_size
456 */
457 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
458 self->ob_size - hi);
459 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000460 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 self->ob_size + needed - avail) < 0)
462 return -1;
463 if (avail < needed) {
464 /*
465 0 lo hi old_size
466 | |<-avail->|<-----tomove------>|
467 | |<----needed---->|<-----tomove------>|
468 0 lo new_hi new_size
469 */
470 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
471 self->ob_size - lo - needed);
472 }
473 }
474
475 if (needed > 0)
476 memcpy(self->ob_bytes + lo, bytes, needed);
477
478 return 0;
479}
480
481static int
482bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
483{
484 Py_ssize_t ival;
485
486 if (i < 0)
487 i += self->ob_size;
488
489 if (i < 0 || i >= self->ob_size) {
490 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
491 return -1;
492 }
493
494 if (value == NULL)
495 return bytes_setslice(self, i, i+1, NULL);
496
Thomas Woutersd204a712006-08-22 13:41:17 +0000497 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000498 if (ival == -1 && PyErr_Occurred())
499 return -1;
500
501 if (ival < 0 || ival >= 256) {
502 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
503 return -1;
504 }
505
506 self->ob_bytes[i] = ival;
507 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000508}
509
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000510static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000511bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
512{
513 Py_ssize_t start, stop, step, slicelen, needed;
514 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000515
Thomas Wouters376446d2006-12-19 08:30:14 +0000516 if (PyIndex_Check(item)) {
517 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
518
519 if (i == -1 && PyErr_Occurred())
520 return -1;
521
522 if (i < 0)
523 i += PyBytes_GET_SIZE(self);
524
525 if (i < 0 || i >= self->ob_size) {
526 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
527 return -1;
528 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000529
Thomas Wouters376446d2006-12-19 08:30:14 +0000530 if (values == NULL) {
531 /* Fall through to slice assignment */
532 start = i;
533 stop = i + 1;
534 step = 1;
535 slicelen = 1;
536 }
537 else {
538 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
539 if (ival == -1 && PyErr_Occurred())
540 return -1;
541 if (ival < 0 || ival >= 256) {
542 PyErr_SetString(PyExc_ValueError,
543 "byte must be in range(0, 256)");
544 return -1;
545 }
546 self->ob_bytes[i] = (char)ival;
547 return 0;
548 }
549 }
550 else if (PySlice_Check(item)) {
551 if (PySlice_GetIndicesEx((PySliceObject *)item,
552 PyBytes_GET_SIZE(self),
553 &start, &stop, &step, &slicelen) < 0) {
554 return -1;
555 }
556 }
557 else {
558 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
559 return -1;
560 }
561
562 if (values == NULL) {
563 bytes = NULL;
564 needed = 0;
565 }
566 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
567 /* Make a copy an call this function recursively */
568 int err;
569 values = PyBytes_FromObject(values);
570 if (values == NULL)
571 return -1;
572 err = bytes_ass_subscript(self, item, values);
573 Py_DECREF(values);
574 return err;
575 }
576 else {
577 assert(PyBytes_Check(values));
578 bytes = ((PyBytesObject *)values)->ob_bytes;
579 needed = ((PyBytesObject *)values)->ob_size;
580 }
581 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
582 if ((step < 0 && start < stop) ||
583 (step > 0 && start > stop))
584 stop = start;
585 if (step == 1) {
586 if (slicelen != needed) {
587 if (slicelen > needed) {
588 /*
589 0 start stop old_size
590 | |<---slicelen--->|<-----tomove------>|
591 | |<-needed->|<-----tomove------>|
592 0 lo new_hi new_size
593 */
594 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
595 self->ob_size - stop);
596 }
597 if (PyBytes_Resize((PyObject *)self,
598 self->ob_size + needed - slicelen) < 0)
599 return -1;
600 if (slicelen < needed) {
601 /*
602 0 lo hi old_size
603 | |<-avail->|<-----tomove------>|
604 | |<----needed---->|<-----tomove------>|
605 0 lo new_hi new_size
606 */
607 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
608 self->ob_size - start - needed);
609 }
610 }
611
612 if (needed > 0)
613 memcpy(self->ob_bytes + start, bytes, needed);
614
615 return 0;
616 }
617 else {
618 if (needed == 0) {
619 /* Delete slice */
620 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000621
Thomas Wouters376446d2006-12-19 08:30:14 +0000622 if (step < 0) {
623 stop = start + 1;
624 start = stop + step * (slicelen - 1) - 1;
625 step = -step;
626 }
627 for (cur = start, i = 0;
628 i < slicelen; cur += step, i++) {
629 Py_ssize_t lim = step - 1;
630
631 if (cur + step >= PyBytes_GET_SIZE(self))
632 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000633
Thomas Wouters376446d2006-12-19 08:30:14 +0000634 memmove(self->ob_bytes + cur - i,
635 self->ob_bytes + cur + 1, lim);
636 }
637 /* Move the tail of the bytes, in one chunk */
638 cur = start + slicelen*step;
639 if (cur < PyBytes_GET_SIZE(self)) {
640 memmove(self->ob_bytes + cur - slicelen,
641 self->ob_bytes + cur,
642 PyBytes_GET_SIZE(self) - cur);
643 }
644 if (PyBytes_Resize((PyObject *)self,
645 PyBytes_GET_SIZE(self) - slicelen) < 0)
646 return -1;
647
648 return 0;
649 }
650 else {
651 /* Assign slice */
652 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000653
Thomas Wouters376446d2006-12-19 08:30:14 +0000654 if (needed != slicelen) {
655 PyErr_Format(PyExc_ValueError,
656 "attempt to assign bytes of size %zd "
657 "to extended slice of size %zd",
658 needed, slicelen);
659 return -1;
660 }
661 for (cur = start, i = 0; i < slicelen; cur += step, i++)
662 self->ob_bytes[cur] = bytes[i];
663 return 0;
664 }
665 }
666}
667
668static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000669bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
670{
Guido van Rossumd624f182006-04-24 13:47:05 +0000671 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000672 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000673 const char *encoding = NULL;
674 const char *errors = NULL;
675 Py_ssize_t count;
676 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000677 PyObject *(*iternext)(PyObject *);
678
Guido van Rossuma0867f72006-05-05 04:34:18 +0000679 if (self->ob_size != 0) {
680 /* Empty previous contents (yes, do this first of all!) */
681 if (PyBytes_Resize((PyObject *)self, 0) < 0)
682 return -1;
683 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000684
Guido van Rossumd624f182006-04-24 13:47:05 +0000685 /* Parse arguments */
686 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
687 &arg, &encoding, &errors))
688 return -1;
689
690 /* Make a quick exit if no first argument */
691 if (arg == NULL) {
692 if (encoding != NULL || errors != NULL) {
693 PyErr_SetString(PyExc_TypeError,
694 "encoding or errors without sequence argument");
695 return -1;
696 }
697 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000698 }
699
Guido van Rossumd624f182006-04-24 13:47:05 +0000700 if (PyUnicode_Check(arg)) {
701 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000702 PyObject *encoded, *new;
Guido van Rossumd624f182006-04-24 13:47:05 +0000703 if (encoding == NULL)
704 encoding = PyUnicode_GetDefaultEncoding();
705 encoded = PyCodec_Encode(arg, encoding, errors);
706 if (encoded == NULL)
707 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000708 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000709 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000710 "encoder did not return a str8 or bytes object (type=%.400s)",
Guido van Rossumd624f182006-04-24 13:47:05 +0000711 encoded->ob_type->tp_name);
712 Py_DECREF(encoded);
713 return -1;
714 }
Guido van Rossum4355a472007-05-04 05:00:04 +0000715 new = bytes_iconcat(self, encoded);
716 Py_DECREF(encoded);
717 if (new == NULL)
718 return -1;
719 Py_DECREF(new);
720 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000721 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000722
Guido van Rossumd624f182006-04-24 13:47:05 +0000723 /* If it's not unicode, there can't be encoding or errors */
724 if (encoding != NULL || errors != NULL) {
725 PyErr_SetString(PyExc_TypeError,
726 "encoding or errors without a string argument");
727 return -1;
728 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000729
Guido van Rossumd624f182006-04-24 13:47:05 +0000730 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000731 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000732 if (count == -1 && PyErr_Occurred())
733 PyErr_Clear();
734 else {
735 if (count < 0) {
736 PyErr_SetString(PyExc_ValueError, "negative count");
737 return -1;
738 }
739 if (count > 0) {
740 if (PyBytes_Resize((PyObject *)self, count))
741 return -1;
742 memset(self->ob_bytes, 0, count);
743 }
744 return 0;
745 }
746
747 if (PyObject_CheckReadBuffer(arg)) {
748 const void *bytes;
749 Py_ssize_t size;
750 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
751 return -1;
752 if (PyBytes_Resize((PyObject *)self, size) < 0)
753 return -1;
754 memcpy(self->ob_bytes, bytes, size);
755 return 0;
756 }
757
758 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000759
760 /* Get the iterator */
761 it = PyObject_GetIter(arg);
762 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000763 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000764 iternext = *it->ob_type->tp_iternext;
765
766 /* Run the iterator to exhaustion */
767 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000768 PyObject *item;
769 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000770
Guido van Rossumd624f182006-04-24 13:47:05 +0000771 /* Get the next item */
772 item = iternext(it);
773 if (item == NULL) {
774 if (PyErr_Occurred()) {
775 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
776 goto error;
777 PyErr_Clear();
778 }
779 break;
780 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000781
Guido van Rossumd624f182006-04-24 13:47:05 +0000782 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000783 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000784 Py_DECREF(item);
785 if (value == -1 && PyErr_Occurred())
786 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000787
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 /* Range check */
789 if (value < 0 || value >= 256) {
790 PyErr_SetString(PyExc_ValueError,
791 "bytes must be in range(0, 256)");
792 goto error;
793 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000794
Guido van Rossumd624f182006-04-24 13:47:05 +0000795 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000796 if (self->ob_size < self->ob_alloc)
797 self->ob_size++;
798 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 goto error;
800 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000801 }
802
803 /* Clean up and return success */
804 Py_DECREF(it);
805 return 0;
806
807 error:
808 /* Error handling when it != NULL */
809 Py_DECREF(it);
810 return -1;
811}
812
Georg Brandlee91be42007-02-24 19:41:35 +0000813/* Mostly copied from string_repr, but without the
814 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000815static PyObject *
816bytes_repr(PyBytesObject *self)
817{
Georg Brandlee91be42007-02-24 19:41:35 +0000818 size_t newsize = 3 + 4 * self->ob_size;
819 PyObject *v;
820 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
821 PyErr_SetString(PyExc_OverflowError,
822 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000823 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000824 }
Georg Brandlee91be42007-02-24 19:41:35 +0000825 v = PyString_FromStringAndSize((char *)NULL, newsize);
826 if (v == NULL) {
827 return NULL;
828 }
829 else {
830 register Py_ssize_t i;
831 register char c;
832 register char *p;
833 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000834
Georg Brandlee91be42007-02-24 19:41:35 +0000835 p = PyString_AS_STRING(v);
836 *p++ = 'b';
837 *p++ = quote;
838 for (i = 0; i < self->ob_size; i++) {
839 /* There's at least enough room for a hex escape
840 and a closing quote. */
841 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
842 c = self->ob_bytes[i];
843 if (c == quote || c == '\\')
844 *p++ = '\\', *p++ = c;
845 else if (c == '\t')
846 *p++ = '\\', *p++ = 't';
847 else if (c == '\n')
848 *p++ = '\\', *p++ = 'n';
849 else if (c == '\r')
850 *p++ = '\\', *p++ = 'r';
851 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000852 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000853 else if (c < ' ' || c >= 0x7f) {
854 /* For performance, we don't want to call
855 PyOS_snprintf here (extra layers of
856 function call). */
857 sprintf(p, "\\x%02x", c & 0xff);
858 p += 4;
859 }
860 else
861 *p++ = c;
862 }
863 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
864 *p++ = quote;
865 *p = '\0';
866 _PyString_Resize(
867 &v, (p - PyString_AS_STRING(v)));
868 return v;
869 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000870}
871
872static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000873bytes_str(PyBytesObject *self)
874{
875 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
876}
877
878static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000879bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000880{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000881 Py_ssize_t self_size, other_size;
882 void *self_bytes, *other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000883 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000884 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000885 int cmp;
886
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000887 /* Bytes can be compared to anything that supports the (binary) buffer
888 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000889
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000890 self_size = _getbuffer(self, &self_bytes);
891 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000892 Py_INCREF(Py_NotImplemented);
893 return Py_NotImplemented;
894 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000895
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000896 other_size = _getbuffer(other, &other_bytes);
897 if (other_size < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000898 Py_INCREF(Py_NotImplemented);
899 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000900 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000901
902 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000903 /* Shortcut: if the lengths differ, the objects differ */
904 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000905 }
906 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000907 minsize = self_size;
908 if (other_size < minsize)
909 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000910
Guido van Rossum343e97f2007-04-09 00:43:24 +0000911 cmp = memcmp(self_bytes, other_bytes, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000912 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000913
Guido van Rossumd624f182006-04-24 13:47:05 +0000914 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000915 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000916 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000917 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000918 cmp = 1;
919 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000920
Guido van Rossumd624f182006-04-24 13:47:05 +0000921 switch (op) {
922 case Py_LT: cmp = cmp < 0; break;
923 case Py_LE: cmp = cmp <= 0; break;
924 case Py_EQ: cmp = cmp == 0; break;
925 case Py_NE: cmp = cmp != 0; break;
926 case Py_GT: cmp = cmp > 0; break;
927 case Py_GE: cmp = cmp >= 0; break;
928 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000929 }
930
931 res = cmp ? Py_True : Py_False;
932 Py_INCREF(res);
933 return res;
934}
935
936static void
937bytes_dealloc(PyBytesObject *self)
938{
Guido van Rossumd624f182006-04-24 13:47:05 +0000939 if (self->ob_bytes != 0) {
940 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000941 }
942 self->ob_type->tp_free((PyObject *)self);
943}
944
Guido van Rossumd624f182006-04-24 13:47:05 +0000945static Py_ssize_t
946bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
947{
948 if (index != 0) {
949 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000950 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000951 return -1;
952 }
Guido van Rossum63eac152007-05-09 23:36:14 +0000953 if (self->ob_bytes == NULL)
954 *ptr = "";
955 else
956 *ptr = self->ob_bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000957 return self->ob_size;
958}
959
960static Py_ssize_t
961bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
962{
963 if (lenp)
964 *lenp = self->ob_size;
965 return 1;
966}
967
Neal Norwitz6968b052007-02-27 19:02:19 +0000968
969
970/* -------------------------------------------------------------------- */
971/* Methods */
972
973#define STRINGLIB_CHAR char
974#define STRINGLIB_CMP memcmp
975#define STRINGLIB_LEN PyBytes_GET_SIZE
976#define STRINGLIB_NEW PyBytes_FromStringAndSize
977#define STRINGLIB_EMPTY nullbytes
978
979#include "stringlib/fastsearch.h"
980#include "stringlib/count.h"
981#include "stringlib/find.h"
982#include "stringlib/partition.h"
983
984
985/* The following Py_LOCAL_INLINE and Py_LOCAL functions
986were copied from the old char* style string object. */
987
988Py_LOCAL_INLINE(void)
989_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
990{
991 if (*end > len)
992 *end = len;
993 else if (*end < 0)
994 *end += len;
995 if (*end < 0)
996 *end = 0;
997 if (*start < 0)
998 *start += len;
999 if (*start < 0)
1000 *start = 0;
1001}
1002
1003
1004Py_LOCAL_INLINE(Py_ssize_t)
1005bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1006{
1007 PyObject *subobj;
1008 const char *sub;
1009 Py_ssize_t sub_len;
1010 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1011
1012 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1013 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1014 return -2;
1015 if (PyBytes_Check(subobj)) {
1016 sub = PyBytes_AS_STRING(subobj);
1017 sub_len = PyBytes_GET_SIZE(subobj);
1018 }
1019 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1020 /* XXX - the "expected a character buffer object" is pretty
1021 confusing for a non-expert. remap to something else ? */
1022 return -2;
1023
1024 if (dir > 0)
1025 return stringlib_find_slice(
1026 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1027 sub, sub_len, start, end);
1028 else
1029 return stringlib_rfind_slice(
1030 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1031 sub, sub_len, start, end);
1032}
1033
1034
1035PyDoc_STRVAR(find__doc__,
1036"B.find(sub [,start [,end]]) -> int\n\
1037\n\
1038Return the lowest index in B where subsection sub is found,\n\
1039such that sub is contained within s[start,end]. Optional\n\
1040arguments start and end are interpreted as in slice notation.\n\
1041\n\
1042Return -1 on failure.");
1043
1044static PyObject *
1045bytes_find(PyBytesObject *self, PyObject *args)
1046{
1047 Py_ssize_t result = bytes_find_internal(self, args, +1);
1048 if (result == -2)
1049 return NULL;
1050 return PyInt_FromSsize_t(result);
1051}
1052
1053PyDoc_STRVAR(count__doc__,
1054"B.count(sub[, start[, end]]) -> int\n\
1055\n\
1056Return the number of non-overlapping occurrences of subsection sub in\n\
1057bytes B[start:end]. Optional arguments start and end are interpreted\n\
1058as in slice notation.");
1059
1060static PyObject *
1061bytes_count(PyBytesObject *self, PyObject *args)
1062{
1063 PyObject *sub_obj;
1064 const char *str = PyBytes_AS_STRING(self), *sub;
1065 Py_ssize_t sub_len;
1066 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1067
1068 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1069 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1070 return NULL;
1071
1072 if (PyBytes_Check(sub_obj)) {
1073 sub = PyBytes_AS_STRING(sub_obj);
1074 sub_len = PyBytes_GET_SIZE(sub_obj);
1075 }
1076 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1077 return NULL;
1078
1079 _adjust_indices(&start, &end, PyString_GET_SIZE(self));
1080
1081 return PyInt_FromSsize_t(
1082 stringlib_count(str + start, end - start, sub, sub_len)
1083 );
1084}
1085
1086
1087PyDoc_STRVAR(index__doc__,
1088"B.index(sub [,start [,end]]) -> int\n\
1089\n\
1090Like B.find() but raise ValueError when the subsection is not found.");
1091
1092static PyObject *
1093bytes_index(PyBytesObject *self, PyObject *args)
1094{
1095 Py_ssize_t result = bytes_find_internal(self, args, +1);
1096 if (result == -2)
1097 return NULL;
1098 if (result == -1) {
1099 PyErr_SetString(PyExc_ValueError,
1100 "subsection not found");
1101 return NULL;
1102 }
1103 return PyInt_FromSsize_t(result);
1104}
1105
1106
1107PyDoc_STRVAR(rfind__doc__,
1108"B.rfind(sub [,start [,end]]) -> int\n\
1109\n\
1110Return the highest index in B where subsection sub is found,\n\
1111such that sub is contained within s[start,end]. Optional\n\
1112arguments start and end are interpreted as in slice notation.\n\
1113\n\
1114Return -1 on failure.");
1115
1116static PyObject *
1117bytes_rfind(PyBytesObject *self, PyObject *args)
1118{
1119 Py_ssize_t result = bytes_find_internal(self, args, -1);
1120 if (result == -2)
1121 return NULL;
1122 return PyInt_FromSsize_t(result);
1123}
1124
1125
1126PyDoc_STRVAR(rindex__doc__,
1127"B.rindex(sub [,start [,end]]) -> int\n\
1128\n\
1129Like B.rfind() but raise ValueError when the subsection is not found.");
1130
1131static PyObject *
1132bytes_rindex(PyBytesObject *self, PyObject *args)
1133{
1134 Py_ssize_t result = bytes_find_internal(self, args, -1);
1135 if (result == -2)
1136 return NULL;
1137 if (result == -1) {
1138 PyErr_SetString(PyExc_ValueError,
1139 "subsection not found");
1140 return NULL;
1141 }
1142 return PyInt_FromSsize_t(result);
1143}
1144
1145
1146/* Matches the end (direction >= 0) or start (direction < 0) of self
1147 * against substr, using the start and end arguments. Returns
1148 * -1 on error, 0 if not found and 1 if found.
1149 */
1150Py_LOCAL(int)
1151_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1152 Py_ssize_t end, int direction)
1153{
1154 Py_ssize_t len = PyBytes_GET_SIZE(self);
1155 Py_ssize_t slen;
1156 const char* sub;
1157 const char* str;
1158
1159 if (PyBytes_Check(substr)) {
1160 sub = PyBytes_AS_STRING(substr);
1161 slen = PyBytes_GET_SIZE(substr);
1162 }
1163 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1164 return -1;
1165 str = PyBytes_AS_STRING(self);
1166
1167 _adjust_indices(&start, &end, len);
1168
1169 if (direction < 0) {
1170 /* startswith */
1171 if (start+slen > len)
1172 return 0;
1173 } else {
1174 /* endswith */
1175 if (end-start < slen || start > len)
1176 return 0;
1177
1178 if (end-slen > start)
1179 start = end - slen;
1180 }
1181 if (end-start >= slen)
1182 return ! memcmp(str+start, sub, slen);
1183 return 0;
1184}
1185
1186
1187PyDoc_STRVAR(startswith__doc__,
1188"B.startswith(prefix[, start[, end]]) -> bool\n\
1189\n\
1190Return True if B starts with the specified prefix, False otherwise.\n\
1191With optional start, test B beginning at that position.\n\
1192With optional end, stop comparing B at that position.\n\
1193prefix can also be a tuple of strings to try.");
1194
1195static PyObject *
1196bytes_startswith(PyBytesObject *self, PyObject *args)
1197{
1198 Py_ssize_t start = 0;
1199 Py_ssize_t end = PY_SSIZE_T_MAX;
1200 PyObject *subobj;
1201 int result;
1202
1203 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1204 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1205 return NULL;
1206 if (PyTuple_Check(subobj)) {
1207 Py_ssize_t i;
1208 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1209 result = _bytes_tailmatch(self,
1210 PyTuple_GET_ITEM(subobj, i),
1211 start, end, -1);
1212 if (result == -1)
1213 return NULL;
1214 else if (result) {
1215 Py_RETURN_TRUE;
1216 }
1217 }
1218 Py_RETURN_FALSE;
1219 }
1220 result = _bytes_tailmatch(self, subobj, start, end, -1);
1221 if (result == -1)
1222 return NULL;
1223 else
1224 return PyBool_FromLong(result);
1225}
1226
1227PyDoc_STRVAR(endswith__doc__,
1228"B.endswith(suffix[, start[, end]]) -> bool\n\
1229\n\
1230Return True if B ends with the specified suffix, False otherwise.\n\
1231With optional start, test B beginning at that position.\n\
1232With optional end, stop comparing B at that position.\n\
1233suffix can also be a tuple of strings to try.");
1234
1235static PyObject *
1236bytes_endswith(PyBytesObject *self, PyObject *args)
1237{
1238 Py_ssize_t start = 0;
1239 Py_ssize_t end = PY_SSIZE_T_MAX;
1240 PyObject *subobj;
1241 int result;
1242
1243 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1244 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1245 return NULL;
1246 if (PyTuple_Check(subobj)) {
1247 Py_ssize_t i;
1248 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1249 result = _bytes_tailmatch(self,
1250 PyTuple_GET_ITEM(subobj, i),
1251 start, end, +1);
1252 if (result == -1)
1253 return NULL;
1254 else if (result) {
1255 Py_RETURN_TRUE;
1256 }
1257 }
1258 Py_RETURN_FALSE;
1259 }
1260 result = _bytes_tailmatch(self, subobj, start, end, +1);
1261 if (result == -1)
1262 return NULL;
1263 else
1264 return PyBool_FromLong(result);
1265}
1266
1267
1268
1269PyDoc_STRVAR(translate__doc__,
1270"B.translate(table [,deletechars]) -> bytes\n\
1271\n\
1272Return a copy of the bytes B, where all characters occurring\n\
1273in the optional argument deletechars are removed, and the\n\
1274remaining characters have been mapped through the given\n\
1275translation table, which must be a bytes of length 256.");
1276
1277static PyObject *
1278bytes_translate(PyBytesObject *self, PyObject *args)
1279{
1280 register char *input, *output;
1281 register const char *table;
1282 register Py_ssize_t i, c, changed = 0;
1283 PyObject *input_obj = (PyObject*)self;
1284 const char *table1, *output_start, *del_table=NULL;
1285 Py_ssize_t inlen, tablen, dellen = 0;
1286 PyObject *result;
1287 int trans_table[256];
1288 PyObject *tableobj, *delobj = NULL;
1289
1290 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1291 &tableobj, &delobj))
1292 return NULL;
1293
1294 if (PyBytes_Check(tableobj)) {
1295 table1 = PyBytes_AS_STRING(tableobj);
1296 tablen = PyBytes_GET_SIZE(tableobj);
1297 }
1298 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1299 return NULL;
1300
1301 if (tablen != 256) {
1302 PyErr_SetString(PyExc_ValueError,
1303 "translation table must be 256 characters long");
1304 return NULL;
1305 }
1306
1307 if (delobj != NULL) {
1308 if (PyBytes_Check(delobj)) {
1309 del_table = PyBytes_AS_STRING(delobj);
1310 dellen = PyBytes_GET_SIZE(delobj);
1311 }
1312 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1313 return NULL;
1314 }
1315 else {
1316 del_table = NULL;
1317 dellen = 0;
1318 }
1319
1320 table = table1;
1321 inlen = PyBytes_GET_SIZE(input_obj);
1322 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1323 if (result == NULL)
1324 return NULL;
1325 output_start = output = PyBytes_AsString(result);
1326 input = PyBytes_AS_STRING(input_obj);
1327
1328 if (dellen == 0) {
1329 /* If no deletions are required, use faster code */
1330 for (i = inlen; --i >= 0; ) {
1331 c = Py_CHARMASK(*input++);
1332 if (Py_CHARMASK((*output++ = table[c])) != c)
1333 changed = 1;
1334 }
1335 if (changed || !PyBytes_CheckExact(input_obj))
1336 return result;
1337 Py_DECREF(result);
1338 Py_INCREF(input_obj);
1339 return input_obj;
1340 }
1341
1342 for (i = 0; i < 256; i++)
1343 trans_table[i] = Py_CHARMASK(table[i]);
1344
1345 for (i = 0; i < dellen; i++)
1346 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1347
1348 for (i = inlen; --i >= 0; ) {
1349 c = Py_CHARMASK(*input++);
1350 if (trans_table[c] != -1)
1351 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1352 continue;
1353 changed = 1;
1354 }
1355 if (!changed && PyBytes_CheckExact(input_obj)) {
1356 Py_DECREF(result);
1357 Py_INCREF(input_obj);
1358 return input_obj;
1359 }
1360 /* Fix the size of the resulting string */
1361 if (inlen > 0)
1362 PyBytes_Resize(result, output - output_start);
1363 return result;
1364}
1365
1366
/* Search directions passed as the "direction" argument of the helpers
   below.  REVERSE is parenthesized so it is safe inside any expression. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* memchr() wrapper returning a char * to the first byte equal to c in
   target[0:target_len], or NULL.  Arguments are parenthesized. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs in target at offset.  Compares the
   first and last bytes before calling memcmp() on the interior.
   Don't call if length < 2 (the memcmp size is length-2).
   Arguments are parenthesized so arbitrary expressions can be passed. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1380
1381
1382/* Bytes ops must return a string. */
1383/* If the object is subclass of bytes, create a copy */
1384Py_LOCAL(PyBytesObject *)
1385return_self(PyBytesObject *self)
1386{
1387 if (PyBytes_CheckExact(self)) {
1388 Py_INCREF(self);
1389 return (PyBytesObject *)self;
1390 }
1391 return (PyBytesObject *)PyBytes_FromStringAndSize(
1392 PyBytes_AS_STRING(self),
1393 PyBytes_GET_SIZE(self));
1394}
1395
1396Py_LOCAL_INLINE(Py_ssize_t)
1397countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1398{
1399 Py_ssize_t count=0;
1400 const char *start=target;
1401 const char *end=target+target_len;
1402
1403 while ( (start=findchar(start, end-start, c)) != NULL ) {
1404 count++;
1405 if (count >= maxcount)
1406 break;
1407 start += 1;
1408 }
1409 return count;
1410}
1411
1412Py_LOCAL(Py_ssize_t)
1413findstring(const char *target, Py_ssize_t target_len,
1414 const char *pattern, Py_ssize_t pattern_len,
1415 Py_ssize_t start,
1416 Py_ssize_t end,
1417 int direction)
1418{
1419 if (start < 0) {
1420 start += target_len;
1421 if (start < 0)
1422 start = 0;
1423 }
1424 if (end > target_len) {
1425 end = target_len;
1426 } else if (end < 0) {
1427 end += target_len;
1428 if (end < 0)
1429 end = 0;
1430 }
1431
1432 /* zero-length substrings always match at the first attempt */
1433 if (pattern_len == 0)
1434 return (direction > 0) ? start : end;
1435
1436 end -= pattern_len;
1437
1438 if (direction < 0) {
1439 for (; end >= start; end--)
1440 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1441 return end;
1442 } else {
1443 for (; start <= end; start++)
1444 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1445 return start;
1446 }
1447 return -1;
1448}
1449
1450Py_LOCAL_INLINE(Py_ssize_t)
1451countstring(const char *target, Py_ssize_t target_len,
1452 const char *pattern, Py_ssize_t pattern_len,
1453 Py_ssize_t start,
1454 Py_ssize_t end,
1455 int direction, Py_ssize_t maxcount)
1456{
1457 Py_ssize_t count=0;
1458
1459 if (start < 0) {
1460 start += target_len;
1461 if (start < 0)
1462 start = 0;
1463 }
1464 if (end > target_len) {
1465 end = target_len;
1466 } else if (end < 0) {
1467 end += target_len;
1468 if (end < 0)
1469 end = 0;
1470 }
1471
1472 /* zero-length substrings match everywhere */
1473 if (pattern_len == 0 || maxcount == 0) {
1474 if (target_len+1 < maxcount)
1475 return target_len+1;
1476 return maxcount;
1477 }
1478
1479 end -= pattern_len;
1480 if (direction < 0) {
1481 for (; (end >= start); end--)
1482 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1483 count++;
1484 if (--maxcount <= 0) break;
1485 end -= pattern_len-1;
1486 }
1487 } else {
1488 for (; (start <= end); start++)
1489 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1490 count++;
1491 if (--maxcount <= 0)
1492 break;
1493 start += pattern_len-1;
1494 }
1495 }
1496 return count;
1497}
1498
1499
1500/* Algorithms for different cases of string replacement */
1501
1502/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1503Py_LOCAL(PyBytesObject *)
1504replace_interleave(PyBytesObject *self,
1505 const char *to_s, Py_ssize_t to_len,
1506 Py_ssize_t maxcount)
1507{
1508 char *self_s, *result_s;
1509 Py_ssize_t self_len, result_len;
1510 Py_ssize_t count, i, product;
1511 PyBytesObject *result;
1512
1513 self_len = PyBytes_GET_SIZE(self);
1514
1515 /* 1 at the end plus 1 after every character */
1516 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001517 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001518 count = maxcount;
1519
1520 /* Check for overflow */
1521 /* result_len = count * to_len + self_len; */
1522 product = count * to_len;
1523 if (product / to_len != count) {
1524 PyErr_SetString(PyExc_OverflowError,
1525 "replace string is too long");
1526 return NULL;
1527 }
1528 result_len = product + self_len;
1529 if (result_len < 0) {
1530 PyErr_SetString(PyExc_OverflowError,
1531 "replace string is too long");
1532 return NULL;
1533 }
1534
1535 if (! (result = (PyBytesObject *)
1536 PyBytes_FromStringAndSize(NULL, result_len)) )
1537 return NULL;
1538
1539 self_s = PyBytes_AS_STRING(self);
1540 result_s = PyBytes_AS_STRING(result);
1541
1542 /* TODO: special case single character, which doesn't need memcpy */
1543
1544 /* Lay the first one down (guaranteed this will occur) */
1545 Py_MEMCPY(result_s, to_s, to_len);
1546 result_s += to_len;
1547 count -= 1;
1548
1549 for (i=0; i<count; i++) {
1550 *result_s++ = *self_s++;
1551 Py_MEMCPY(result_s, to_s, to_len);
1552 result_s += to_len;
1553 }
1554
1555 /* Copy the rest of the original string */
1556 Py_MEMCPY(result_s, self_s, self_len-i);
1557
1558 return result;
1559}
1560
1561/* Special case for deleting a single character */
1562/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1563Py_LOCAL(PyBytesObject *)
1564replace_delete_single_character(PyBytesObject *self,
1565 char from_c, Py_ssize_t maxcount)
1566{
1567 char *self_s, *result_s;
1568 char *start, *next, *end;
1569 Py_ssize_t self_len, result_len;
1570 Py_ssize_t count;
1571 PyBytesObject *result;
1572
1573 self_len = PyBytes_GET_SIZE(self);
1574 self_s = PyBytes_AS_STRING(self);
1575
1576 count = countchar(self_s, self_len, from_c, maxcount);
1577 if (count == 0) {
1578 return return_self(self);
1579 }
1580
1581 result_len = self_len - count; /* from_len == 1 */
1582 assert(result_len>=0);
1583
1584 if ( (result = (PyBytesObject *)
1585 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1586 return NULL;
1587 result_s = PyBytes_AS_STRING(result);
1588
1589 start = self_s;
1590 end = self_s + self_len;
1591 while (count-- > 0) {
1592 next = findchar(start, end-start, from_c);
1593 if (next == NULL)
1594 break;
1595 Py_MEMCPY(result_s, start, next-start);
1596 result_s += (next-start);
1597 start = next+1;
1598 }
1599 Py_MEMCPY(result_s, start, end-start);
1600
1601 return result;
1602}
1603
1604/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1605
1606Py_LOCAL(PyBytesObject *)
1607replace_delete_substring(PyBytesObject *self,
1608 const char *from_s, Py_ssize_t from_len,
1609 Py_ssize_t maxcount)
1610{
1611 char *self_s, *result_s;
1612 char *start, *next, *end;
1613 Py_ssize_t self_len, result_len;
1614 Py_ssize_t count, offset;
1615 PyBytesObject *result;
1616
1617 self_len = PyBytes_GET_SIZE(self);
1618 self_s = PyBytes_AS_STRING(self);
1619
1620 count = countstring(self_s, self_len,
1621 from_s, from_len,
1622 0, self_len, 1,
1623 maxcount);
1624
1625 if (count == 0) {
1626 /* no matches */
1627 return return_self(self);
1628 }
1629
1630 result_len = self_len - (count * from_len);
1631 assert (result_len>=0);
1632
1633 if ( (result = (PyBytesObject *)
1634 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1635 return NULL;
1636
1637 result_s = PyBytes_AS_STRING(result);
1638
1639 start = self_s;
1640 end = self_s + self_len;
1641 while (count-- > 0) {
1642 offset = findstring(start, end-start,
1643 from_s, from_len,
1644 0, end-start, FORWARD);
1645 if (offset == -1)
1646 break;
1647 next = start + offset;
1648
1649 Py_MEMCPY(result_s, start, next-start);
1650
1651 result_s += (next-start);
1652 start = next+from_len;
1653 }
1654 Py_MEMCPY(result_s, start, end-start);
1655 return result;
1656}
1657
1658/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1659Py_LOCAL(PyBytesObject *)
1660replace_single_character_in_place(PyBytesObject *self,
1661 char from_c, char to_c,
1662 Py_ssize_t maxcount)
1663{
1664 char *self_s, *result_s, *start, *end, *next;
1665 Py_ssize_t self_len;
1666 PyBytesObject *result;
1667
1668 /* The result string will be the same size */
1669 self_s = PyBytes_AS_STRING(self);
1670 self_len = PyBytes_GET_SIZE(self);
1671
1672 next = findchar(self_s, self_len, from_c);
1673
1674 if (next == NULL) {
1675 /* No matches; return the original bytes */
1676 return return_self(self);
1677 }
1678
1679 /* Need to make a new bytes */
1680 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1681 if (result == NULL)
1682 return NULL;
1683 result_s = PyBytes_AS_STRING(result);
1684 Py_MEMCPY(result_s, self_s, self_len);
1685
1686 /* change everything in-place, starting with this one */
1687 start = result_s + (next-self_s);
1688 *start = to_c;
1689 start++;
1690 end = result_s + self_len;
1691
1692 while (--maxcount > 0) {
1693 next = findchar(start, end-start, from_c);
1694 if (next == NULL)
1695 break;
1696 *next = to_c;
1697 start = next+1;
1698 }
1699
1700 return result;
1701}
1702
1703/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1704Py_LOCAL(PyBytesObject *)
1705replace_substring_in_place(PyBytesObject *self,
1706 const char *from_s, Py_ssize_t from_len,
1707 const char *to_s, Py_ssize_t to_len,
1708 Py_ssize_t maxcount)
1709{
1710 char *result_s, *start, *end;
1711 char *self_s;
1712 Py_ssize_t self_len, offset;
1713 PyBytesObject *result;
1714
1715 /* The result bytes will be the same size */
1716
1717 self_s = PyBytes_AS_STRING(self);
1718 self_len = PyBytes_GET_SIZE(self);
1719
1720 offset = findstring(self_s, self_len,
1721 from_s, from_len,
1722 0, self_len, FORWARD);
1723 if (offset == -1) {
1724 /* No matches; return the original bytes */
1725 return return_self(self);
1726 }
1727
1728 /* Need to make a new bytes */
1729 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1730 if (result == NULL)
1731 return NULL;
1732 result_s = PyBytes_AS_STRING(result);
1733 Py_MEMCPY(result_s, self_s, self_len);
1734
1735 /* change everything in-place, starting with this one */
1736 start = result_s + offset;
1737 Py_MEMCPY(start, to_s, from_len);
1738 start += from_len;
1739 end = result_s + self_len;
1740
1741 while ( --maxcount > 0) {
1742 offset = findstring(start, end-start,
1743 from_s, from_len,
1744 0, end-start, FORWARD);
1745 if (offset==-1)
1746 break;
1747 Py_MEMCPY(start+offset, to_s, from_len);
1748 start += offset+from_len;
1749 }
1750
1751 return result;
1752}
1753
1754/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1755Py_LOCAL(PyBytesObject *)
1756replace_single_character(PyBytesObject *self,
1757 char from_c,
1758 const char *to_s, Py_ssize_t to_len,
1759 Py_ssize_t maxcount)
1760{
1761 char *self_s, *result_s;
1762 char *start, *next, *end;
1763 Py_ssize_t self_len, result_len;
1764 Py_ssize_t count, product;
1765 PyBytesObject *result;
1766
1767 self_s = PyBytes_AS_STRING(self);
1768 self_len = PyBytes_GET_SIZE(self);
1769
1770 count = countchar(self_s, self_len, from_c, maxcount);
1771 if (count == 0) {
1772 /* no matches, return unchanged */
1773 return return_self(self);
1774 }
1775
1776 /* use the difference between current and new, hence the "-1" */
1777 /* result_len = self_len + count * (to_len-1) */
1778 product = count * (to_len-1);
1779 if (product / (to_len-1) != count) {
1780 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1781 return NULL;
1782 }
1783 result_len = self_len + product;
1784 if (result_len < 0) {
1785 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1786 return NULL;
1787 }
1788
1789 if ( (result = (PyBytesObject *)
1790 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1791 return NULL;
1792 result_s = PyBytes_AS_STRING(result);
1793
1794 start = self_s;
1795 end = self_s + self_len;
1796 while (count-- > 0) {
1797 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001798 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001799 break;
1800
1801 if (next == start) {
1802 /* replace with the 'to' */
1803 Py_MEMCPY(result_s, to_s, to_len);
1804 result_s += to_len;
1805 start += 1;
1806 } else {
1807 /* copy the unchanged old then the 'to' */
1808 Py_MEMCPY(result_s, start, next-start);
1809 result_s += (next-start);
1810 Py_MEMCPY(result_s, to_s, to_len);
1811 result_s += to_len;
1812 start = next+1;
1813 }
1814 }
1815 /* Copy the remainder of the remaining bytes */
1816 Py_MEMCPY(result_s, start, end-start);
1817
1818 return result;
1819}
1820
1821/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1822Py_LOCAL(PyBytesObject *)
1823replace_substring(PyBytesObject *self,
1824 const char *from_s, Py_ssize_t from_len,
1825 const char *to_s, Py_ssize_t to_len,
1826 Py_ssize_t maxcount)
1827{
1828 char *self_s, *result_s;
1829 char *start, *next, *end;
1830 Py_ssize_t self_len, result_len;
1831 Py_ssize_t count, offset, product;
1832 PyBytesObject *result;
1833
1834 self_s = PyBytes_AS_STRING(self);
1835 self_len = PyBytes_GET_SIZE(self);
1836
1837 count = countstring(self_s, self_len,
1838 from_s, from_len,
1839 0, self_len, FORWARD, maxcount);
1840 if (count == 0) {
1841 /* no matches, return unchanged */
1842 return return_self(self);
1843 }
1844
1845 /* Check for overflow */
1846 /* result_len = self_len + count * (to_len-from_len) */
1847 product = count * (to_len-from_len);
1848 if (product / (to_len-from_len) != count) {
1849 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1850 return NULL;
1851 }
1852 result_len = self_len + product;
1853 if (result_len < 0) {
1854 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1855 return NULL;
1856 }
1857
1858 if ( (result = (PyBytesObject *)
1859 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1860 return NULL;
1861 result_s = PyBytes_AS_STRING(result);
1862
1863 start = self_s;
1864 end = self_s + self_len;
1865 while (count-- > 0) {
1866 offset = findstring(start, end-start,
1867 from_s, from_len,
1868 0, end-start, FORWARD);
1869 if (offset == -1)
1870 break;
1871 next = start+offset;
1872 if (next == start) {
1873 /* replace with the 'to' */
1874 Py_MEMCPY(result_s, to_s, to_len);
1875 result_s += to_len;
1876 start += from_len;
1877 } else {
1878 /* copy the unchanged old then the 'to' */
1879 Py_MEMCPY(result_s, start, next-start);
1880 result_s += (next-start);
1881 Py_MEMCPY(result_s, to_s, to_len);
1882 result_s += to_len;
1883 start = next+from_len;
1884 }
1885 }
1886 /* Copy the remainder of the remaining bytes */
1887 Py_MEMCPY(result_s, start, end-start);
1888
1889 return result;
1890}
1891
1892
1893Py_LOCAL(PyBytesObject *)
1894replace(PyBytesObject *self,
1895 const char *from_s, Py_ssize_t from_len,
1896 const char *to_s, Py_ssize_t to_len,
1897 Py_ssize_t maxcount)
1898{
1899 if (maxcount < 0) {
1900 maxcount = PY_SSIZE_T_MAX;
1901 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1902 /* nothing to do; return the original bytes */
1903 return return_self(self);
1904 }
1905
1906 if (maxcount == 0 ||
1907 (from_len == 0 && to_len == 0)) {
1908 /* nothing to do; return the original bytes */
1909 return return_self(self);
1910 }
1911
1912 /* Handle zero-length special cases */
1913
1914 if (from_len == 0) {
1915 /* insert the 'to' bytes everywhere. */
1916 /* >>> "Python".replace("", ".") */
1917 /* '.P.y.t.h.o.n.' */
1918 return replace_interleave(self, to_s, to_len, maxcount);
1919 }
1920
1921 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1922 /* point for an empty self bytes to generate a non-empty bytes */
1923 /* Special case so the remaining code always gets a non-empty bytes */
1924 if (PyBytes_GET_SIZE(self) == 0) {
1925 return return_self(self);
1926 }
1927
1928 if (to_len == 0) {
1929 /* delete all occurances of 'from' bytes */
1930 if (from_len == 1) {
1931 return replace_delete_single_character(
1932 self, from_s[0], maxcount);
1933 } else {
1934 return replace_delete_substring(self, from_s, from_len, maxcount);
1935 }
1936 }
1937
1938 /* Handle special case where both bytes have the same length */
1939
1940 if (from_len == to_len) {
1941 if (from_len == 1) {
1942 return replace_single_character_in_place(
1943 self,
1944 from_s[0],
1945 to_s[0],
1946 maxcount);
1947 } else {
1948 return replace_substring_in_place(
1949 self, from_s, from_len, to_s, to_len, maxcount);
1950 }
1951 }
1952
1953 /* Otherwise use the more generic algorithms */
1954 if (from_len == 1) {
1955 return replace_single_character(self, from_s[0],
1956 to_s, to_len, maxcount);
1957 } else {
1958 /* len('from')>=2, len('to')>=1 */
1959 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1960 }
1961}
1962
1963PyDoc_STRVAR(replace__doc__,
1964"B.replace (old, new[, count]) -> bytes\n\
1965\n\
1966Return a copy of bytes B with all occurrences of subsection\n\
1967old replaced by new. If the optional argument count is\n\
1968given, only the first count occurrences are replaced.");
1969
1970static PyObject *
1971bytes_replace(PyBytesObject *self, PyObject *args)
1972{
1973 Py_ssize_t count = -1;
1974 PyObject *from, *to;
1975 const char *from_s, *to_s;
1976 Py_ssize_t from_len, to_len;
1977
1978 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1979 return NULL;
1980
1981 if (PyBytes_Check(from)) {
1982 from_s = PyBytes_AS_STRING(from);
1983 from_len = PyBytes_GET_SIZE(from);
1984 }
1985 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1986 return NULL;
1987
1988 if (PyBytes_Check(to)) {
1989 to_s = PyBytes_AS_STRING(to);
1990 to_len = PyBytes_GET_SIZE(to);
1991 }
1992 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1993 return NULL;
1994
1995 return (PyObject *)replace((PyBytesObject *) self,
1996 from_s, from_len,
1997 to_s, to_len, count);
1998}
1999
2000
2001/* Overallocate the initial list to reduce the number of reallocs for small
2002 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2003 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2004 text (roughly 11 words per line) and field delimited data (usually 1-10
2005 fields). For large strings the split algorithms are bandwidth limited
2006 so increasing the preallocation likely will not improve things.*/
2007
/* Upper bound on the number of list slots reserved up front for a split. */
#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one element
   more than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression arguments
   expand with the intended precedence; note that it is still evaluated
   twice, so it must be free of side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
2013
/* Append the slice data[left:right] to `list` as a new bytes object.
 *
 * The expansion site must provide the variables `str` and `list` and a
 * label `onError`, which is jumped to when creating the bytes object or
 * appending it fails.  The temporary reference held in `str` is released
 * on both the success and the failure path.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement
 * (for example in an unbraced if/else); invoke it with a trailing
 * semicolon.
 */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
2025
/* Add the slice data[left:right] to `list` as a new bytes object and bump
 * `count`.
 *
 * While `count` is below MAX_PREALLOC the new object is stored directly
 * into slot `count` with PyList_SET_ITEM; after that it is appended with
 * PyList_Append and the temporary reference is dropped.  The expansion
 * site must provide `str`, `list`, `count` and an `onError` label.
 *
 * Wrapped in do { } while (0) so that the macro behaves as one statement
 * (for example in an unbraced if/else); invoke it with a trailing
 * semicolon.
 */
#define SPLIT_ADD(data, left, right)                            \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        }                                                       \
        else {                                                  \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            Py_DECREF(str);                                     \
        }                                                       \
        count++;                                                \
    } while (0)
2042
/* Always force the list to the expected size: set its ob_size to the
   `count` variable of the expansion site.  The argument is parenthesized
   so that the cast applies to the whole argument expression. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)
2045
2046
2047Py_LOCAL_INLINE(PyObject *)
2048split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2049{
2050 register Py_ssize_t i, j, count=0;
2051 PyObject *str;
2052 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2053
2054 if (list == NULL)
2055 return NULL;
2056
2057 i = j = 0;
2058 while ((j < len) && (maxcount-- > 0)) {
2059 for(; j<len; j++) {
2060 /* I found that using memchr makes no difference */
2061 if (s[j] == ch) {
2062 SPLIT_ADD(s, i, j);
2063 i = j = j + 1;
2064 break;
2065 }
2066 }
2067 }
2068 if (i <= len) {
2069 SPLIT_ADD(s, i, len);
2070 }
2071 FIX_PREALLOC_SIZE(list);
2072 return list;
2073
2074 onError:
2075 Py_DECREF(list);
2076 return NULL;
2077}
2078
2079PyDoc_STRVAR(split__doc__,
2080"B.split(sep [,maxsplit]) -> list of bytes\n\
2081\n\
2082Return a list of the bytes in the string B, using sep as the\n\
2083delimiter. If maxsplit is given, at most maxsplit\n\
2084splits are done.");
2085
2086static PyObject *
2087bytes_split(PyBytesObject *self, PyObject *args)
2088{
2089 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2090 Py_ssize_t maxsplit = -1, count=0;
2091 const char *s = PyBytes_AS_STRING(self), *sub;
2092 PyObject *list, *str, *subobj;
2093#ifdef USE_FAST
2094 Py_ssize_t pos;
2095#endif
2096
2097 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2098 return NULL;
2099 if (maxsplit < 0)
2100 maxsplit = PY_SSIZE_T_MAX;
2101 if (PyBytes_Check(subobj)) {
2102 sub = PyBytes_AS_STRING(subobj);
2103 n = PyBytes_GET_SIZE(subobj);
2104 }
2105 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2106 return NULL;
2107
2108 if (n == 0) {
2109 PyErr_SetString(PyExc_ValueError, "empty separator");
2110 return NULL;
2111 }
2112 else if (n == 1)
2113 return split_char(s, len, sub[0], maxsplit);
2114
2115 list = PyList_New(PREALLOC_SIZE(maxsplit));
2116 if (list == NULL)
2117 return NULL;
2118
2119#ifdef USE_FAST
2120 i = j = 0;
2121 while (maxsplit-- > 0) {
2122 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2123 if (pos < 0)
2124 break;
2125 j = i+pos;
2126 SPLIT_ADD(s, i, j);
2127 i = j + n;
2128 }
2129#else
2130 i = j = 0;
2131 while ((j+n <= len) && (maxsplit-- > 0)) {
2132 for (; j+n <= len; j++) {
2133 if (Py_STRING_MATCH(s, j, sub, n)) {
2134 SPLIT_ADD(s, i, j);
2135 i = j = j + n;
2136 break;
2137 }
2138 }
2139 }
2140#endif
2141 SPLIT_ADD(s, i, len);
2142 FIX_PREALLOC_SIZE(list);
2143 return list;
2144
2145 onError:
2146 Py_DECREF(list);
2147 return NULL;
2148}
2149
2150PyDoc_STRVAR(partition__doc__,
2151"B.partition(sep) -> (head, sep, tail)\n\
2152\n\
2153Searches for the separator sep in B, and returns the part before it,\n\
2154the separator itself, and the part after it. If the separator is not\n\
2155found, returns B and two empty bytes.");
2156
2157static PyObject *
2158bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2159{
2160 PyObject *bytesep, *result;
2161
2162 bytesep = PyBytes_FromObject(sep_obj);
2163 if (! bytesep)
2164 return NULL;
2165
2166 result = stringlib_partition(
2167 (PyObject*) self,
2168 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002169 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002170 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2171 );
2172
2173 Py_DECREF(bytesep);
2174 return result;
2175}
2176
2177PyDoc_STRVAR(rpartition__doc__,
2178"B.rpartition(sep) -> (tail, sep, head)\n\
2179\n\
2180Searches for the separator sep in B, starting at the end of B, and returns\n\
2181the part before it, the separator itself, and the part after it. If the\n\
2182separator is not found, returns two empty bytes and B.");
2183
2184static PyObject *
2185bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2186{
2187 PyObject *bytesep, *result;
2188
2189 bytesep = PyBytes_FromObject(sep_obj);
2190 if (! bytesep)
2191 return NULL;
2192
2193 result = stringlib_rpartition(
2194 (PyObject*) self,
2195 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002196 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002197 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2198 );
2199
2200 Py_DECREF(bytesep);
2201 return result;
2202}
2203
2204Py_LOCAL_INLINE(PyObject *)
2205rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2206{
2207 register Py_ssize_t i, j, count=0;
2208 PyObject *str;
2209 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2210
2211 if (list == NULL)
2212 return NULL;
2213
2214 i = j = len - 1;
2215 while ((i >= 0) && (maxcount-- > 0)) {
2216 for (; i >= 0; i--) {
2217 if (s[i] == ch) {
2218 SPLIT_ADD(s, i + 1, j + 1);
2219 j = i = i - 1;
2220 break;
2221 }
2222 }
2223 }
2224 if (j >= -1) {
2225 SPLIT_ADD(s, 0, j + 1);
2226 }
2227 FIX_PREALLOC_SIZE(list);
2228 if (PyList_Reverse(list) < 0)
2229 goto onError;
2230
2231 return list;
2232
2233 onError:
2234 Py_DECREF(list);
2235 return NULL;
2236}
2237
2238PyDoc_STRVAR(rsplit__doc__,
2239"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2240\n\
2241Return a list of the sections in the byte B, using sep as the\n\
2242delimiter, starting at the end of the bytes and working\n\
2243to the front. If maxsplit is given, at most maxsplit splits are\n\
2244done.");
2245
2246static PyObject *
2247bytes_rsplit(PyBytesObject *self, PyObject *args)
2248{
2249 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2250 Py_ssize_t maxsplit = -1, count=0;
2251 const char *s = PyBytes_AS_STRING(self), *sub;
2252 PyObject *list, *str, *subobj;
2253
2254 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2255 return NULL;
2256 if (maxsplit < 0)
2257 maxsplit = PY_SSIZE_T_MAX;
2258 if (PyBytes_Check(subobj)) {
2259 sub = PyBytes_AS_STRING(subobj);
2260 n = PyBytes_GET_SIZE(subobj);
2261 }
2262 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2263 return NULL;
2264
2265 if (n == 0) {
2266 PyErr_SetString(PyExc_ValueError, "empty separator");
2267 return NULL;
2268 }
2269 else if (n == 1)
2270 return rsplit_char(s, len, sub[0], maxsplit);
2271
2272 list = PyList_New(PREALLOC_SIZE(maxsplit));
2273 if (list == NULL)
2274 return NULL;
2275
2276 j = len;
2277 i = j - n;
2278
2279 while ( (i >= 0) && (maxsplit-- > 0) ) {
2280 for (; i>=0; i--) {
2281 if (Py_STRING_MATCH(s, i, sub, n)) {
2282 SPLIT_ADD(s, i + n, j);
2283 j = i;
2284 i -= n;
2285 break;
2286 }
2287 }
2288 }
2289 SPLIT_ADD(s, 0, j);
2290 FIX_PREALLOC_SIZE(list);
2291 if (PyList_Reverse(list) < 0)
2292 goto onError;
2293 return list;
2294
2295onError:
2296 Py_DECREF(list);
2297 return NULL;
2298}
2299
2300PyDoc_STRVAR(extend__doc__,
2301"B.extend(iterable int) -> None\n\
2302\n\
2303Append all the elements from the iterator or sequence to the\n\
2304end of the bytes.");
2305static PyObject *
2306bytes_extend(PyBytesObject *self, PyObject *arg)
2307{
2308 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2309 return NULL;
2310 Py_RETURN_NONE;
2311}
2312
2313
2314PyDoc_STRVAR(reverse__doc__,
2315"B.reverse() -> None\n\
2316\n\
2317Reverse the order of the values in bytes in place.");
2318static PyObject *
2319bytes_reverse(PyBytesObject *self, PyObject *unused)
2320{
2321 char swap, *head, *tail;
2322 Py_ssize_t i, j, n = self->ob_size;
2323
2324 j = n / 2;
2325 head = self->ob_bytes;
2326 tail = head + n - 1;
2327 for (i = 0; i < j; i++) {
2328 swap = *head;
2329 *head++ = *tail;
2330 *tail-- = swap;
2331 }
2332
2333 Py_RETURN_NONE;
2334}
2335
2336PyDoc_STRVAR(insert__doc__,
2337"B.insert(index, int) -> None\n\
2338\n\
2339Insert a single item into the bytes before the given index.");
2340static PyObject *
2341bytes_insert(PyBytesObject *self, PyObject *args)
2342{
2343 int value;
2344 Py_ssize_t where, n = self->ob_size;
2345
2346 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2347 return NULL;
2348
2349 if (n == PY_SSIZE_T_MAX) {
2350 PyErr_SetString(PyExc_OverflowError,
2351 "cannot add more objects to bytes");
2352 return NULL;
2353 }
2354 if (value < 0 || value >= 256) {
2355 PyErr_SetString(PyExc_ValueError,
2356 "byte must be in range(0, 256)");
2357 return NULL;
2358 }
2359 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2360 return NULL;
2361
2362 if (where < 0) {
2363 where += n;
2364 if (where < 0)
2365 where = 0;
2366 }
2367 if (where > n)
2368 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002369 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002370 self->ob_bytes[where] = value;
2371
2372 Py_RETURN_NONE;
2373}
2374
2375PyDoc_STRVAR(append__doc__,
2376"B.append(int) -> None\n\
2377\n\
2378Append a single item to the end of the bytes.");
2379static PyObject *
2380bytes_append(PyBytesObject *self, PyObject *arg)
2381{
2382 int value;
2383 Py_ssize_t n = self->ob_size;
2384
2385 if (! _getbytevalue(arg, &value))
2386 return NULL;
2387 if (n == PY_SSIZE_T_MAX) {
2388 PyErr_SetString(PyExc_OverflowError,
2389 "cannot add more objects to bytes");
2390 return NULL;
2391 }
2392 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2393 return NULL;
2394
2395 self->ob_bytes[n] = value;
2396
2397 Py_RETURN_NONE;
2398}
2399
2400PyDoc_STRVAR(pop__doc__,
2401"B.pop([index]) -> int\n\
2402\n\
2403Remove and return a single item from the bytes. If no index\n\
2404argument is give, will pop the last value.");
2405static PyObject *
2406bytes_pop(PyBytesObject *self, PyObject *args)
2407{
2408 int value;
2409 Py_ssize_t where = -1, n = self->ob_size;
2410
2411 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2412 return NULL;
2413
2414 if (n == 0) {
2415 PyErr_SetString(PyExc_OverflowError,
2416 "cannot pop an empty bytes");
2417 return NULL;
2418 }
2419 if (where < 0)
2420 where += self->ob_size;
2421 if (where < 0 || where >= self->ob_size) {
2422 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2423 return NULL;
2424 }
2425
2426 value = self->ob_bytes[where];
2427 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2428 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2429 return NULL;
2430
2431 return PyInt_FromLong(value);
2432}
2433
2434PyDoc_STRVAR(remove__doc__,
2435"B.remove(int) -> None\n\
2436\n\
2437Remove the first occurance of a value in bytes");
2438static PyObject *
2439bytes_remove(PyBytesObject *self, PyObject *arg)
2440{
2441 int value;
2442 Py_ssize_t where, n = self->ob_size;
2443
2444 if (! _getbytevalue(arg, &value))
2445 return NULL;
2446
2447 for (where = 0; where < n; where++) {
2448 if (self->ob_bytes[where] == value)
2449 break;
2450 }
2451 if (where == n) {
2452 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2453 return NULL;
2454 }
2455
2456 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2457 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2458 return NULL;
2459
2460 Py_RETURN_NONE;
2461}
2462
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002463/* XXX These two helpers could be optimized if argsize == 1 */
2464
2465Py_ssize_t
2466lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2467 void *argptr, Py_ssize_t argsize)
2468{
2469 Py_ssize_t i = 0;
2470 while (i < mysize && memchr(argptr, myptr[i], argsize))
2471 i++;
2472 return i;
2473}
2474
2475Py_ssize_t
2476rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2477 void *argptr, Py_ssize_t argsize)
2478{
2479 Py_ssize_t i = mysize - 1;
2480 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2481 i--;
2482 return i + 1;
2483}
2484
2485PyDoc_STRVAR(strip__doc__,
2486"B.strip(bytes) -> bytes\n\
2487\n\
2488Strip leading and trailing bytes contained in the argument.");
2489static PyObject *
2490bytes_strip(PyBytesObject *self, PyObject *arg)
2491{
2492 Py_ssize_t left, right, mysize, argsize;
2493 void *myptr, *argptr;
2494 if (arg == NULL || !PyBytes_Check(arg)) {
2495 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2496 return NULL;
2497 }
2498 myptr = self->ob_bytes;
2499 mysize = self->ob_size;
2500 argptr = ((PyBytesObject *)arg)->ob_bytes;
2501 argsize = ((PyBytesObject *)arg)->ob_size;
2502 left = lstrip_helper(myptr, mysize, argptr, argsize);
2503 right = rstrip_helper(myptr, mysize, argptr, argsize);
2504 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2505}
2506
2507PyDoc_STRVAR(lstrip__doc__,
2508"B.lstrip(bytes) -> bytes\n\
2509\n\
2510Strip leading bytes contained in the argument.");
2511static PyObject *
2512bytes_lstrip(PyBytesObject *self, PyObject *arg)
2513{
2514 Py_ssize_t left, right, mysize, argsize;
2515 void *myptr, *argptr;
2516 if (arg == NULL || !PyBytes_Check(arg)) {
2517 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2518 return NULL;
2519 }
2520 myptr = self->ob_bytes;
2521 mysize = self->ob_size;
2522 argptr = ((PyBytesObject *)arg)->ob_bytes;
2523 argsize = ((PyBytesObject *)arg)->ob_size;
2524 left = lstrip_helper(myptr, mysize, argptr, argsize);
2525 right = mysize;
2526 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2527}
2528
2529PyDoc_STRVAR(rstrip__doc__,
2530"B.rstrip(bytes) -> bytes\n\
2531\n\
2532Strip trailing bytes contained in the argument.");
2533static PyObject *
2534bytes_rstrip(PyBytesObject *self, PyObject *arg)
2535{
2536 Py_ssize_t left, right, mysize, argsize;
2537 void *myptr, *argptr;
2538 if (arg == NULL || !PyBytes_Check(arg)) {
2539 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2540 return NULL;
2541 }
2542 myptr = self->ob_bytes;
2543 mysize = self->ob_size;
2544 argptr = ((PyBytesObject *)arg)->ob_bytes;
2545 argsize = ((PyBytesObject *)arg)->ob_size;
2546 left = 0;
2547 right = rstrip_helper(myptr, mysize, argptr, argsize);
2548 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2549}
Neal Norwitz6968b052007-02-27 19:02:19 +00002550
Guido van Rossumd624f182006-04-24 13:47:05 +00002551PyDoc_STRVAR(decode_doc,
2552"B.decode([encoding[,errors]]) -> unicode obect.\n\
2553\n\
2554Decodes B using the codec registered for encoding. encoding defaults\n\
2555to the default encoding. errors may be given to set a different error\n\
2556handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2557a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2558as well as any other name registerd with codecs.register_error that is\n\
2559able to handle UnicodeDecodeErrors.");
2560
2561static PyObject *
2562bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002563{
Guido van Rossumd624f182006-04-24 13:47:05 +00002564 const char *encoding = NULL;
2565 const char *errors = NULL;
2566
2567 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2568 return NULL;
2569 if (encoding == NULL)
2570 encoding = PyUnicode_GetDefaultEncoding();
2571 return PyCodec_Decode(self, encoding, errors);
2572}
2573
Guido van Rossuma0867f72006-05-05 04:34:18 +00002574PyDoc_STRVAR(alloc_doc,
2575"B.__alloc__() -> int\n\
2576\n\
2577Returns the number of bytes actually allocated.");
2578
2579static PyObject *
2580bytes_alloc(PyBytesObject *self)
2581{
2582 return PyInt_FromSsize_t(self->ob_alloc);
2583}
2584
Guido van Rossum20188312006-05-05 15:15:40 +00002585PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002586"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002587\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002588Concatenates any number of bytes objects, with B in between each pair.\n\
2589Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002590
2591static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002592bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002593{
2594 PyObject *seq;
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002595 Py_ssize_t mysize = self->ob_size;
Guido van Rossum20188312006-05-05 15:15:40 +00002596 Py_ssize_t i;
2597 Py_ssize_t n;
2598 PyObject **items;
2599 Py_ssize_t totalsize = 0;
2600 PyObject *result;
2601 char *dest;
2602
2603 seq = PySequence_Fast(it, "can only join an iterable");
2604 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002605 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002606 n = PySequence_Fast_GET_SIZE(seq);
2607 items = PySequence_Fast_ITEMS(seq);
2608
2609 /* Compute the total size, and check that they are all bytes */
2610 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002611 PyObject *obj = items[i];
2612 if (!PyBytes_Check(obj)) {
2613 PyErr_Format(PyExc_TypeError,
2614 "can only join an iterable of bytes "
2615 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002616 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002617 (long)i, obj->ob_type->tp_name);
2618 goto error;
2619 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002620 if (i > 0)
2621 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002622 totalsize += PyBytes_GET_SIZE(obj);
2623 if (totalsize < 0) {
2624 PyErr_NoMemory();
2625 goto error;
2626 }
Guido van Rossum20188312006-05-05 15:15:40 +00002627 }
2628
2629 /* Allocate the result, and copy the bytes */
2630 result = PyBytes_FromStringAndSize(NULL, totalsize);
2631 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002632 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002633 dest = PyBytes_AS_STRING(result);
2634 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002635 PyObject *obj = items[i];
2636 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002637 if (i > 0) {
2638 memcpy(dest, self->ob_bytes, mysize);
2639 dest += mysize;
2640 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002641 memcpy(dest, PyBytes_AS_STRING(obj), size);
2642 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002643 }
2644
2645 /* Done */
2646 Py_DECREF(seq);
2647 return result;
2648
2649 /* Error handling */
2650 error:
2651 Py_DECREF(seq);
2652 return NULL;
2653}
2654
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002655PyDoc_STRVAR(fromhex_doc,
2656"bytes.fromhex(string) -> bytes\n\
2657\n\
2658Create a bytes object from a string of hexadecimal numbers.\n\
2659Spaces between two numbers are accepted. Example:\n\
2660bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2661
/* Convert one hexadecimal digit character to its value (0..15).
   Upper- and lower-case letters are accepted; any other character
   yields -1. */
static int
hex_digit_to_int(int c)
{
    if (isdigit(c))
        return c - '0';

    /* Fold upper case onto lower case, then test the a..f range. */
    if (isupper(c))
        c = tolower(c);
    return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1;
}
2675
2676static PyObject *
2677bytes_fromhex(PyObject *cls, PyObject *args)
2678{
2679 PyObject *newbytes;
2680 char *hex, *buf;
2681 Py_ssize_t len, byteslen, i, j;
2682 int top, bot;
2683
2684 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2685 return NULL;
2686
2687 byteslen = len / 2; /* max length if there are no spaces */
2688
2689 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2690 if (!newbytes)
2691 return NULL;
2692 buf = PyBytes_AS_STRING(newbytes);
2693
Guido van Rossum4355a472007-05-04 05:00:04 +00002694 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002695 /* skip over spaces in the input */
2696 while (Py_CHARMASK(hex[i]) == ' ')
2697 i++;
2698 if (i >= len)
2699 break;
2700 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2701 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2702 if (top == -1 || bot == -1) {
2703 PyErr_Format(PyExc_ValueError,
2704 "non-hexadecimal number string '%c%c' found in "
2705 "fromhex() arg at position %zd",
2706 hex[i], hex[i+1], i);
2707 goto error;
2708 }
2709 buf[j++] = (top << 4) + bot;
2710 }
2711 if (PyBytes_Resize(newbytes, j) < 0)
2712 goto error;
2713 return newbytes;
2714
2715 error:
2716 Py_DECREF(newbytes);
2717 return NULL;
2718}
2719
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002720PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2721
2722static PyObject *
2723bytes_reduce(PyBytesObject *self)
2724{
2725 return Py_BuildValue("(O(s#))",
2726 self->ob_type,
2727 self->ob_bytes == NULL ? "" : self->ob_bytes,
2728 self->ob_size);
2729}
2730
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002731static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002732 (lenfunc)bytes_length, /* sq_length */
2733 (binaryfunc)bytes_concat, /* sq_concat */
2734 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2735 (ssizeargfunc)bytes_getitem, /* sq_item */
2736 0, /* sq_slice */
2737 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2738 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002739 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002740 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2741 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002742};
2743
2744static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002745 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002746 (binaryfunc)bytes_subscript,
2747 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002748};
2749
2750static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002751 (readbufferproc)bytes_getbuffer,
2752 (writebufferproc)bytes_getbuffer,
2753 (segcountproc)bytes_getsegcount,
2754 /* XXX Bytes are not characters! But we need to implement
2755 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
2756 (charbufferproc)bytes_getbuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002757};
2758
2759static PyMethodDef
2760bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002761 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2762 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2763 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2764 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2765 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2766 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2767 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2768 startswith__doc__},
2769 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2770 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2771 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2772 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2773 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2774 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2775 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2776 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2777 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2778 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2779 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2780 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002781 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2782 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2783 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002784 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002785 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002786 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2787 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002788 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002789 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002790 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002791};
2792
2793PyDoc_STRVAR(bytes_doc,
2794"bytes([iterable]) -> new array of bytes.\n\
2795\n\
2796If an argument is given it must be an iterable yielding ints in range(256).");
2797
2798PyTypeObject PyBytes_Type = {
2799 PyObject_HEAD_INIT(&PyType_Type)
2800 0,
2801 "bytes",
2802 sizeof(PyBytesObject),
2803 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002804 (destructor)bytes_dealloc, /* tp_dealloc */
2805 0, /* tp_print */
2806 0, /* tp_getattr */
2807 0, /* tp_setattr */
2808 0, /* tp_compare */
2809 (reprfunc)bytes_repr, /* tp_repr */
2810 0, /* tp_as_number */
2811 &bytes_as_sequence, /* tp_as_sequence */
2812 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002813 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002814 0, /* tp_call */
2815 (reprfunc)bytes_str, /* tp_str */
2816 PyObject_GenericGetAttr, /* tp_getattro */
2817 0, /* tp_setattro */
2818 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002819 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002820 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002821 bytes_doc, /* tp_doc */
2822 0, /* tp_traverse */
2823 0, /* tp_clear */
2824 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2825 0, /* tp_weaklistoffset */
2826 0, /* tp_iter */
2827 0, /* tp_iternext */
2828 bytes_methods, /* tp_methods */
2829 0, /* tp_members */
2830 0, /* tp_getset */
2831 0, /* tp_base */
2832 0, /* tp_dict */
2833 0, /* tp_descr_get */
2834 0, /* tp_descr_set */
2835 0, /* tp_dictoffset */
2836 (initproc)bytes_init, /* tp_init */
2837 PyType_GenericAlloc, /* tp_alloc */
2838 PyType_GenericNew, /* tp_new */
2839 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002840};