blob: 75b7939c51cbb865b661666e021a22ff8f38b142 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
Guido van Rossumad7d8d12007-04-13 01:39:34 +000034/* Helpers */
35
36static int
37_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000038{
39 PyObject *intarg = PyNumber_Int(arg);
40 if (! intarg)
41 return 0;
42 *value = PyInt_AsLong(intarg);
43 Py_DECREF(intarg);
44 if (*value < 0 || *value >= 256) {
45 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
46 return 0;
47 }
48 return 1;
49}
50
Guido van Rossumad7d8d12007-04-13 01:39:34 +000051Py_ssize_t
52_getbuffer(PyObject *obj, void **ptr)
53{
54 PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
55
56 if (buffer == NULL ||
57 PyUnicode_Check(obj) ||
58 buffer->bf_getreadbuffer == NULL ||
59 buffer->bf_getsegcount == NULL ||
60 buffer->bf_getsegcount(obj, NULL) != 1)
61 {
62 *ptr = NULL;
63 return -1;
64 }
65
66 return buffer->bf_getreadbuffer(obj, 0, ptr);
67}
68
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069/* Direct API functions */
70
71PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000072PyBytes_FromObject(PyObject *input)
73{
74 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
75 input, NULL);
76}
77
78PyObject *
79PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080{
81 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000082 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000083
Guido van Rossumd624f182006-04-24 13:47:05 +000084 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000085
86 new = PyObject_New(PyBytesObject, &PyBytes_Type);
87 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000088 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000089
Guido van Rossumf15a29f2007-05-04 00:41:39 +000090 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +000091 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000092 alloc = 0;
93 }
Guido van Rossumd624f182006-04-24 13:47:05 +000094 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +000095 alloc = size + 1;
96 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +000097 if (new->ob_bytes == NULL) {
98 Py_DECREF(new);
99 return NULL;
100 }
101 if (bytes != NULL)
102 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000103 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000104 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000105 new->ob_size = size;
106 new->ob_alloc = alloc;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000107
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108 return (PyObject *)new;
109}
110
111Py_ssize_t
112PyBytes_Size(PyObject *self)
113{
114 assert(self != NULL);
115 assert(PyBytes_Check(self));
116
Guido van Rossum20188312006-05-05 15:15:40 +0000117 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118}
119
120char *
121PyBytes_AsString(PyObject *self)
122{
123 assert(self != NULL);
124 assert(PyBytes_Check(self));
125
Guido van Rossum20188312006-05-05 15:15:40 +0000126 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127}
128
129int
130PyBytes_Resize(PyObject *self, Py_ssize_t size)
131{
132 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000133 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134
135 assert(self != NULL);
136 assert(PyBytes_Check(self));
137 assert(size >= 0);
138
Guido van Rossuma0867f72006-05-05 04:34:18 +0000139 if (size < alloc / 2) {
140 /* Major downsize; resize down to exact size */
141 alloc = size;
142 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000143 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000144 /* Within allocated size; quick exit */
145 ((PyBytesObject *)self)->ob_size = size;
146 return 0;
147 }
148 else if (size <= alloc * 1.125) {
149 /* Moderate upsize; overallocate similar to list_resize() */
150 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
151 }
152 else {
153 /* Major upsize; resize up to exact size */
154 alloc = size;
155 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000156 if (alloc <= size)
157 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000158
159 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000160 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000161 PyErr_NoMemory();
162 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000163 }
164
Guido van Rossumd624f182006-04-24 13:47:05 +0000165 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000166 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000167 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000168
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000169 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
170
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000171 return 0;
172}
173
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000174PyObject *
175PyBytes_Concat(PyObject *a, PyObject *b)
176{
177 Py_ssize_t asize, bsize, size;
178 void *aptr, *bptr;
179 PyBytesObject *result;
180
181 asize = _getbuffer(a, &aptr);
182 bsize = _getbuffer(b, &bptr);
183 if (asize < 0 || bsize < 0) {
184 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
185 a->ob_type->tp_name, b->ob_type->tp_name);
186 return NULL;
187 }
188
189 size = asize + bsize;
190 if (size < 0)
191 return PyErr_NoMemory();
192
193 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
194 if (result != NULL) {
195 memcpy(result->ob_bytes, aptr, asize);
196 memcpy(result->ob_bytes + asize, bptr, bsize);
197 }
198 return (PyObject *)result;
199}
200
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000201/* Functions stuffed into the type object */
202
203static Py_ssize_t
204bytes_length(PyBytesObject *self)
205{
206 return self->ob_size;
207}
208
209static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000210bytes_concat(PyBytesObject *self, PyObject *other)
211{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000212 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000213}
214
215static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000216bytes_iconcat(PyBytesObject *self, PyObject *other)
217{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000218 void *optr;
Guido van Rossum13e57212006-04-27 22:54:26 +0000219 Py_ssize_t osize;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000220 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000221 Py_ssize_t size;
222
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000223 osize = _getbuffer(other, &optr);
224 if (osize < 0) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000225 PyErr_Format(PyExc_TypeError,
226 "can't concat bytes to %.100s", other->ob_type->tp_name);
227 return NULL;
228 }
229
230 mysize = self->ob_size;
Guido van Rossum13e57212006-04-27 22:54:26 +0000231 size = mysize + osize;
232 if (size < 0)
233 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000234 if (size < self->ob_alloc)
Guido van Rossuma0867f72006-05-05 04:34:18 +0000235 self->ob_size = size;
236 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000237 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238 memcpy(self->ob_bytes + mysize, optr, osize);
Guido van Rossum13e57212006-04-27 22:54:26 +0000239 Py_INCREF(self);
240 return (PyObject *)self;
241}
242
243static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000244bytes_repeat(PyBytesObject *self, Py_ssize_t count)
245{
246 PyBytesObject *result;
247 Py_ssize_t mysize;
248 Py_ssize_t size;
249
250 if (count < 0)
251 count = 0;
252 mysize = self->ob_size;
253 size = mysize * count;
254 if (count != 0 && size / count != mysize)
255 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000256 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000257 if (result != NULL && size != 0) {
258 if (mysize == 1)
259 memset(result->ob_bytes, self->ob_bytes[0], size);
260 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000261 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000262 for (i = 0; i < count; i++)
263 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
264 }
265 }
266 return (PyObject *)result;
267}
268
269static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000270bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
271{
272 Py_ssize_t mysize;
273 Py_ssize_t size;
274
275 if (count < 0)
276 count = 0;
277 mysize = self->ob_size;
278 size = mysize * count;
279 if (count != 0 && size / count != mysize)
280 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000281 if (size < self->ob_alloc)
Guido van Rossuma0867f72006-05-05 04:34:18 +0000282 self->ob_size = size;
283 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000284 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000285
Guido van Rossum13e57212006-04-27 22:54:26 +0000286 if (mysize == 1)
287 memset(self->ob_bytes, self->ob_bytes[0], size);
288 else {
289 Py_ssize_t i;
290 for (i = 1; i < count; i++)
291 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
292 }
293
294 Py_INCREF(self);
295 return (PyObject *)self;
296}
297
298static int
299bytes_substring(PyBytesObject *self, PyBytesObject *other)
300{
301 Py_ssize_t i;
302
303 if (other->ob_size == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000304 return memchr(self->ob_bytes, other->ob_bytes[0],
Guido van Rossum13e57212006-04-27 22:54:26 +0000305 self->ob_size) != NULL;
306 }
307 if (other->ob_size == 0)
308 return 1; /* Edge case */
309 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
310 /* XXX Yeah, yeah, lots of optimizations possible... */
311 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
312 return 1;
313 }
314 return 0;
315}
316
317static int
318bytes_contains(PyBytesObject *self, PyObject *value)
319{
320 Py_ssize_t ival;
321
322 if (PyBytes_Check(value))
323 return bytes_substring(self, (PyBytesObject *)value);
324
Thomas Woutersd204a712006-08-22 13:41:17 +0000325 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000326 if (ival == -1 && PyErr_Occurred())
327 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000328 if (ival < 0 || ival >= 256) {
329 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
330 return -1;
331 }
332
333 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
334}
335
336static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000337bytes_getitem(PyBytesObject *self, Py_ssize_t i)
338{
339 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000340 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000341 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000342 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
343 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000344 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000345 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
346}
347
348static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000349bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000350{
Thomas Wouters376446d2006-12-19 08:30:14 +0000351 if (PyIndex_Check(item)) {
352 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000353
Thomas Wouters376446d2006-12-19 08:30:14 +0000354 if (i == -1 && PyErr_Occurred())
355 return NULL;
356
357 if (i < 0)
358 i += PyBytes_GET_SIZE(self);
359
360 if (i < 0 || i >= self->ob_size) {
361 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
362 return NULL;
363 }
364 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
365 }
366 else if (PySlice_Check(item)) {
367 Py_ssize_t start, stop, step, slicelength, cur, i;
368 if (PySlice_GetIndicesEx((PySliceObject *)item,
369 PyBytes_GET_SIZE(self),
370 &start, &stop, &step, &slicelength) < 0) {
371 return NULL;
372 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000373
Thomas Wouters376446d2006-12-19 08:30:14 +0000374 if (slicelength <= 0)
375 return PyBytes_FromStringAndSize("", 0);
376 else if (step == 1) {
377 return PyBytes_FromStringAndSize(self->ob_bytes + start,
378 slicelength);
379 }
380 else {
381 char *source_buf = PyBytes_AS_STRING(self);
382 char *result_buf = (char *)PyMem_Malloc(slicelength);
383 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000384
Thomas Wouters376446d2006-12-19 08:30:14 +0000385 if (result_buf == NULL)
386 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000387
Thomas Wouters376446d2006-12-19 08:30:14 +0000388 for (cur = start, i = 0; i < slicelength;
389 cur += step, i++) {
390 result_buf[i] = source_buf[cur];
391 }
392 result = PyBytes_FromStringAndSize(result_buf, slicelength);
393 PyMem_Free(result_buf);
394 return result;
395 }
396 }
397 else {
398 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
399 return NULL;
400 }
401}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000402
Guido van Rossumd624f182006-04-24 13:47:05 +0000403static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000404bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000405 PyObject *values)
406{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000407 Py_ssize_t avail, needed;
408 void *bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000409
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000410 if (values == (PyObject *)self) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000411 /* Make a copy an call this function recursively */
412 int err;
413 values = PyBytes_FromObject(values);
414 if (values == NULL)
415 return -1;
416 err = bytes_setslice(self, lo, hi, values);
417 Py_DECREF(values);
418 return err;
419 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000420 if (values == NULL) {
421 /* del b[lo:hi] */
422 bytes = NULL;
423 needed = 0;
424 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000425 else {
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000426 needed = _getbuffer(values, &bytes);
427 if (needed < 0) {
428 PyErr_Format(PyExc_TypeError,
429 "can't set bytes slice from %.100s",
430 values->ob_type->tp_name);
431 return -1;
432 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000433 }
434
435 if (lo < 0)
436 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000437 if (hi < lo)
438 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000439 if (hi > self->ob_size)
440 hi = self->ob_size;
441
442 avail = hi - lo;
443 if (avail < 0)
444 lo = hi = avail = 0;
445
446 if (avail != needed) {
447 if (avail > needed) {
448 /*
449 0 lo hi old_size
450 | |<----avail----->|<-----tomove------>|
451 | |<-needed->|<-----tomove------>|
452 0 lo new_hi new_size
453 */
454 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
455 self->ob_size - hi);
456 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000457 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000458 self->ob_size + needed - avail) < 0)
459 return -1;
460 if (avail < needed) {
461 /*
462 0 lo hi old_size
463 | |<-avail->|<-----tomove------>|
464 | |<----needed---->|<-----tomove------>|
465 0 lo new_hi new_size
466 */
467 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
468 self->ob_size - lo - needed);
469 }
470 }
471
472 if (needed > 0)
473 memcpy(self->ob_bytes + lo, bytes, needed);
474
475 return 0;
476}
477
478static int
479bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
480{
481 Py_ssize_t ival;
482
483 if (i < 0)
484 i += self->ob_size;
485
486 if (i < 0 || i >= self->ob_size) {
487 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
488 return -1;
489 }
490
491 if (value == NULL)
492 return bytes_setslice(self, i, i+1, NULL);
493
Thomas Woutersd204a712006-08-22 13:41:17 +0000494 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000495 if (ival == -1 && PyErr_Occurred())
496 return -1;
497
498 if (ival < 0 || ival >= 256) {
499 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
500 return -1;
501 }
502
503 self->ob_bytes[i] = ival;
504 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000505}
506
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000507static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000508bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
509{
510 Py_ssize_t start, stop, step, slicelen, needed;
511 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000512
Thomas Wouters376446d2006-12-19 08:30:14 +0000513 if (PyIndex_Check(item)) {
514 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
515
516 if (i == -1 && PyErr_Occurred())
517 return -1;
518
519 if (i < 0)
520 i += PyBytes_GET_SIZE(self);
521
522 if (i < 0 || i >= self->ob_size) {
523 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
524 return -1;
525 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000526
Thomas Wouters376446d2006-12-19 08:30:14 +0000527 if (values == NULL) {
528 /* Fall through to slice assignment */
529 start = i;
530 stop = i + 1;
531 step = 1;
532 slicelen = 1;
533 }
534 else {
535 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
536 if (ival == -1 && PyErr_Occurred())
537 return -1;
538 if (ival < 0 || ival >= 256) {
539 PyErr_SetString(PyExc_ValueError,
540 "byte must be in range(0, 256)");
541 return -1;
542 }
543 self->ob_bytes[i] = (char)ival;
544 return 0;
545 }
546 }
547 else if (PySlice_Check(item)) {
548 if (PySlice_GetIndicesEx((PySliceObject *)item,
549 PyBytes_GET_SIZE(self),
550 &start, &stop, &step, &slicelen) < 0) {
551 return -1;
552 }
553 }
554 else {
555 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
556 return -1;
557 }
558
559 if (values == NULL) {
560 bytes = NULL;
561 needed = 0;
562 }
563 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
564 /* Make a copy an call this function recursively */
565 int err;
566 values = PyBytes_FromObject(values);
567 if (values == NULL)
568 return -1;
569 err = bytes_ass_subscript(self, item, values);
570 Py_DECREF(values);
571 return err;
572 }
573 else {
574 assert(PyBytes_Check(values));
575 bytes = ((PyBytesObject *)values)->ob_bytes;
576 needed = ((PyBytesObject *)values)->ob_size;
577 }
578 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
579 if ((step < 0 && start < stop) ||
580 (step > 0 && start > stop))
581 stop = start;
582 if (step == 1) {
583 if (slicelen != needed) {
584 if (slicelen > needed) {
585 /*
586 0 start stop old_size
587 | |<---slicelen--->|<-----tomove------>|
588 | |<-needed->|<-----tomove------>|
589 0 lo new_hi new_size
590 */
591 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
592 self->ob_size - stop);
593 }
594 if (PyBytes_Resize((PyObject *)self,
595 self->ob_size + needed - slicelen) < 0)
596 return -1;
597 if (slicelen < needed) {
598 /*
599 0 lo hi old_size
600 | |<-avail->|<-----tomove------>|
601 | |<----needed---->|<-----tomove------>|
602 0 lo new_hi new_size
603 */
604 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
605 self->ob_size - start - needed);
606 }
607 }
608
609 if (needed > 0)
610 memcpy(self->ob_bytes + start, bytes, needed);
611
612 return 0;
613 }
614 else {
615 if (needed == 0) {
616 /* Delete slice */
617 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000618
Thomas Wouters376446d2006-12-19 08:30:14 +0000619 if (step < 0) {
620 stop = start + 1;
621 start = stop + step * (slicelen - 1) - 1;
622 step = -step;
623 }
624 for (cur = start, i = 0;
625 i < slicelen; cur += step, i++) {
626 Py_ssize_t lim = step - 1;
627
628 if (cur + step >= PyBytes_GET_SIZE(self))
629 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000630
Thomas Wouters376446d2006-12-19 08:30:14 +0000631 memmove(self->ob_bytes + cur - i,
632 self->ob_bytes + cur + 1, lim);
633 }
634 /* Move the tail of the bytes, in one chunk */
635 cur = start + slicelen*step;
636 if (cur < PyBytes_GET_SIZE(self)) {
637 memmove(self->ob_bytes + cur - slicelen,
638 self->ob_bytes + cur,
639 PyBytes_GET_SIZE(self) - cur);
640 }
641 if (PyBytes_Resize((PyObject *)self,
642 PyBytes_GET_SIZE(self) - slicelen) < 0)
643 return -1;
644
645 return 0;
646 }
647 else {
648 /* Assign slice */
649 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000650
Thomas Wouters376446d2006-12-19 08:30:14 +0000651 if (needed != slicelen) {
652 PyErr_Format(PyExc_ValueError,
653 "attempt to assign bytes of size %zd "
654 "to extended slice of size %zd",
655 needed, slicelen);
656 return -1;
657 }
658 for (cur = start, i = 0; i < slicelen; cur += step, i++)
659 self->ob_bytes[cur] = bytes[i];
660 return 0;
661 }
662 }
663}
664
665static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000666bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
667{
Guido van Rossumd624f182006-04-24 13:47:05 +0000668 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000669 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000670 const char *encoding = NULL;
671 const char *errors = NULL;
672 Py_ssize_t count;
673 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000674 PyObject *(*iternext)(PyObject *);
675
Guido van Rossuma0867f72006-05-05 04:34:18 +0000676 if (self->ob_size != 0) {
677 /* Empty previous contents (yes, do this first of all!) */
678 if (PyBytes_Resize((PyObject *)self, 0) < 0)
679 return -1;
680 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000681
Guido van Rossumd624f182006-04-24 13:47:05 +0000682 /* Parse arguments */
683 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
684 &arg, &encoding, &errors))
685 return -1;
686
687 /* Make a quick exit if no first argument */
688 if (arg == NULL) {
689 if (encoding != NULL || errors != NULL) {
690 PyErr_SetString(PyExc_TypeError,
691 "encoding or errors without sequence argument");
692 return -1;
693 }
694 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000695 }
696
Guido van Rossumd624f182006-04-24 13:47:05 +0000697 if (PyUnicode_Check(arg)) {
698 /* Encode via the codec registry */
699 PyObject *encoded;
700 char *bytes;
701 Py_ssize_t size;
702 if (encoding == NULL)
703 encoding = PyUnicode_GetDefaultEncoding();
704 encoded = PyCodec_Encode(arg, encoding, errors);
705 if (encoded == NULL)
706 return -1;
707 if (!PyString_Check(encoded)) {
708 PyErr_Format(PyExc_TypeError,
709 "encoder did not return a string object (type=%.400s)",
710 encoded->ob_type->tp_name);
711 Py_DECREF(encoded);
712 return -1;
713 }
714 bytes = PyString_AS_STRING(encoded);
715 size = PyString_GET_SIZE(encoded);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000716 if (size < self->ob_alloc)
Guido van Rossuma0867f72006-05-05 04:34:18 +0000717 self->ob_size = size;
718 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000719 Py_DECREF(encoded);
720 return -1;
721 }
722 memcpy(self->ob_bytes, bytes, size);
723 Py_DECREF(encoded);
724 return 0;
725 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000726
Guido van Rossumd624f182006-04-24 13:47:05 +0000727 /* If it's not unicode, there can't be encoding or errors */
728 if (encoding != NULL || errors != NULL) {
729 PyErr_SetString(PyExc_TypeError,
730 "encoding or errors without a string argument");
731 return -1;
732 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733
Guido van Rossumd624f182006-04-24 13:47:05 +0000734 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000735 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000736 if (count == -1 && PyErr_Occurred())
737 PyErr_Clear();
738 else {
739 if (count < 0) {
740 PyErr_SetString(PyExc_ValueError, "negative count");
741 return -1;
742 }
743 if (count > 0) {
744 if (PyBytes_Resize((PyObject *)self, count))
745 return -1;
746 memset(self->ob_bytes, 0, count);
747 }
748 return 0;
749 }
750
751 if (PyObject_CheckReadBuffer(arg)) {
752 const void *bytes;
753 Py_ssize_t size;
754 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
755 return -1;
756 if (PyBytes_Resize((PyObject *)self, size) < 0)
757 return -1;
758 memcpy(self->ob_bytes, bytes, size);
759 return 0;
760 }
761
762 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000763
764 /* Get the iterator */
765 it = PyObject_GetIter(arg);
766 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000767 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000768 iternext = *it->ob_type->tp_iternext;
769
770 /* Run the iterator to exhaustion */
771 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000772 PyObject *item;
773 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000774
Guido van Rossumd624f182006-04-24 13:47:05 +0000775 /* Get the next item */
776 item = iternext(it);
777 if (item == NULL) {
778 if (PyErr_Occurred()) {
779 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
780 goto error;
781 PyErr_Clear();
782 }
783 break;
784 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000785
Guido van Rossumd624f182006-04-24 13:47:05 +0000786 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000787 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 Py_DECREF(item);
789 if (value == -1 && PyErr_Occurred())
790 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000791
Guido van Rossumd624f182006-04-24 13:47:05 +0000792 /* Range check */
793 if (value < 0 || value >= 256) {
794 PyErr_SetString(PyExc_ValueError,
795 "bytes must be in range(0, 256)");
796 goto error;
797 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000798
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000800 if (self->ob_size < self->ob_alloc)
801 self->ob_size++;
802 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000803 goto error;
804 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000805 }
806
807 /* Clean up and return success */
808 Py_DECREF(it);
809 return 0;
810
811 error:
812 /* Error handling when it != NULL */
813 Py_DECREF(it);
814 return -1;
815}
816
Georg Brandlee91be42007-02-24 19:41:35 +0000817/* Mostly copied from string_repr, but without the
818 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000819static PyObject *
820bytes_repr(PyBytesObject *self)
821{
Georg Brandlee91be42007-02-24 19:41:35 +0000822 size_t newsize = 3 + 4 * self->ob_size;
823 PyObject *v;
824 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
825 PyErr_SetString(PyExc_OverflowError,
826 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000827 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000828 }
Georg Brandlee91be42007-02-24 19:41:35 +0000829 v = PyString_FromStringAndSize((char *)NULL, newsize);
830 if (v == NULL) {
831 return NULL;
832 }
833 else {
834 register Py_ssize_t i;
835 register char c;
836 register char *p;
837 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000838
Georg Brandlee91be42007-02-24 19:41:35 +0000839 p = PyString_AS_STRING(v);
840 *p++ = 'b';
841 *p++ = quote;
842 for (i = 0; i < self->ob_size; i++) {
843 /* There's at least enough room for a hex escape
844 and a closing quote. */
845 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
846 c = self->ob_bytes[i];
847 if (c == quote || c == '\\')
848 *p++ = '\\', *p++ = c;
849 else if (c == '\t')
850 *p++ = '\\', *p++ = 't';
851 else if (c == '\n')
852 *p++ = '\\', *p++ = 'n';
853 else if (c == '\r')
854 *p++ = '\\', *p++ = 'r';
855 else if (c == 0)
856 *p++ = '\\', *p++ = '0';
857 else if (c < ' ' || c >= 0x7f) {
858 /* For performance, we don't want to call
859 PyOS_snprintf here (extra layers of
860 function call). */
861 sprintf(p, "\\x%02x", c & 0xff);
862 p += 4;
863 }
864 else
865 *p++ = c;
866 }
867 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
868 *p++ = quote;
869 *p = '\0';
870 _PyString_Resize(
871 &v, (p - PyString_AS_STRING(v)));
872 return v;
873 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874}
875
876static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000877bytes_str(PyBytesObject *self)
878{
879 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
880}
881
882static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000883bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000884{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000885 Py_ssize_t self_size, other_size;
886 void *self_bytes, *other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000887 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000888 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000889 int cmp;
890
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000891 /* Bytes can be compared to anything that supports the (binary) buffer
892 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000893
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000894 self_size = _getbuffer(self, &self_bytes);
895 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000896 Py_INCREF(Py_NotImplemented);
897 return Py_NotImplemented;
898 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000899
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000900 other_size = _getbuffer(other, &other_bytes);
901 if (other_size < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000902 Py_INCREF(Py_NotImplemented);
903 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000904 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000905
906 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000907 /* Shortcut: if the lengths differ, the objects differ */
908 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000909 }
910 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000911 minsize = self_size;
912 if (other_size < minsize)
913 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000914
Guido van Rossum343e97f2007-04-09 00:43:24 +0000915 cmp = memcmp(self_bytes, other_bytes, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000916 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000917
Guido van Rossumd624f182006-04-24 13:47:05 +0000918 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000919 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000920 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000921 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000922 cmp = 1;
923 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000924
Guido van Rossumd624f182006-04-24 13:47:05 +0000925 switch (op) {
926 case Py_LT: cmp = cmp < 0; break;
927 case Py_LE: cmp = cmp <= 0; break;
928 case Py_EQ: cmp = cmp == 0; break;
929 case Py_NE: cmp = cmp != 0; break;
930 case Py_GT: cmp = cmp > 0; break;
931 case Py_GE: cmp = cmp >= 0; break;
932 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000933 }
934
935 res = cmp ? Py_True : Py_False;
936 Py_INCREF(res);
937 return res;
938}
939
940static void
941bytes_dealloc(PyBytesObject *self)
942{
Guido van Rossumd624f182006-04-24 13:47:05 +0000943 if (self->ob_bytes != 0) {
944 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945 }
946 self->ob_type->tp_free((PyObject *)self);
947}
948
Guido van Rossumd624f182006-04-24 13:47:05 +0000949static Py_ssize_t
950bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
951{
952 if (index != 0) {
953 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000954 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000955 return -1;
956 }
957 *ptr = (void *)self->ob_bytes;
958 return self->ob_size;
959}
960
961static Py_ssize_t
962bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
963{
964 if (lenp)
965 *lenp = self->ob_size;
966 return 1;
967}
968
Neal Norwitz6968b052007-02-27 19:02:19 +0000969
970
971/* -------------------------------------------------------------------- */
972/* Methods */
973
974#define STRINGLIB_CHAR char
975#define STRINGLIB_CMP memcmp
976#define STRINGLIB_LEN PyBytes_GET_SIZE
977#define STRINGLIB_NEW PyBytes_FromStringAndSize
978#define STRINGLIB_EMPTY nullbytes
979
980#include "stringlib/fastsearch.h"
981#include "stringlib/count.h"
982#include "stringlib/find.h"
983#include "stringlib/partition.h"
984
985
986/* The following Py_LOCAL_INLINE and Py_LOCAL functions
987were copied from the old char* style string object. */
988
989Py_LOCAL_INLINE(void)
990_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
991{
992 if (*end > len)
993 *end = len;
994 else if (*end < 0)
995 *end += len;
996 if (*end < 0)
997 *end = 0;
998 if (*start < 0)
999 *start += len;
1000 if (*start < 0)
1001 *start = 0;
1002}
1003
1004
1005Py_LOCAL_INLINE(Py_ssize_t)
1006bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1007{
1008 PyObject *subobj;
1009 const char *sub;
1010 Py_ssize_t sub_len;
1011 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1012
1013 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1014 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1015 return -2;
1016 if (PyBytes_Check(subobj)) {
1017 sub = PyBytes_AS_STRING(subobj);
1018 sub_len = PyBytes_GET_SIZE(subobj);
1019 }
1020 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1021 /* XXX - the "expected a character buffer object" is pretty
1022 confusing for a non-expert. remap to something else ? */
1023 return -2;
1024
1025 if (dir > 0)
1026 return stringlib_find_slice(
1027 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1028 sub, sub_len, start, end);
1029 else
1030 return stringlib_rfind_slice(
1031 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1032 sub, sub_len, start, end);
1033}
1034
1035
1036PyDoc_STRVAR(find__doc__,
1037"B.find(sub [,start [,end]]) -> int\n\
1038\n\
1039Return the lowest index in B where subsection sub is found,\n\
1040such that sub is contained within s[start,end]. Optional\n\
1041arguments start and end are interpreted as in slice notation.\n\
1042\n\
1043Return -1 on failure.");
1044
1045static PyObject *
1046bytes_find(PyBytesObject *self, PyObject *args)
1047{
1048 Py_ssize_t result = bytes_find_internal(self, args, +1);
1049 if (result == -2)
1050 return NULL;
1051 return PyInt_FromSsize_t(result);
1052}
1053
1054PyDoc_STRVAR(count__doc__,
1055"B.count(sub[, start[, end]]) -> int\n\
1056\n\
1057Return the number of non-overlapping occurrences of subsection sub in\n\
1058bytes B[start:end]. Optional arguments start and end are interpreted\n\
1059as in slice notation.");
1060
1061static PyObject *
1062bytes_count(PyBytesObject *self, PyObject *args)
1063{
1064 PyObject *sub_obj;
1065 const char *str = PyBytes_AS_STRING(self), *sub;
1066 Py_ssize_t sub_len;
1067 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1068
1069 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1070 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1071 return NULL;
1072
1073 if (PyBytes_Check(sub_obj)) {
1074 sub = PyBytes_AS_STRING(sub_obj);
1075 sub_len = PyBytes_GET_SIZE(sub_obj);
1076 }
1077 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1078 return NULL;
1079
1080 _adjust_indices(&start, &end, PyString_GET_SIZE(self));
1081
1082 return PyInt_FromSsize_t(
1083 stringlib_count(str + start, end - start, sub, sub_len)
1084 );
1085}
1086
1087
1088PyDoc_STRVAR(index__doc__,
1089"B.index(sub [,start [,end]]) -> int\n\
1090\n\
1091Like B.find() but raise ValueError when the subsection is not found.");
1092
1093static PyObject *
1094bytes_index(PyBytesObject *self, PyObject *args)
1095{
1096 Py_ssize_t result = bytes_find_internal(self, args, +1);
1097 if (result == -2)
1098 return NULL;
1099 if (result == -1) {
1100 PyErr_SetString(PyExc_ValueError,
1101 "subsection not found");
1102 return NULL;
1103 }
1104 return PyInt_FromSsize_t(result);
1105}
1106
1107
1108PyDoc_STRVAR(rfind__doc__,
1109"B.rfind(sub [,start [,end]]) -> int\n\
1110\n\
1111Return the highest index in B where subsection sub is found,\n\
1112such that sub is contained within s[start,end]. Optional\n\
1113arguments start and end are interpreted as in slice notation.\n\
1114\n\
1115Return -1 on failure.");
1116
1117static PyObject *
1118bytes_rfind(PyBytesObject *self, PyObject *args)
1119{
1120 Py_ssize_t result = bytes_find_internal(self, args, -1);
1121 if (result == -2)
1122 return NULL;
1123 return PyInt_FromSsize_t(result);
1124}
1125
1126
1127PyDoc_STRVAR(rindex__doc__,
1128"B.rindex(sub [,start [,end]]) -> int\n\
1129\n\
1130Like B.rfind() but raise ValueError when the subsection is not found.");
1131
1132static PyObject *
1133bytes_rindex(PyBytesObject *self, PyObject *args)
1134{
1135 Py_ssize_t result = bytes_find_internal(self, args, -1);
1136 if (result == -2)
1137 return NULL;
1138 if (result == -1) {
1139 PyErr_SetString(PyExc_ValueError,
1140 "subsection not found");
1141 return NULL;
1142 }
1143 return PyInt_FromSsize_t(result);
1144}
1145
1146
1147/* Matches the end (direction >= 0) or start (direction < 0) of self
1148 * against substr, using the start and end arguments. Returns
1149 * -1 on error, 0 if not found and 1 if found.
1150 */
1151Py_LOCAL(int)
1152_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1153 Py_ssize_t end, int direction)
1154{
1155 Py_ssize_t len = PyBytes_GET_SIZE(self);
1156 Py_ssize_t slen;
1157 const char* sub;
1158 const char* str;
1159
1160 if (PyBytes_Check(substr)) {
1161 sub = PyBytes_AS_STRING(substr);
1162 slen = PyBytes_GET_SIZE(substr);
1163 }
1164 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1165 return -1;
1166 str = PyBytes_AS_STRING(self);
1167
1168 _adjust_indices(&start, &end, len);
1169
1170 if (direction < 0) {
1171 /* startswith */
1172 if (start+slen > len)
1173 return 0;
1174 } else {
1175 /* endswith */
1176 if (end-start < slen || start > len)
1177 return 0;
1178
1179 if (end-slen > start)
1180 start = end - slen;
1181 }
1182 if (end-start >= slen)
1183 return ! memcmp(str+start, sub, slen);
1184 return 0;
1185}
1186
1187
1188PyDoc_STRVAR(startswith__doc__,
1189"B.startswith(prefix[, start[, end]]) -> bool\n\
1190\n\
1191Return True if B starts with the specified prefix, False otherwise.\n\
1192With optional start, test B beginning at that position.\n\
1193With optional end, stop comparing B at that position.\n\
1194prefix can also be a tuple of strings to try.");
1195
1196static PyObject *
1197bytes_startswith(PyBytesObject *self, PyObject *args)
1198{
1199 Py_ssize_t start = 0;
1200 Py_ssize_t end = PY_SSIZE_T_MAX;
1201 PyObject *subobj;
1202 int result;
1203
1204 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1205 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1206 return NULL;
1207 if (PyTuple_Check(subobj)) {
1208 Py_ssize_t i;
1209 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1210 result = _bytes_tailmatch(self,
1211 PyTuple_GET_ITEM(subobj, i),
1212 start, end, -1);
1213 if (result == -1)
1214 return NULL;
1215 else if (result) {
1216 Py_RETURN_TRUE;
1217 }
1218 }
1219 Py_RETURN_FALSE;
1220 }
1221 result = _bytes_tailmatch(self, subobj, start, end, -1);
1222 if (result == -1)
1223 return NULL;
1224 else
1225 return PyBool_FromLong(result);
1226}
1227
1228PyDoc_STRVAR(endswith__doc__,
1229"B.endswith(suffix[, start[, end]]) -> bool\n\
1230\n\
1231Return True if B ends with the specified suffix, False otherwise.\n\
1232With optional start, test B beginning at that position.\n\
1233With optional end, stop comparing B at that position.\n\
1234suffix can also be a tuple of strings to try.");
1235
1236static PyObject *
1237bytes_endswith(PyBytesObject *self, PyObject *args)
1238{
1239 Py_ssize_t start = 0;
1240 Py_ssize_t end = PY_SSIZE_T_MAX;
1241 PyObject *subobj;
1242 int result;
1243
1244 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1245 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1246 return NULL;
1247 if (PyTuple_Check(subobj)) {
1248 Py_ssize_t i;
1249 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1250 result = _bytes_tailmatch(self,
1251 PyTuple_GET_ITEM(subobj, i),
1252 start, end, +1);
1253 if (result == -1)
1254 return NULL;
1255 else if (result) {
1256 Py_RETURN_TRUE;
1257 }
1258 }
1259 Py_RETURN_FALSE;
1260 }
1261 result = _bytes_tailmatch(self, subobj, start, end, +1);
1262 if (result == -1)
1263 return NULL;
1264 else
1265 return PyBool_FromLong(result);
1266}
1267
1268
1269
1270PyDoc_STRVAR(translate__doc__,
1271"B.translate(table [,deletechars]) -> bytes\n\
1272\n\
1273Return a copy of the bytes B, where all characters occurring\n\
1274in the optional argument deletechars are removed, and the\n\
1275remaining characters have been mapped through the given\n\
1276translation table, which must be a bytes of length 256.");
1277
1278static PyObject *
1279bytes_translate(PyBytesObject *self, PyObject *args)
1280{
1281 register char *input, *output;
1282 register const char *table;
1283 register Py_ssize_t i, c, changed = 0;
1284 PyObject *input_obj = (PyObject*)self;
1285 const char *table1, *output_start, *del_table=NULL;
1286 Py_ssize_t inlen, tablen, dellen = 0;
1287 PyObject *result;
1288 int trans_table[256];
1289 PyObject *tableobj, *delobj = NULL;
1290
1291 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1292 &tableobj, &delobj))
1293 return NULL;
1294
1295 if (PyBytes_Check(tableobj)) {
1296 table1 = PyBytes_AS_STRING(tableobj);
1297 tablen = PyBytes_GET_SIZE(tableobj);
1298 }
1299 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1300 return NULL;
1301
1302 if (tablen != 256) {
1303 PyErr_SetString(PyExc_ValueError,
1304 "translation table must be 256 characters long");
1305 return NULL;
1306 }
1307
1308 if (delobj != NULL) {
1309 if (PyBytes_Check(delobj)) {
1310 del_table = PyBytes_AS_STRING(delobj);
1311 dellen = PyBytes_GET_SIZE(delobj);
1312 }
1313 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1314 return NULL;
1315 }
1316 else {
1317 del_table = NULL;
1318 dellen = 0;
1319 }
1320
1321 table = table1;
1322 inlen = PyBytes_GET_SIZE(input_obj);
1323 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1324 if (result == NULL)
1325 return NULL;
1326 output_start = output = PyBytes_AsString(result);
1327 input = PyBytes_AS_STRING(input_obj);
1328
1329 if (dellen == 0) {
1330 /* If no deletions are required, use faster code */
1331 for (i = inlen; --i >= 0; ) {
1332 c = Py_CHARMASK(*input++);
1333 if (Py_CHARMASK((*output++ = table[c])) != c)
1334 changed = 1;
1335 }
1336 if (changed || !PyBytes_CheckExact(input_obj))
1337 return result;
1338 Py_DECREF(result);
1339 Py_INCREF(input_obj);
1340 return input_obj;
1341 }
1342
1343 for (i = 0; i < 256; i++)
1344 trans_table[i] = Py_CHARMASK(table[i]);
1345
1346 for (i = 0; i < dellen; i++)
1347 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1348
1349 for (i = inlen; --i >= 0; ) {
1350 c = Py_CHARMASK(*input++);
1351 if (trans_table[c] != -1)
1352 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1353 continue;
1354 changed = 1;
1355 }
1356 if (!changed && PyBytes_CheckExact(input_obj)) {
1357 Py_DECREF(result);
1358 Py_INCREF(input_obj);
1359 return input_obj;
1360 }
1361 /* Fix the size of the resulting string */
1362 if (inlen > 0)
1363 PyBytes_Resize(result, output - output_start);
1364 return result;
1365}
1366
1367
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate byte c in the first target_len bytes of target; yields a char *
   to the first occurrence, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* True when the length bytes at target+offset equal pattern.  The first
   and last bytes are compared directly before memcmp checks the interior.
   Arguments are evaluated more than once, so pass side-effect-free
   expressions only. */
#define Py_STRING_MATCH(target, offset, pattern, length)                 \
    ((target)[(offset)] == (pattern)[0] &&                               \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&           \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1381
1382
1383/* Bytes ops must return a string. */
1384/* If the object is subclass of bytes, create a copy */
1385Py_LOCAL(PyBytesObject *)
1386return_self(PyBytesObject *self)
1387{
1388 if (PyBytes_CheckExact(self)) {
1389 Py_INCREF(self);
1390 return (PyBytesObject *)self;
1391 }
1392 return (PyBytesObject *)PyBytes_FromStringAndSize(
1393 PyBytes_AS_STRING(self),
1394 PyBytes_GET_SIZE(self));
1395}
1396
1397Py_LOCAL_INLINE(Py_ssize_t)
1398countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1399{
1400 Py_ssize_t count=0;
1401 const char *start=target;
1402 const char *end=target+target_len;
1403
1404 while ( (start=findchar(start, end-start, c)) != NULL ) {
1405 count++;
1406 if (count >= maxcount)
1407 break;
1408 start += 1;
1409 }
1410 return count;
1411}
1412
1413Py_LOCAL(Py_ssize_t)
1414findstring(const char *target, Py_ssize_t target_len,
1415 const char *pattern, Py_ssize_t pattern_len,
1416 Py_ssize_t start,
1417 Py_ssize_t end,
1418 int direction)
1419{
1420 if (start < 0) {
1421 start += target_len;
1422 if (start < 0)
1423 start = 0;
1424 }
1425 if (end > target_len) {
1426 end = target_len;
1427 } else if (end < 0) {
1428 end += target_len;
1429 if (end < 0)
1430 end = 0;
1431 }
1432
1433 /* zero-length substrings always match at the first attempt */
1434 if (pattern_len == 0)
1435 return (direction > 0) ? start : end;
1436
1437 end -= pattern_len;
1438
1439 if (direction < 0) {
1440 for (; end >= start; end--)
1441 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1442 return end;
1443 } else {
1444 for (; start <= end; start++)
1445 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1446 return start;
1447 }
1448 return -1;
1449}
1450
1451Py_LOCAL_INLINE(Py_ssize_t)
1452countstring(const char *target, Py_ssize_t target_len,
1453 const char *pattern, Py_ssize_t pattern_len,
1454 Py_ssize_t start,
1455 Py_ssize_t end,
1456 int direction, Py_ssize_t maxcount)
1457{
1458 Py_ssize_t count=0;
1459
1460 if (start < 0) {
1461 start += target_len;
1462 if (start < 0)
1463 start = 0;
1464 }
1465 if (end > target_len) {
1466 end = target_len;
1467 } else if (end < 0) {
1468 end += target_len;
1469 if (end < 0)
1470 end = 0;
1471 }
1472
1473 /* zero-length substrings match everywhere */
1474 if (pattern_len == 0 || maxcount == 0) {
1475 if (target_len+1 < maxcount)
1476 return target_len+1;
1477 return maxcount;
1478 }
1479
1480 end -= pattern_len;
1481 if (direction < 0) {
1482 for (; (end >= start); end--)
1483 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1484 count++;
1485 if (--maxcount <= 0) break;
1486 end -= pattern_len-1;
1487 }
1488 } else {
1489 for (; (start <= end); start++)
1490 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1491 count++;
1492 if (--maxcount <= 0)
1493 break;
1494 start += pattern_len-1;
1495 }
1496 }
1497 return count;
1498}
1499
1500
1501/* Algorithms for different cases of string replacement */
1502
1503/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1504Py_LOCAL(PyBytesObject *)
1505replace_interleave(PyBytesObject *self,
1506 const char *to_s, Py_ssize_t to_len,
1507 Py_ssize_t maxcount)
1508{
1509 char *self_s, *result_s;
1510 Py_ssize_t self_len, result_len;
1511 Py_ssize_t count, i, product;
1512 PyBytesObject *result;
1513
1514 self_len = PyBytes_GET_SIZE(self);
1515
1516 /* 1 at the end plus 1 after every character */
1517 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001518 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001519 count = maxcount;
1520
1521 /* Check for overflow */
1522 /* result_len = count * to_len + self_len; */
1523 product = count * to_len;
1524 if (product / to_len != count) {
1525 PyErr_SetString(PyExc_OverflowError,
1526 "replace string is too long");
1527 return NULL;
1528 }
1529 result_len = product + self_len;
1530 if (result_len < 0) {
1531 PyErr_SetString(PyExc_OverflowError,
1532 "replace string is too long");
1533 return NULL;
1534 }
1535
1536 if (! (result = (PyBytesObject *)
1537 PyBytes_FromStringAndSize(NULL, result_len)) )
1538 return NULL;
1539
1540 self_s = PyBytes_AS_STRING(self);
1541 result_s = PyBytes_AS_STRING(result);
1542
1543 /* TODO: special case single character, which doesn't need memcpy */
1544
1545 /* Lay the first one down (guaranteed this will occur) */
1546 Py_MEMCPY(result_s, to_s, to_len);
1547 result_s += to_len;
1548 count -= 1;
1549
1550 for (i=0; i<count; i++) {
1551 *result_s++ = *self_s++;
1552 Py_MEMCPY(result_s, to_s, to_len);
1553 result_s += to_len;
1554 }
1555
1556 /* Copy the rest of the original string */
1557 Py_MEMCPY(result_s, self_s, self_len-i);
1558
1559 return result;
1560}
1561
1562/* Special case for deleting a single character */
1563/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1564Py_LOCAL(PyBytesObject *)
1565replace_delete_single_character(PyBytesObject *self,
1566 char from_c, Py_ssize_t maxcount)
1567{
1568 char *self_s, *result_s;
1569 char *start, *next, *end;
1570 Py_ssize_t self_len, result_len;
1571 Py_ssize_t count;
1572 PyBytesObject *result;
1573
1574 self_len = PyBytes_GET_SIZE(self);
1575 self_s = PyBytes_AS_STRING(self);
1576
1577 count = countchar(self_s, self_len, from_c, maxcount);
1578 if (count == 0) {
1579 return return_self(self);
1580 }
1581
1582 result_len = self_len - count; /* from_len == 1 */
1583 assert(result_len>=0);
1584
1585 if ( (result = (PyBytesObject *)
1586 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1587 return NULL;
1588 result_s = PyBytes_AS_STRING(result);
1589
1590 start = self_s;
1591 end = self_s + self_len;
1592 while (count-- > 0) {
1593 next = findchar(start, end-start, from_c);
1594 if (next == NULL)
1595 break;
1596 Py_MEMCPY(result_s, start, next-start);
1597 result_s += (next-start);
1598 start = next+1;
1599 }
1600 Py_MEMCPY(result_s, start, end-start);
1601
1602 return result;
1603}
1604
1605/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1606
1607Py_LOCAL(PyBytesObject *)
1608replace_delete_substring(PyBytesObject *self,
1609 const char *from_s, Py_ssize_t from_len,
1610 Py_ssize_t maxcount)
1611{
1612 char *self_s, *result_s;
1613 char *start, *next, *end;
1614 Py_ssize_t self_len, result_len;
1615 Py_ssize_t count, offset;
1616 PyBytesObject *result;
1617
1618 self_len = PyBytes_GET_SIZE(self);
1619 self_s = PyBytes_AS_STRING(self);
1620
1621 count = countstring(self_s, self_len,
1622 from_s, from_len,
1623 0, self_len, 1,
1624 maxcount);
1625
1626 if (count == 0) {
1627 /* no matches */
1628 return return_self(self);
1629 }
1630
1631 result_len = self_len - (count * from_len);
1632 assert (result_len>=0);
1633
1634 if ( (result = (PyBytesObject *)
1635 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1636 return NULL;
1637
1638 result_s = PyBytes_AS_STRING(result);
1639
1640 start = self_s;
1641 end = self_s + self_len;
1642 while (count-- > 0) {
1643 offset = findstring(start, end-start,
1644 from_s, from_len,
1645 0, end-start, FORWARD);
1646 if (offset == -1)
1647 break;
1648 next = start + offset;
1649
1650 Py_MEMCPY(result_s, start, next-start);
1651
1652 result_s += (next-start);
1653 start = next+from_len;
1654 }
1655 Py_MEMCPY(result_s, start, end-start);
1656 return result;
1657}
1658
1659/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1660Py_LOCAL(PyBytesObject *)
1661replace_single_character_in_place(PyBytesObject *self,
1662 char from_c, char to_c,
1663 Py_ssize_t maxcount)
1664{
1665 char *self_s, *result_s, *start, *end, *next;
1666 Py_ssize_t self_len;
1667 PyBytesObject *result;
1668
1669 /* The result string will be the same size */
1670 self_s = PyBytes_AS_STRING(self);
1671 self_len = PyBytes_GET_SIZE(self);
1672
1673 next = findchar(self_s, self_len, from_c);
1674
1675 if (next == NULL) {
1676 /* No matches; return the original bytes */
1677 return return_self(self);
1678 }
1679
1680 /* Need to make a new bytes */
1681 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1682 if (result == NULL)
1683 return NULL;
1684 result_s = PyBytes_AS_STRING(result);
1685 Py_MEMCPY(result_s, self_s, self_len);
1686
1687 /* change everything in-place, starting with this one */
1688 start = result_s + (next-self_s);
1689 *start = to_c;
1690 start++;
1691 end = result_s + self_len;
1692
1693 while (--maxcount > 0) {
1694 next = findchar(start, end-start, from_c);
1695 if (next == NULL)
1696 break;
1697 *next = to_c;
1698 start = next+1;
1699 }
1700
1701 return result;
1702}
1703
1704/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1705Py_LOCAL(PyBytesObject *)
1706replace_substring_in_place(PyBytesObject *self,
1707 const char *from_s, Py_ssize_t from_len,
1708 const char *to_s, Py_ssize_t to_len,
1709 Py_ssize_t maxcount)
1710{
1711 char *result_s, *start, *end;
1712 char *self_s;
1713 Py_ssize_t self_len, offset;
1714 PyBytesObject *result;
1715
1716 /* The result bytes will be the same size */
1717
1718 self_s = PyBytes_AS_STRING(self);
1719 self_len = PyBytes_GET_SIZE(self);
1720
1721 offset = findstring(self_s, self_len,
1722 from_s, from_len,
1723 0, self_len, FORWARD);
1724 if (offset == -1) {
1725 /* No matches; return the original bytes */
1726 return return_self(self);
1727 }
1728
1729 /* Need to make a new bytes */
1730 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1731 if (result == NULL)
1732 return NULL;
1733 result_s = PyBytes_AS_STRING(result);
1734 Py_MEMCPY(result_s, self_s, self_len);
1735
1736 /* change everything in-place, starting with this one */
1737 start = result_s + offset;
1738 Py_MEMCPY(start, to_s, from_len);
1739 start += from_len;
1740 end = result_s + self_len;
1741
1742 while ( --maxcount > 0) {
1743 offset = findstring(start, end-start,
1744 from_s, from_len,
1745 0, end-start, FORWARD);
1746 if (offset==-1)
1747 break;
1748 Py_MEMCPY(start+offset, to_s, from_len);
1749 start += offset+from_len;
1750 }
1751
1752 return result;
1753}
1754
1755/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1756Py_LOCAL(PyBytesObject *)
1757replace_single_character(PyBytesObject *self,
1758 char from_c,
1759 const char *to_s, Py_ssize_t to_len,
1760 Py_ssize_t maxcount)
1761{
1762 char *self_s, *result_s;
1763 char *start, *next, *end;
1764 Py_ssize_t self_len, result_len;
1765 Py_ssize_t count, product;
1766 PyBytesObject *result;
1767
1768 self_s = PyBytes_AS_STRING(self);
1769 self_len = PyBytes_GET_SIZE(self);
1770
1771 count = countchar(self_s, self_len, from_c, maxcount);
1772 if (count == 0) {
1773 /* no matches, return unchanged */
1774 return return_self(self);
1775 }
1776
1777 /* use the difference between current and new, hence the "-1" */
1778 /* result_len = self_len + count * (to_len-1) */
1779 product = count * (to_len-1);
1780 if (product / (to_len-1) != count) {
1781 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1782 return NULL;
1783 }
1784 result_len = self_len + product;
1785 if (result_len < 0) {
1786 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1787 return NULL;
1788 }
1789
1790 if ( (result = (PyBytesObject *)
1791 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1792 return NULL;
1793 result_s = PyBytes_AS_STRING(result);
1794
1795 start = self_s;
1796 end = self_s + self_len;
1797 while (count-- > 0) {
1798 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001799 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001800 break;
1801
1802 if (next == start) {
1803 /* replace with the 'to' */
1804 Py_MEMCPY(result_s, to_s, to_len);
1805 result_s += to_len;
1806 start += 1;
1807 } else {
1808 /* copy the unchanged old then the 'to' */
1809 Py_MEMCPY(result_s, start, next-start);
1810 result_s += (next-start);
1811 Py_MEMCPY(result_s, to_s, to_len);
1812 result_s += to_len;
1813 start = next+1;
1814 }
1815 }
1816 /* Copy the remainder of the remaining bytes */
1817 Py_MEMCPY(result_s, start, end-start);
1818
1819 return result;
1820}
1821
1822/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1823Py_LOCAL(PyBytesObject *)
1824replace_substring(PyBytesObject *self,
1825 const char *from_s, Py_ssize_t from_len,
1826 const char *to_s, Py_ssize_t to_len,
1827 Py_ssize_t maxcount)
1828{
1829 char *self_s, *result_s;
1830 char *start, *next, *end;
1831 Py_ssize_t self_len, result_len;
1832 Py_ssize_t count, offset, product;
1833 PyBytesObject *result;
1834
1835 self_s = PyBytes_AS_STRING(self);
1836 self_len = PyBytes_GET_SIZE(self);
1837
1838 count = countstring(self_s, self_len,
1839 from_s, from_len,
1840 0, self_len, FORWARD, maxcount);
1841 if (count == 0) {
1842 /* no matches, return unchanged */
1843 return return_self(self);
1844 }
1845
1846 /* Check for overflow */
1847 /* result_len = self_len + count * (to_len-from_len) */
1848 product = count * (to_len-from_len);
1849 if (product / (to_len-from_len) != count) {
1850 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1851 return NULL;
1852 }
1853 result_len = self_len + product;
1854 if (result_len < 0) {
1855 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1856 return NULL;
1857 }
1858
1859 if ( (result = (PyBytesObject *)
1860 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1861 return NULL;
1862 result_s = PyBytes_AS_STRING(result);
1863
1864 start = self_s;
1865 end = self_s + self_len;
1866 while (count-- > 0) {
1867 offset = findstring(start, end-start,
1868 from_s, from_len,
1869 0, end-start, FORWARD);
1870 if (offset == -1)
1871 break;
1872 next = start+offset;
1873 if (next == start) {
1874 /* replace with the 'to' */
1875 Py_MEMCPY(result_s, to_s, to_len);
1876 result_s += to_len;
1877 start += from_len;
1878 } else {
1879 /* copy the unchanged old then the 'to' */
1880 Py_MEMCPY(result_s, start, next-start);
1881 result_s += (next-start);
1882 Py_MEMCPY(result_s, to_s, to_len);
1883 result_s += to_len;
1884 start = next+from_len;
1885 }
1886 }
1887 /* Copy the remainder of the remaining bytes */
1888 Py_MEMCPY(result_s, start, end-start);
1889
1890 return result;
1891}
1892
1893
1894Py_LOCAL(PyBytesObject *)
1895replace(PyBytesObject *self,
1896 const char *from_s, Py_ssize_t from_len,
1897 const char *to_s, Py_ssize_t to_len,
1898 Py_ssize_t maxcount)
1899{
1900 if (maxcount < 0) {
1901 maxcount = PY_SSIZE_T_MAX;
1902 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1903 /* nothing to do; return the original bytes */
1904 return return_self(self);
1905 }
1906
1907 if (maxcount == 0 ||
1908 (from_len == 0 && to_len == 0)) {
1909 /* nothing to do; return the original bytes */
1910 return return_self(self);
1911 }
1912
1913 /* Handle zero-length special cases */
1914
1915 if (from_len == 0) {
1916 /* insert the 'to' bytes everywhere. */
1917 /* >>> "Python".replace("", ".") */
1918 /* '.P.y.t.h.o.n.' */
1919 return replace_interleave(self, to_s, to_len, maxcount);
1920 }
1921
1922 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1923 /* point for an empty self bytes to generate a non-empty bytes */
1924 /* Special case so the remaining code always gets a non-empty bytes */
1925 if (PyBytes_GET_SIZE(self) == 0) {
1926 return return_self(self);
1927 }
1928
1929 if (to_len == 0) {
1930 /* delete all occurances of 'from' bytes */
1931 if (from_len == 1) {
1932 return replace_delete_single_character(
1933 self, from_s[0], maxcount);
1934 } else {
1935 return replace_delete_substring(self, from_s, from_len, maxcount);
1936 }
1937 }
1938
1939 /* Handle special case where both bytes have the same length */
1940
1941 if (from_len == to_len) {
1942 if (from_len == 1) {
1943 return replace_single_character_in_place(
1944 self,
1945 from_s[0],
1946 to_s[0],
1947 maxcount);
1948 } else {
1949 return replace_substring_in_place(
1950 self, from_s, from_len, to_s, to_len, maxcount);
1951 }
1952 }
1953
1954 /* Otherwise use the more generic algorithms */
1955 if (from_len == 1) {
1956 return replace_single_character(self, from_s[0],
1957 to_s, to_len, maxcount);
1958 } else {
1959 /* len('from')>=2, len('to')>=1 */
1960 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1961 }
1962}
1963
1964PyDoc_STRVAR(replace__doc__,
1965"B.replace (old, new[, count]) -> bytes\n\
1966\n\
1967Return a copy of bytes B with all occurrences of subsection\n\
1968old replaced by new. If the optional argument count is\n\
1969given, only the first count occurrences are replaced.");
1970
1971static PyObject *
1972bytes_replace(PyBytesObject *self, PyObject *args)
1973{
1974 Py_ssize_t count = -1;
1975 PyObject *from, *to;
1976 const char *from_s, *to_s;
1977 Py_ssize_t from_len, to_len;
1978
1979 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1980 return NULL;
1981
1982 if (PyBytes_Check(from)) {
1983 from_s = PyBytes_AS_STRING(from);
1984 from_len = PyBytes_GET_SIZE(from);
1985 }
1986 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1987 return NULL;
1988
1989 if (PyBytes_Check(to)) {
1990 to_s = PyBytes_AS_STRING(to);
1991 to_len = PyBytes_GET_SIZE(to);
1992 }
1993 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1994 return NULL;
1995
1996 return (PyObject *)replace((PyBytesObject *) self,
1997 from_s, from_len,
1998 to_s, to_len, count);
1999}
2000
2001
2002/* Overallocate the initial list to reduce the number of reallocs for small
2003 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
2004 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
2005 text (roughly 11 words per line) and field delimited data (usually 1-10
2006 fields). For large strings the split algorithms are bandwidth limited
2007 so increasing the preallocation likely will not improve things.*/
2008
/* Upper bound on the number of list slots preallocated for a split. */
#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* Number of slots to preallocate for at most maxsplit splits, capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression
   arguments bind as intended. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Create the bytes object data[left:right] and append it to `list`.
   Relies on `str` and `list` variables and an `onError` label in the
   calling function.  NOTE: expands to several statements, so it must not
   be used as the unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Create the bytes object data[left:right] and store it as element number
   `count` of `list`: directly into the slot while count < MAX_PREALLOC,
   by appending afterwards.  Increments `count`.  Relies on `str`, `list`
   and `count` variables and an `onError` label in the calling function. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
2046
2047
2048Py_LOCAL_INLINE(PyObject *)
2049split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2050{
2051 register Py_ssize_t i, j, count=0;
2052 PyObject *str;
2053 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2054
2055 if (list == NULL)
2056 return NULL;
2057
2058 i = j = 0;
2059 while ((j < len) && (maxcount-- > 0)) {
2060 for(; j<len; j++) {
2061 /* I found that using memchr makes no difference */
2062 if (s[j] == ch) {
2063 SPLIT_ADD(s, i, j);
2064 i = j = j + 1;
2065 break;
2066 }
2067 }
2068 }
2069 if (i <= len) {
2070 SPLIT_ADD(s, i, len);
2071 }
2072 FIX_PREALLOC_SIZE(list);
2073 return list;
2074
2075 onError:
2076 Py_DECREF(list);
2077 return NULL;
2078}
2079
2080PyDoc_STRVAR(split__doc__,
2081"B.split(sep [,maxsplit]) -> list of bytes\n\
2082\n\
2083Return a list of the bytes in the string B, using sep as the\n\
2084delimiter. If maxsplit is given, at most maxsplit\n\
2085splits are done.");
2086
2087static PyObject *
2088bytes_split(PyBytesObject *self, PyObject *args)
2089{
2090 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2091 Py_ssize_t maxsplit = -1, count=0;
2092 const char *s = PyBytes_AS_STRING(self), *sub;
2093 PyObject *list, *str, *subobj;
2094#ifdef USE_FAST
2095 Py_ssize_t pos;
2096#endif
2097
2098 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2099 return NULL;
2100 if (maxsplit < 0)
2101 maxsplit = PY_SSIZE_T_MAX;
2102 if (PyBytes_Check(subobj)) {
2103 sub = PyBytes_AS_STRING(subobj);
2104 n = PyBytes_GET_SIZE(subobj);
2105 }
2106 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2107 return NULL;
2108
2109 if (n == 0) {
2110 PyErr_SetString(PyExc_ValueError, "empty separator");
2111 return NULL;
2112 }
2113 else if (n == 1)
2114 return split_char(s, len, sub[0], maxsplit);
2115
2116 list = PyList_New(PREALLOC_SIZE(maxsplit));
2117 if (list == NULL)
2118 return NULL;
2119
2120#ifdef USE_FAST
2121 i = j = 0;
2122 while (maxsplit-- > 0) {
2123 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2124 if (pos < 0)
2125 break;
2126 j = i+pos;
2127 SPLIT_ADD(s, i, j);
2128 i = j + n;
2129 }
2130#else
2131 i = j = 0;
2132 while ((j+n <= len) && (maxsplit-- > 0)) {
2133 for (; j+n <= len; j++) {
2134 if (Py_STRING_MATCH(s, j, sub, n)) {
2135 SPLIT_ADD(s, i, j);
2136 i = j = j + n;
2137 break;
2138 }
2139 }
2140 }
2141#endif
2142 SPLIT_ADD(s, i, len);
2143 FIX_PREALLOC_SIZE(list);
2144 return list;
2145
2146 onError:
2147 Py_DECREF(list);
2148 return NULL;
2149}
2150
2151PyDoc_STRVAR(partition__doc__,
2152"B.partition(sep) -> (head, sep, tail)\n\
2153\n\
2154Searches for the separator sep in B, and returns the part before it,\n\
2155the separator itself, and the part after it. If the separator is not\n\
2156found, returns B and two empty bytes.");
2157
2158static PyObject *
2159bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2160{
2161 PyObject *bytesep, *result;
2162
2163 bytesep = PyBytes_FromObject(sep_obj);
2164 if (! bytesep)
2165 return NULL;
2166
2167 result = stringlib_partition(
2168 (PyObject*) self,
2169 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002170 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002171 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2172 );
2173
2174 Py_DECREF(bytesep);
2175 return result;
2176}
2177
2178PyDoc_STRVAR(rpartition__doc__,
2179"B.rpartition(sep) -> (tail, sep, head)\n\
2180\n\
2181Searches for the separator sep in B, starting at the end of B, and returns\n\
2182the part before it, the separator itself, and the part after it. If the\n\
2183separator is not found, returns two empty bytes and B.");
2184
2185static PyObject *
2186bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2187{
2188 PyObject *bytesep, *result;
2189
2190 bytesep = PyBytes_FromObject(sep_obj);
2191 if (! bytesep)
2192 return NULL;
2193
2194 result = stringlib_rpartition(
2195 (PyObject*) self,
2196 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002197 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002198 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2199 );
2200
2201 Py_DECREF(bytesep);
2202 return result;
2203}
2204
2205Py_LOCAL_INLINE(PyObject *)
2206rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2207{
2208 register Py_ssize_t i, j, count=0;
2209 PyObject *str;
2210 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2211
2212 if (list == NULL)
2213 return NULL;
2214
2215 i = j = len - 1;
2216 while ((i >= 0) && (maxcount-- > 0)) {
2217 for (; i >= 0; i--) {
2218 if (s[i] == ch) {
2219 SPLIT_ADD(s, i + 1, j + 1);
2220 j = i = i - 1;
2221 break;
2222 }
2223 }
2224 }
2225 if (j >= -1) {
2226 SPLIT_ADD(s, 0, j + 1);
2227 }
2228 FIX_PREALLOC_SIZE(list);
2229 if (PyList_Reverse(list) < 0)
2230 goto onError;
2231
2232 return list;
2233
2234 onError:
2235 Py_DECREF(list);
2236 return NULL;
2237}
2238
2239PyDoc_STRVAR(rsplit__doc__,
2240"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2241\n\
2242Return a list of the sections in the byte B, using sep as the\n\
2243delimiter, starting at the end of the bytes and working\n\
2244to the front. If maxsplit is given, at most maxsplit splits are\n\
2245done.");
2246
2247static PyObject *
2248bytes_rsplit(PyBytesObject *self, PyObject *args)
2249{
2250 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2251 Py_ssize_t maxsplit = -1, count=0;
2252 const char *s = PyBytes_AS_STRING(self), *sub;
2253 PyObject *list, *str, *subobj;
2254
2255 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2256 return NULL;
2257 if (maxsplit < 0)
2258 maxsplit = PY_SSIZE_T_MAX;
2259 if (PyBytes_Check(subobj)) {
2260 sub = PyBytes_AS_STRING(subobj);
2261 n = PyBytes_GET_SIZE(subobj);
2262 }
2263 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2264 return NULL;
2265
2266 if (n == 0) {
2267 PyErr_SetString(PyExc_ValueError, "empty separator");
2268 return NULL;
2269 }
2270 else if (n == 1)
2271 return rsplit_char(s, len, sub[0], maxsplit);
2272
2273 list = PyList_New(PREALLOC_SIZE(maxsplit));
2274 if (list == NULL)
2275 return NULL;
2276
2277 j = len;
2278 i = j - n;
2279
2280 while ( (i >= 0) && (maxsplit-- > 0) ) {
2281 for (; i>=0; i--) {
2282 if (Py_STRING_MATCH(s, i, sub, n)) {
2283 SPLIT_ADD(s, i + n, j);
2284 j = i;
2285 i -= n;
2286 break;
2287 }
2288 }
2289 }
2290 SPLIT_ADD(s, 0, j);
2291 FIX_PREALLOC_SIZE(list);
2292 if (PyList_Reverse(list) < 0)
2293 goto onError;
2294 return list;
2295
2296onError:
2297 Py_DECREF(list);
2298 return NULL;
2299}
2300
2301PyDoc_STRVAR(extend__doc__,
2302"B.extend(iterable int) -> None\n\
2303\n\
2304Append all the elements from the iterator or sequence to the\n\
2305end of the bytes.");
2306static PyObject *
2307bytes_extend(PyBytesObject *self, PyObject *arg)
2308{
2309 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2310 return NULL;
2311 Py_RETURN_NONE;
2312}
2313
2314
2315PyDoc_STRVAR(reverse__doc__,
2316"B.reverse() -> None\n\
2317\n\
2318Reverse the order of the values in bytes in place.");
2319static PyObject *
2320bytes_reverse(PyBytesObject *self, PyObject *unused)
2321{
2322 char swap, *head, *tail;
2323 Py_ssize_t i, j, n = self->ob_size;
2324
2325 j = n / 2;
2326 head = self->ob_bytes;
2327 tail = head + n - 1;
2328 for (i = 0; i < j; i++) {
2329 swap = *head;
2330 *head++ = *tail;
2331 *tail-- = swap;
2332 }
2333
2334 Py_RETURN_NONE;
2335}
2336
2337PyDoc_STRVAR(insert__doc__,
2338"B.insert(index, int) -> None\n\
2339\n\
2340Insert a single item into the bytes before the given index.");
2341static PyObject *
2342bytes_insert(PyBytesObject *self, PyObject *args)
2343{
2344 int value;
2345 Py_ssize_t where, n = self->ob_size;
2346
2347 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2348 return NULL;
2349
2350 if (n == PY_SSIZE_T_MAX) {
2351 PyErr_SetString(PyExc_OverflowError,
2352 "cannot add more objects to bytes");
2353 return NULL;
2354 }
2355 if (value < 0 || value >= 256) {
2356 PyErr_SetString(PyExc_ValueError,
2357 "byte must be in range(0, 256)");
2358 return NULL;
2359 }
2360 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2361 return NULL;
2362
2363 if (where < 0) {
2364 where += n;
2365 if (where < 0)
2366 where = 0;
2367 }
2368 if (where > n)
2369 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002370 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002371 self->ob_bytes[where] = value;
2372
2373 Py_RETURN_NONE;
2374}
2375
2376PyDoc_STRVAR(append__doc__,
2377"B.append(int) -> None\n\
2378\n\
2379Append a single item to the end of the bytes.");
2380static PyObject *
2381bytes_append(PyBytesObject *self, PyObject *arg)
2382{
2383 int value;
2384 Py_ssize_t n = self->ob_size;
2385
2386 if (! _getbytevalue(arg, &value))
2387 return NULL;
2388 if (n == PY_SSIZE_T_MAX) {
2389 PyErr_SetString(PyExc_OverflowError,
2390 "cannot add more objects to bytes");
2391 return NULL;
2392 }
2393 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2394 return NULL;
2395
2396 self->ob_bytes[n] = value;
2397
2398 Py_RETURN_NONE;
2399}
2400
2401PyDoc_STRVAR(pop__doc__,
2402"B.pop([index]) -> int\n\
2403\n\
2404Remove and return a single item from the bytes. If no index\n\
2405argument is give, will pop the last value.");
2406static PyObject *
2407bytes_pop(PyBytesObject *self, PyObject *args)
2408{
2409 int value;
2410 Py_ssize_t where = -1, n = self->ob_size;
2411
2412 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2413 return NULL;
2414
2415 if (n == 0) {
2416 PyErr_SetString(PyExc_OverflowError,
2417 "cannot pop an empty bytes");
2418 return NULL;
2419 }
2420 if (where < 0)
2421 where += self->ob_size;
2422 if (where < 0 || where >= self->ob_size) {
2423 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2424 return NULL;
2425 }
2426
2427 value = self->ob_bytes[where];
2428 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2429 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2430 return NULL;
2431
2432 return PyInt_FromLong(value);
2433}
2434
2435PyDoc_STRVAR(remove__doc__,
2436"B.remove(int) -> None\n\
2437\n\
2438Remove the first occurance of a value in bytes");
2439static PyObject *
2440bytes_remove(PyBytesObject *self, PyObject *arg)
2441{
2442 int value;
2443 Py_ssize_t where, n = self->ob_size;
2444
2445 if (! _getbytevalue(arg, &value))
2446 return NULL;
2447
2448 for (where = 0; where < n; where++) {
2449 if (self->ob_bytes[where] == value)
2450 break;
2451 }
2452 if (where == n) {
2453 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2454 return NULL;
2455 }
2456
2457 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2458 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2459 return NULL;
2460
2461 Py_RETURN_NONE;
2462}
2463
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002464/* XXX These two helpers could be optimized if argsize == 1 */
2465
2466Py_ssize_t
2467lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2468 void *argptr, Py_ssize_t argsize)
2469{
2470 Py_ssize_t i = 0;
2471 while (i < mysize && memchr(argptr, myptr[i], argsize))
2472 i++;
2473 return i;
2474}
2475
2476Py_ssize_t
2477rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2478 void *argptr, Py_ssize_t argsize)
2479{
2480 Py_ssize_t i = mysize - 1;
2481 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2482 i--;
2483 return i + 1;
2484}
2485
2486PyDoc_STRVAR(strip__doc__,
2487"B.strip(bytes) -> bytes\n\
2488\n\
2489Strip leading and trailing bytes contained in the argument.");
2490static PyObject *
2491bytes_strip(PyBytesObject *self, PyObject *arg)
2492{
2493 Py_ssize_t left, right, mysize, argsize;
2494 void *myptr, *argptr;
2495 if (arg == NULL || !PyBytes_Check(arg)) {
2496 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2497 return NULL;
2498 }
2499 myptr = self->ob_bytes;
2500 mysize = self->ob_size;
2501 argptr = ((PyBytesObject *)arg)->ob_bytes;
2502 argsize = ((PyBytesObject *)arg)->ob_size;
2503 left = lstrip_helper(myptr, mysize, argptr, argsize);
2504 right = rstrip_helper(myptr, mysize, argptr, argsize);
2505 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2506}
2507
2508PyDoc_STRVAR(lstrip__doc__,
2509"B.lstrip(bytes) -> bytes\n\
2510\n\
2511Strip leading bytes contained in the argument.");
2512static PyObject *
2513bytes_lstrip(PyBytesObject *self, PyObject *arg)
2514{
2515 Py_ssize_t left, right, mysize, argsize;
2516 void *myptr, *argptr;
2517 if (arg == NULL || !PyBytes_Check(arg)) {
2518 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2519 return NULL;
2520 }
2521 myptr = self->ob_bytes;
2522 mysize = self->ob_size;
2523 argptr = ((PyBytesObject *)arg)->ob_bytes;
2524 argsize = ((PyBytesObject *)arg)->ob_size;
2525 left = lstrip_helper(myptr, mysize, argptr, argsize);
2526 right = mysize;
2527 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2528}
2529
2530PyDoc_STRVAR(rstrip__doc__,
2531"B.rstrip(bytes) -> bytes\n\
2532\n\
2533Strip trailing bytes contained in the argument.");
2534static PyObject *
2535bytes_rstrip(PyBytesObject *self, PyObject *arg)
2536{
2537 Py_ssize_t left, right, mysize, argsize;
2538 void *myptr, *argptr;
2539 if (arg == NULL || !PyBytes_Check(arg)) {
2540 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2541 return NULL;
2542 }
2543 myptr = self->ob_bytes;
2544 mysize = self->ob_size;
2545 argptr = ((PyBytesObject *)arg)->ob_bytes;
2546 argsize = ((PyBytesObject *)arg)->ob_size;
2547 left = 0;
2548 right = rstrip_helper(myptr, mysize, argptr, argsize);
2549 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2550}
Neal Norwitz6968b052007-02-27 19:02:19 +00002551
Guido van Rossumd624f182006-04-24 13:47:05 +00002552PyDoc_STRVAR(decode_doc,
2553"B.decode([encoding[,errors]]) -> unicode obect.\n\
2554\n\
2555Decodes B using the codec registered for encoding. encoding defaults\n\
2556to the default encoding. errors may be given to set a different error\n\
2557handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2558a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2559as well as any other name registerd with codecs.register_error that is\n\
2560able to handle UnicodeDecodeErrors.");
2561
2562static PyObject *
2563bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002564{
Guido van Rossumd624f182006-04-24 13:47:05 +00002565 const char *encoding = NULL;
2566 const char *errors = NULL;
2567
2568 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2569 return NULL;
2570 if (encoding == NULL)
2571 encoding = PyUnicode_GetDefaultEncoding();
2572 return PyCodec_Decode(self, encoding, errors);
2573}
2574
Guido van Rossuma0867f72006-05-05 04:34:18 +00002575PyDoc_STRVAR(alloc_doc,
2576"B.__alloc__() -> int\n\
2577\n\
2578Returns the number of bytes actually allocated.");
2579
2580static PyObject *
2581bytes_alloc(PyBytesObject *self)
2582{
2583 return PyInt_FromSsize_t(self->ob_alloc);
2584}
2585
Guido van Rossum20188312006-05-05 15:15:40 +00002586PyDoc_STRVAR(join_doc,
2587"bytes.join(iterable_of_bytes) -> bytes\n\
2588\n\
2589Concatenates any number of bytes objects. Example:\n\
2590bytes.join([bytes('ab'), bytes('pq'), bytes('rs')]) -> bytes('abpqrs').");
2591
2592static PyObject *
2593bytes_join(PyObject *cls, PyObject *it)
2594{
2595 PyObject *seq;
2596 Py_ssize_t i;
2597 Py_ssize_t n;
2598 PyObject **items;
2599 Py_ssize_t totalsize = 0;
2600 PyObject *result;
2601 char *dest;
2602
2603 seq = PySequence_Fast(it, "can only join an iterable");
2604 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002605 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002606 n = PySequence_Fast_GET_SIZE(seq);
2607 items = PySequence_Fast_ITEMS(seq);
2608
2609 /* Compute the total size, and check that they are all bytes */
2610 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002611 PyObject *obj = items[i];
2612 if (!PyBytes_Check(obj)) {
2613 PyErr_Format(PyExc_TypeError,
2614 "can only join an iterable of bytes "
2615 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002616 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002617 (long)i, obj->ob_type->tp_name);
2618 goto error;
2619 }
2620 totalsize += PyBytes_GET_SIZE(obj);
2621 if (totalsize < 0) {
2622 PyErr_NoMemory();
2623 goto error;
2624 }
Guido van Rossum20188312006-05-05 15:15:40 +00002625 }
2626
2627 /* Allocate the result, and copy the bytes */
2628 result = PyBytes_FromStringAndSize(NULL, totalsize);
2629 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002630 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002631 dest = PyBytes_AS_STRING(result);
2632 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002633 PyObject *obj = items[i];
2634 Py_ssize_t size = PyBytes_GET_SIZE(obj);
2635 memcpy(dest, PyBytes_AS_STRING(obj), size);
2636 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002637 }
2638
2639 /* Done */
2640 Py_DECREF(seq);
2641 return result;
2642
2643 /* Error handling */
2644 error:
2645 Py_DECREF(seq);
2646 return NULL;
2647}
2648
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002649PyDoc_STRVAR(fromhex_doc,
2650"bytes.fromhex(string) -> bytes\n\
2651\n\
2652Create a bytes object from a string of hexadecimal numbers.\n\
2653Spaces between two numbers are accepted. Example:\n\
2654bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2655
/* Convert one hexadecimal digit character to its value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else.  Plain range comparisons are used instead of the <ctype.h>
 * classifiers so that the result does not depend on the current locale and
 * every int argument (including negative values) is handled.
 */
static int
hex_digit_to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
2669
2670static PyObject *
2671bytes_fromhex(PyObject *cls, PyObject *args)
2672{
2673 PyObject *newbytes;
2674 char *hex, *buf;
2675 Py_ssize_t len, byteslen, i, j;
2676 int top, bot;
2677
2678 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2679 return NULL;
2680
2681 byteslen = len / 2; /* max length if there are no spaces */
2682
2683 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2684 if (!newbytes)
2685 return NULL;
2686 buf = PyBytes_AS_STRING(newbytes);
2687
2688 for (i = j = 0; ; i += 2) {
2689 /* skip over spaces in the input */
2690 while (Py_CHARMASK(hex[i]) == ' ')
2691 i++;
2692 if (i >= len)
2693 break;
2694 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2695 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2696 if (top == -1 || bot == -1) {
2697 PyErr_Format(PyExc_ValueError,
2698 "non-hexadecimal number string '%c%c' found in "
2699 "fromhex() arg at position %zd",
2700 hex[i], hex[i+1], i);
2701 goto error;
2702 }
2703 buf[j++] = (top << 4) + bot;
2704 }
2705 if (PyBytes_Resize(newbytes, j) < 0)
2706 goto error;
2707 return newbytes;
2708
2709 error:
2710 Py_DECREF(newbytes);
2711 return NULL;
2712}
2713
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002714PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2715
2716static PyObject *
2717bytes_reduce(PyBytesObject *self)
2718{
2719 return Py_BuildValue("(O(s#))",
2720 self->ob_type,
2721 self->ob_bytes == NULL ? "" : self->ob_bytes,
2722 self->ob_size);
2723}
2724
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002725static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002726 (lenfunc)bytes_length, /* sq_length */
2727 (binaryfunc)bytes_concat, /* sq_concat */
2728 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2729 (ssizeargfunc)bytes_getitem, /* sq_item */
2730 0, /* sq_slice */
2731 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2732 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002733 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002734 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2735 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002736};
2737
2738static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002739 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002740 (binaryfunc)bytes_subscript,
2741 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002742};
2743
2744static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002745 (readbufferproc)bytes_getbuffer,
2746 (writebufferproc)bytes_getbuffer,
2747 (segcountproc)bytes_getsegcount,
2748 /* XXX Bytes are not characters! But we need to implement
2749 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
2750 (charbufferproc)bytes_getbuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002751};
2752
2753static PyMethodDef
2754bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002755 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2756 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2757 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2758 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2759 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2760 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2761 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2762 startswith__doc__},
2763 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2764 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2765 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2766 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2767 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2768 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2769 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2770 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2771 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2772 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2773 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2774 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002775 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2776 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2777 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002778 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002779 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002780 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2781 fromhex_doc},
Guido van Rossum20188312006-05-05 15:15:40 +00002782 {"join", (PyCFunction)bytes_join, METH_O|METH_CLASS, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002783 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002784 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002785};
2786
2787PyDoc_STRVAR(bytes_doc,
2788"bytes([iterable]) -> new array of bytes.\n\
2789\n\
2790If an argument is given it must be an iterable yielding ints in range(256).");
2791
2792PyTypeObject PyBytes_Type = {
2793 PyObject_HEAD_INIT(&PyType_Type)
2794 0,
2795 "bytes",
2796 sizeof(PyBytesObject),
2797 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002798 (destructor)bytes_dealloc, /* tp_dealloc */
2799 0, /* tp_print */
2800 0, /* tp_getattr */
2801 0, /* tp_setattr */
2802 0, /* tp_compare */
2803 (reprfunc)bytes_repr, /* tp_repr */
2804 0, /* tp_as_number */
2805 &bytes_as_sequence, /* tp_as_sequence */
2806 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002807 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002808 0, /* tp_call */
2809 (reprfunc)bytes_str, /* tp_str */
2810 PyObject_GenericGetAttr, /* tp_getattro */
2811 0, /* tp_setattro */
2812 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002813 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002814 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002815 bytes_doc, /* tp_doc */
2816 0, /* tp_traverse */
2817 0, /* tp_clear */
2818 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2819 0, /* tp_weaklistoffset */
2820 0, /* tp_iter */
2821 0, /* tp_iternext */
2822 bytes_methods, /* tp_methods */
2823 0, /* tp_members */
2824 0, /* tp_getset */
2825 0, /* tp_base */
2826 0, /* tp_dict */
2827 0, /* tp_descr_get */
2828 0, /* tp_descr_set */
2829 0, /* tp_dictoffset */
2830 (initproc)bytes_init, /* tp_init */
2831 PyType_GenericAlloc, /* tp_alloc */
2832 PyType_GenericNew, /* tp_new */
2833 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002834};