blob: 987a3c5db56f17e15994fb44174361d480076225 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
/* The nullbytes object is used by the stringlib during partition.
 * If partition is removed from bytes, nullbytes and its helper
 * Init/Fini should also be removed.
 *
 * The pointer is NULL until PyBytes_Init() has created the object, and
 * is reset to NULL again by PyBytes_Fini().
 */
static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
Guido van Rossumad7d8d12007-04-13 01:39:34 +000034/* Helpers */
35
36static int
37_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000038{
39 PyObject *intarg = PyNumber_Int(arg);
40 if (! intarg)
41 return 0;
42 *value = PyInt_AsLong(intarg);
43 Py_DECREF(intarg);
44 if (*value < 0 || *value >= 256) {
45 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
46 return 0;
47 }
48 return 1;
49}
50
Guido van Rossumad7d8d12007-04-13 01:39:34 +000051Py_ssize_t
52_getbuffer(PyObject *obj, void **ptr)
53{
54 PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;
55
56 if (buffer == NULL ||
57 PyUnicode_Check(obj) ||
58 buffer->bf_getreadbuffer == NULL ||
59 buffer->bf_getsegcount == NULL ||
60 buffer->bf_getsegcount(obj, NULL) != 1)
61 {
62 *ptr = NULL;
63 return -1;
64 }
65
66 return buffer->bf_getreadbuffer(obj, 0, ptr);
67}
68
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069/* Direct API functions */
70
71PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000072PyBytes_FromObject(PyObject *input)
73{
74 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
75 input, NULL);
76}
77
78PyObject *
79PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080{
81 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000082 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000083
Guido van Rossumd624f182006-04-24 13:47:05 +000084 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000085
86 new = PyObject_New(PyBytesObject, &PyBytes_Type);
87 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000088 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000089
Guido van Rossumf15a29f2007-05-04 00:41:39 +000090 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +000091 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +000092 alloc = 0;
93 }
Guido van Rossumd624f182006-04-24 13:47:05 +000094 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +000095 alloc = size + 1;
96 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +000097 if (new->ob_bytes == NULL) {
98 Py_DECREF(new);
99 return NULL;
100 }
101 if (bytes != NULL)
102 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000103 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000104 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000105 new->ob_size = size;
106 new->ob_alloc = alloc;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000107
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108 return (PyObject *)new;
109}
110
111Py_ssize_t
112PyBytes_Size(PyObject *self)
113{
114 assert(self != NULL);
115 assert(PyBytes_Check(self));
116
Guido van Rossum20188312006-05-05 15:15:40 +0000117 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118}
119
120char *
121PyBytes_AsString(PyObject *self)
122{
123 assert(self != NULL);
124 assert(PyBytes_Check(self));
125
Guido van Rossum20188312006-05-05 15:15:40 +0000126 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127}
128
129int
130PyBytes_Resize(PyObject *self, Py_ssize_t size)
131{
132 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000133 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134
135 assert(self != NULL);
136 assert(PyBytes_Check(self));
137 assert(size >= 0);
138
Guido van Rossuma0867f72006-05-05 04:34:18 +0000139 if (size < alloc / 2) {
140 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000141 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000142 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000143 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000144 /* Within allocated size; quick exit */
145 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000146 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000147 return 0;
148 }
149 else if (size <= alloc * 1.125) {
150 /* Moderate upsize; overallocate similar to list_resize() */
151 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
152 }
153 else {
154 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000155 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000156 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000157
158 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000159 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000160 PyErr_NoMemory();
161 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000162 }
163
Guido van Rossumd624f182006-04-24 13:47:05 +0000164 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000165 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000166 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000167 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
168
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000169 return 0;
170}
171
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000172PyObject *
173PyBytes_Concat(PyObject *a, PyObject *b)
174{
175 Py_ssize_t asize, bsize, size;
176 void *aptr, *bptr;
177 PyBytesObject *result;
178
179 asize = _getbuffer(a, &aptr);
180 bsize = _getbuffer(b, &bptr);
181 if (asize < 0 || bsize < 0) {
182 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
183 a->ob_type->tp_name, b->ob_type->tp_name);
184 return NULL;
185 }
186
187 size = asize + bsize;
188 if (size < 0)
189 return PyErr_NoMemory();
190
191 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
192 if (result != NULL) {
193 memcpy(result->ob_bytes, aptr, asize);
194 memcpy(result->ob_bytes + asize, bptr, bsize);
195 }
196 return (PyObject *)result;
197}
198
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199/* Functions stuffed into the type object */
200
201static Py_ssize_t
202bytes_length(PyBytesObject *self)
203{
204 return self->ob_size;
205}
206
207static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000208bytes_concat(PyBytesObject *self, PyObject *other)
209{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000211}
212
213static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000214bytes_iconcat(PyBytesObject *self, PyObject *other)
215{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000216 void *optr;
Guido van Rossum13e57212006-04-27 22:54:26 +0000217 Py_ssize_t osize;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000218 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000219 Py_ssize_t size;
220
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000221 osize = _getbuffer(other, &optr);
222 if (osize < 0) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000223 PyErr_Format(PyExc_TypeError,
224 "can't concat bytes to %.100s", other->ob_type->tp_name);
225 return NULL;
226 }
227
228 mysize = self->ob_size;
Guido van Rossum13e57212006-04-27 22:54:26 +0000229 size = mysize + osize;
230 if (size < 0)
231 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000232 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000233 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000234 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
235 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000236 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000237 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238 memcpy(self->ob_bytes + mysize, optr, osize);
Guido van Rossum13e57212006-04-27 22:54:26 +0000239 Py_INCREF(self);
240 return (PyObject *)self;
241}
242
243static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000244bytes_repeat(PyBytesObject *self, Py_ssize_t count)
245{
246 PyBytesObject *result;
247 Py_ssize_t mysize;
248 Py_ssize_t size;
249
250 if (count < 0)
251 count = 0;
252 mysize = self->ob_size;
253 size = mysize * count;
254 if (count != 0 && size / count != mysize)
255 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000256 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000257 if (result != NULL && size != 0) {
258 if (mysize == 1)
259 memset(result->ob_bytes, self->ob_bytes[0], size);
260 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000261 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000262 for (i = 0; i < count; i++)
263 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
264 }
265 }
266 return (PyObject *)result;
267}
268
269static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000270bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
271{
272 Py_ssize_t mysize;
273 Py_ssize_t size;
274
275 if (count < 0)
276 count = 0;
277 mysize = self->ob_size;
278 size = mysize * count;
279 if (count != 0 && size / count != mysize)
280 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000281 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000282 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000283 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
284 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000285 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000286 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000287
Guido van Rossum13e57212006-04-27 22:54:26 +0000288 if (mysize == 1)
289 memset(self->ob_bytes, self->ob_bytes[0], size);
290 else {
291 Py_ssize_t i;
292 for (i = 1; i < count; i++)
293 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
294 }
295
296 Py_INCREF(self);
297 return (PyObject *)self;
298}
299
300static int
301bytes_substring(PyBytesObject *self, PyBytesObject *other)
302{
303 Py_ssize_t i;
304
305 if (other->ob_size == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000306 return memchr(self->ob_bytes, other->ob_bytes[0],
Guido van Rossum13e57212006-04-27 22:54:26 +0000307 self->ob_size) != NULL;
308 }
309 if (other->ob_size == 0)
310 return 1; /* Edge case */
311 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
312 /* XXX Yeah, yeah, lots of optimizations possible... */
313 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
314 return 1;
315 }
316 return 0;
317}
318
319static int
320bytes_contains(PyBytesObject *self, PyObject *value)
321{
322 Py_ssize_t ival;
323
324 if (PyBytes_Check(value))
325 return bytes_substring(self, (PyBytesObject *)value);
326
Thomas Woutersd204a712006-08-22 13:41:17 +0000327 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000328 if (ival == -1 && PyErr_Occurred())
329 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000330 if (ival < 0 || ival >= 256) {
331 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
332 return -1;
333 }
334
335 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
336}
337
338static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000339bytes_getitem(PyBytesObject *self, Py_ssize_t i)
340{
341 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000342 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000343 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000344 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
345 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000346 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000347 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
348}
349
350static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000351bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000352{
Thomas Wouters376446d2006-12-19 08:30:14 +0000353 if (PyIndex_Check(item)) {
354 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000355
Thomas Wouters376446d2006-12-19 08:30:14 +0000356 if (i == -1 && PyErr_Occurred())
357 return NULL;
358
359 if (i < 0)
360 i += PyBytes_GET_SIZE(self);
361
362 if (i < 0 || i >= self->ob_size) {
363 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
364 return NULL;
365 }
366 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
367 }
368 else if (PySlice_Check(item)) {
369 Py_ssize_t start, stop, step, slicelength, cur, i;
370 if (PySlice_GetIndicesEx((PySliceObject *)item,
371 PyBytes_GET_SIZE(self),
372 &start, &stop, &step, &slicelength) < 0) {
373 return NULL;
374 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000375
Thomas Wouters376446d2006-12-19 08:30:14 +0000376 if (slicelength <= 0)
377 return PyBytes_FromStringAndSize("", 0);
378 else if (step == 1) {
379 return PyBytes_FromStringAndSize(self->ob_bytes + start,
380 slicelength);
381 }
382 else {
383 char *source_buf = PyBytes_AS_STRING(self);
384 char *result_buf = (char *)PyMem_Malloc(slicelength);
385 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000386
Thomas Wouters376446d2006-12-19 08:30:14 +0000387 if (result_buf == NULL)
388 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000389
Thomas Wouters376446d2006-12-19 08:30:14 +0000390 for (cur = start, i = 0; i < slicelength;
391 cur += step, i++) {
392 result_buf[i] = source_buf[cur];
393 }
394 result = PyBytes_FromStringAndSize(result_buf, slicelength);
395 PyMem_Free(result_buf);
396 return result;
397 }
398 }
399 else {
400 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
401 return NULL;
402 }
403}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000404
Guido van Rossumd624f182006-04-24 13:47:05 +0000405static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000406bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000407 PyObject *values)
408{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000409 Py_ssize_t avail, needed;
410 void *bytes;
Guido van Rossumd624f182006-04-24 13:47:05 +0000411
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000412 if (values == (PyObject *)self) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000413 /* Make a copy an call this function recursively */
414 int err;
415 values = PyBytes_FromObject(values);
416 if (values == NULL)
417 return -1;
418 err = bytes_setslice(self, lo, hi, values);
419 Py_DECREF(values);
420 return err;
421 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000422 if (values == NULL) {
423 /* del b[lo:hi] */
424 bytes = NULL;
425 needed = 0;
426 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000427 else {
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000428 needed = _getbuffer(values, &bytes);
429 if (needed < 0) {
430 PyErr_Format(PyExc_TypeError,
431 "can't set bytes slice from %.100s",
432 values->ob_type->tp_name);
433 return -1;
434 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000435 }
436
437 if (lo < 0)
438 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000439 if (hi < lo)
440 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000441 if (hi > self->ob_size)
442 hi = self->ob_size;
443
444 avail = hi - lo;
445 if (avail < 0)
446 lo = hi = avail = 0;
447
448 if (avail != needed) {
449 if (avail > needed) {
450 /*
451 0 lo hi old_size
452 | |<----avail----->|<-----tomove------>|
453 | |<-needed->|<-----tomove------>|
454 0 lo new_hi new_size
455 */
456 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
457 self->ob_size - hi);
458 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000459 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000460 self->ob_size + needed - avail) < 0)
461 return -1;
462 if (avail < needed) {
463 /*
464 0 lo hi old_size
465 | |<-avail->|<-----tomove------>|
466 | |<----needed---->|<-----tomove------>|
467 0 lo new_hi new_size
468 */
469 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
470 self->ob_size - lo - needed);
471 }
472 }
473
474 if (needed > 0)
475 memcpy(self->ob_bytes + lo, bytes, needed);
476
477 return 0;
478}
479
480static int
481bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
482{
483 Py_ssize_t ival;
484
485 if (i < 0)
486 i += self->ob_size;
487
488 if (i < 0 || i >= self->ob_size) {
489 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
490 return -1;
491 }
492
493 if (value == NULL)
494 return bytes_setslice(self, i, i+1, NULL);
495
Thomas Woutersd204a712006-08-22 13:41:17 +0000496 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000497 if (ival == -1 && PyErr_Occurred())
498 return -1;
499
500 if (ival < 0 || ival >= 256) {
501 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
502 return -1;
503 }
504
505 self->ob_bytes[i] = ival;
506 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000507}
508
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000509static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000510bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
511{
512 Py_ssize_t start, stop, step, slicelen, needed;
513 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000514
Thomas Wouters376446d2006-12-19 08:30:14 +0000515 if (PyIndex_Check(item)) {
516 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
517
518 if (i == -1 && PyErr_Occurred())
519 return -1;
520
521 if (i < 0)
522 i += PyBytes_GET_SIZE(self);
523
524 if (i < 0 || i >= self->ob_size) {
525 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
526 return -1;
527 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000528
Thomas Wouters376446d2006-12-19 08:30:14 +0000529 if (values == NULL) {
530 /* Fall through to slice assignment */
531 start = i;
532 stop = i + 1;
533 step = 1;
534 slicelen = 1;
535 }
536 else {
537 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
538 if (ival == -1 && PyErr_Occurred())
539 return -1;
540 if (ival < 0 || ival >= 256) {
541 PyErr_SetString(PyExc_ValueError,
542 "byte must be in range(0, 256)");
543 return -1;
544 }
545 self->ob_bytes[i] = (char)ival;
546 return 0;
547 }
548 }
549 else if (PySlice_Check(item)) {
550 if (PySlice_GetIndicesEx((PySliceObject *)item,
551 PyBytes_GET_SIZE(self),
552 &start, &stop, &step, &slicelen) < 0) {
553 return -1;
554 }
555 }
556 else {
557 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
558 return -1;
559 }
560
561 if (values == NULL) {
562 bytes = NULL;
563 needed = 0;
564 }
565 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
566 /* Make a copy an call this function recursively */
567 int err;
568 values = PyBytes_FromObject(values);
569 if (values == NULL)
570 return -1;
571 err = bytes_ass_subscript(self, item, values);
572 Py_DECREF(values);
573 return err;
574 }
575 else {
576 assert(PyBytes_Check(values));
577 bytes = ((PyBytesObject *)values)->ob_bytes;
578 needed = ((PyBytesObject *)values)->ob_size;
579 }
580 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
581 if ((step < 0 && start < stop) ||
582 (step > 0 && start > stop))
583 stop = start;
584 if (step == 1) {
585 if (slicelen != needed) {
586 if (slicelen > needed) {
587 /*
588 0 start stop old_size
589 | |<---slicelen--->|<-----tomove------>|
590 | |<-needed->|<-----tomove------>|
591 0 lo new_hi new_size
592 */
593 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
594 self->ob_size - stop);
595 }
596 if (PyBytes_Resize((PyObject *)self,
597 self->ob_size + needed - slicelen) < 0)
598 return -1;
599 if (slicelen < needed) {
600 /*
601 0 lo hi old_size
602 | |<-avail->|<-----tomove------>|
603 | |<----needed---->|<-----tomove------>|
604 0 lo new_hi new_size
605 */
606 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
607 self->ob_size - start - needed);
608 }
609 }
610
611 if (needed > 0)
612 memcpy(self->ob_bytes + start, bytes, needed);
613
614 return 0;
615 }
616 else {
617 if (needed == 0) {
618 /* Delete slice */
619 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000620
Thomas Wouters376446d2006-12-19 08:30:14 +0000621 if (step < 0) {
622 stop = start + 1;
623 start = stop + step * (slicelen - 1) - 1;
624 step = -step;
625 }
626 for (cur = start, i = 0;
627 i < slicelen; cur += step, i++) {
628 Py_ssize_t lim = step - 1;
629
630 if (cur + step >= PyBytes_GET_SIZE(self))
631 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000632
Thomas Wouters376446d2006-12-19 08:30:14 +0000633 memmove(self->ob_bytes + cur - i,
634 self->ob_bytes + cur + 1, lim);
635 }
636 /* Move the tail of the bytes, in one chunk */
637 cur = start + slicelen*step;
638 if (cur < PyBytes_GET_SIZE(self)) {
639 memmove(self->ob_bytes + cur - slicelen,
640 self->ob_bytes + cur,
641 PyBytes_GET_SIZE(self) - cur);
642 }
643 if (PyBytes_Resize((PyObject *)self,
644 PyBytes_GET_SIZE(self) - slicelen) < 0)
645 return -1;
646
647 return 0;
648 }
649 else {
650 /* Assign slice */
651 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000652
Thomas Wouters376446d2006-12-19 08:30:14 +0000653 if (needed != slicelen) {
654 PyErr_Format(PyExc_ValueError,
655 "attempt to assign bytes of size %zd "
656 "to extended slice of size %zd",
657 needed, slicelen);
658 return -1;
659 }
660 for (cur = start, i = 0; i < slicelen; cur += step, i++)
661 self->ob_bytes[cur] = bytes[i];
662 return 0;
663 }
664 }
665}
666
667static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000668bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
669{
Guido van Rossumd624f182006-04-24 13:47:05 +0000670 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000671 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000672 const char *encoding = NULL;
673 const char *errors = NULL;
674 Py_ssize_t count;
675 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000676 PyObject *(*iternext)(PyObject *);
677
Guido van Rossuma0867f72006-05-05 04:34:18 +0000678 if (self->ob_size != 0) {
679 /* Empty previous contents (yes, do this first of all!) */
680 if (PyBytes_Resize((PyObject *)self, 0) < 0)
681 return -1;
682 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000683
Guido van Rossumd624f182006-04-24 13:47:05 +0000684 /* Parse arguments */
685 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
686 &arg, &encoding, &errors))
687 return -1;
688
689 /* Make a quick exit if no first argument */
690 if (arg == NULL) {
691 if (encoding != NULL || errors != NULL) {
692 PyErr_SetString(PyExc_TypeError,
693 "encoding or errors without sequence argument");
694 return -1;
695 }
696 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000697 }
698
Guido van Rossumd624f182006-04-24 13:47:05 +0000699 if (PyUnicode_Check(arg)) {
700 /* Encode via the codec registry */
701 PyObject *encoded;
702 char *bytes;
703 Py_ssize_t size;
704 if (encoding == NULL)
705 encoding = PyUnicode_GetDefaultEncoding();
706 encoded = PyCodec_Encode(arg, encoding, errors);
707 if (encoded == NULL)
708 return -1;
709 if (!PyString_Check(encoded)) {
710 PyErr_Format(PyExc_TypeError,
711 "encoder did not return a string object (type=%.400s)",
712 encoded->ob_type->tp_name);
713 Py_DECREF(encoded);
714 return -1;
715 }
716 bytes = PyString_AS_STRING(encoded);
717 size = PyString_GET_SIZE(encoded);
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000718 if (size < self->ob_alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000719 self->ob_size = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000720 self->ob_bytes[self->ob_size] = '\0'; /* Trailing null byte */
721 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000722 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000723 Py_DECREF(encoded);
724 return -1;
725 }
726 memcpy(self->ob_bytes, bytes, size);
727 Py_DECREF(encoded);
728 return 0;
729 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000730
Guido van Rossumd624f182006-04-24 13:47:05 +0000731 /* If it's not unicode, there can't be encoding or errors */
732 if (encoding != NULL || errors != NULL) {
733 PyErr_SetString(PyExc_TypeError,
734 "encoding or errors without a string argument");
735 return -1;
736 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000737
Guido van Rossumd624f182006-04-24 13:47:05 +0000738 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000739 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000740 if (count == -1 && PyErr_Occurred())
741 PyErr_Clear();
742 else {
743 if (count < 0) {
744 PyErr_SetString(PyExc_ValueError, "negative count");
745 return -1;
746 }
747 if (count > 0) {
748 if (PyBytes_Resize((PyObject *)self, count))
749 return -1;
750 memset(self->ob_bytes, 0, count);
751 }
752 return 0;
753 }
754
755 if (PyObject_CheckReadBuffer(arg)) {
756 const void *bytes;
757 Py_ssize_t size;
758 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
759 return -1;
760 if (PyBytes_Resize((PyObject *)self, size) < 0)
761 return -1;
762 memcpy(self->ob_bytes, bytes, size);
763 return 0;
764 }
765
766 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000767
768 /* Get the iterator */
769 it = PyObject_GetIter(arg);
770 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000771 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000772 iternext = *it->ob_type->tp_iternext;
773
774 /* Run the iterator to exhaustion */
775 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 PyObject *item;
777 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000778
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 /* Get the next item */
780 item = iternext(it);
781 if (item == NULL) {
782 if (PyErr_Occurred()) {
783 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
784 goto error;
785 PyErr_Clear();
786 }
787 break;
788 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000791 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000792 Py_DECREF(item);
793 if (value == -1 && PyErr_Occurred())
794 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000795
Guido van Rossumd624f182006-04-24 13:47:05 +0000796 /* Range check */
797 if (value < 0 || value >= 256) {
798 PyErr_SetString(PyExc_ValueError,
799 "bytes must be in range(0, 256)");
800 goto error;
801 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000802
Guido van Rossumd624f182006-04-24 13:47:05 +0000803 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000804 if (self->ob_size < self->ob_alloc)
805 self->ob_size++;
806 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000807 goto error;
808 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000809 }
810
811 /* Clean up and return success */
812 Py_DECREF(it);
813 return 0;
814
815 error:
816 /* Error handling when it != NULL */
817 Py_DECREF(it);
818 return -1;
819}
820
Georg Brandlee91be42007-02-24 19:41:35 +0000821/* Mostly copied from string_repr, but without the
822 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000823static PyObject *
824bytes_repr(PyBytesObject *self)
825{
Georg Brandlee91be42007-02-24 19:41:35 +0000826 size_t newsize = 3 + 4 * self->ob_size;
827 PyObject *v;
828 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
829 PyErr_SetString(PyExc_OverflowError,
830 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000831 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832 }
Georg Brandlee91be42007-02-24 19:41:35 +0000833 v = PyString_FromStringAndSize((char *)NULL, newsize);
834 if (v == NULL) {
835 return NULL;
836 }
837 else {
838 register Py_ssize_t i;
839 register char c;
840 register char *p;
841 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000842
Georg Brandlee91be42007-02-24 19:41:35 +0000843 p = PyString_AS_STRING(v);
844 *p++ = 'b';
845 *p++ = quote;
846 for (i = 0; i < self->ob_size; i++) {
847 /* There's at least enough room for a hex escape
848 and a closing quote. */
849 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
850 c = self->ob_bytes[i];
851 if (c == quote || c == '\\')
852 *p++ = '\\', *p++ = c;
853 else if (c == '\t')
854 *p++ = '\\', *p++ = 't';
855 else if (c == '\n')
856 *p++ = '\\', *p++ = 'n';
857 else if (c == '\r')
858 *p++ = '\\', *p++ = 'r';
859 else if (c == 0)
860 *p++ = '\\', *p++ = '0';
861 else if (c < ' ' || c >= 0x7f) {
862 /* For performance, we don't want to call
863 PyOS_snprintf here (extra layers of
864 function call). */
865 sprintf(p, "\\x%02x", c & 0xff);
866 p += 4;
867 }
868 else
869 *p++ = c;
870 }
871 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
872 *p++ = quote;
873 *p = '\0';
874 _PyString_Resize(
875 &v, (p - PyString_AS_STRING(v)));
876 return v;
877 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000878}
879
880static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000881bytes_str(PyBytesObject *self)
882{
883 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
884}
885
886static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000887bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000889 Py_ssize_t self_size, other_size;
890 void *self_bytes, *other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000891 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000892 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000893 int cmp;
894
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000895 /* Bytes can be compared to anything that supports the (binary) buffer
896 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000897
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000898 self_size = _getbuffer(self, &self_bytes);
899 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000900 Py_INCREF(Py_NotImplemented);
901 return Py_NotImplemented;
902 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000903
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000904 other_size = _getbuffer(other, &other_bytes);
905 if (other_size < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000906 Py_INCREF(Py_NotImplemented);
907 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000908 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000909
910 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000911 /* Shortcut: if the lengths differ, the objects differ */
912 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000913 }
914 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000915 minsize = self_size;
916 if (other_size < minsize)
917 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000918
Guido van Rossum343e97f2007-04-09 00:43:24 +0000919 cmp = memcmp(self_bytes, other_bytes, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000920 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000921
Guido van Rossumd624f182006-04-24 13:47:05 +0000922 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000923 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000924 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000925 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000926 cmp = 1;
927 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000928
Guido van Rossumd624f182006-04-24 13:47:05 +0000929 switch (op) {
930 case Py_LT: cmp = cmp < 0; break;
931 case Py_LE: cmp = cmp <= 0; break;
932 case Py_EQ: cmp = cmp == 0; break;
933 case Py_NE: cmp = cmp != 0; break;
934 case Py_GT: cmp = cmp > 0; break;
935 case Py_GE: cmp = cmp >= 0; break;
936 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000937 }
938
939 res = cmp ? Py_True : Py_False;
940 Py_INCREF(res);
941 return res;
942}
943
944static void
945bytes_dealloc(PyBytesObject *self)
946{
Guido van Rossumd624f182006-04-24 13:47:05 +0000947 if (self->ob_bytes != 0) {
948 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000949 }
950 self->ob_type->tp_free((PyObject *)self);
951}
952
Guido van Rossumd624f182006-04-24 13:47:05 +0000953static Py_ssize_t
954bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
955{
956 if (index != 0) {
957 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000958 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000959 return -1;
960 }
961 *ptr = (void *)self->ob_bytes;
962 return self->ob_size;
963}
964
965static Py_ssize_t
966bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
967{
968 if (lenp)
969 *lenp = self->ob_size;
970 return 1;
971}
972
Neal Norwitz6968b052007-02-27 19:02:19 +0000973
974
975/* -------------------------------------------------------------------- */
976/* Methods */
977
978#define STRINGLIB_CHAR char
979#define STRINGLIB_CMP memcmp
980#define STRINGLIB_LEN PyBytes_GET_SIZE
981#define STRINGLIB_NEW PyBytes_FromStringAndSize
982#define STRINGLIB_EMPTY nullbytes
983
984#include "stringlib/fastsearch.h"
985#include "stringlib/count.h"
986#include "stringlib/find.h"
987#include "stringlib/partition.h"
988
989
990/* The following Py_LOCAL_INLINE and Py_LOCAL functions
991were copied from the old char* style string object. */
992
993Py_LOCAL_INLINE(void)
994_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
995{
996 if (*end > len)
997 *end = len;
998 else if (*end < 0)
999 *end += len;
1000 if (*end < 0)
1001 *end = 0;
1002 if (*start < 0)
1003 *start += len;
1004 if (*start < 0)
1005 *start = 0;
1006}
1007
1008
1009Py_LOCAL_INLINE(Py_ssize_t)
1010bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1011{
1012 PyObject *subobj;
1013 const char *sub;
1014 Py_ssize_t sub_len;
1015 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1016
1017 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1018 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1019 return -2;
1020 if (PyBytes_Check(subobj)) {
1021 sub = PyBytes_AS_STRING(subobj);
1022 sub_len = PyBytes_GET_SIZE(subobj);
1023 }
1024 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1025 /* XXX - the "expected a character buffer object" is pretty
1026 confusing for a non-expert. remap to something else ? */
1027 return -2;
1028
1029 if (dir > 0)
1030 return stringlib_find_slice(
1031 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1032 sub, sub_len, start, end);
1033 else
1034 return stringlib_rfind_slice(
1035 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1036 sub, sub_len, start, end);
1037}
1038
1039
1040PyDoc_STRVAR(find__doc__,
1041"B.find(sub [,start [,end]]) -> int\n\
1042\n\
1043Return the lowest index in B where subsection sub is found,\n\
1044such that sub is contained within s[start,end]. Optional\n\
1045arguments start and end are interpreted as in slice notation.\n\
1046\n\
1047Return -1 on failure.");
1048
1049static PyObject *
1050bytes_find(PyBytesObject *self, PyObject *args)
1051{
1052 Py_ssize_t result = bytes_find_internal(self, args, +1);
1053 if (result == -2)
1054 return NULL;
1055 return PyInt_FromSsize_t(result);
1056}
1057
1058PyDoc_STRVAR(count__doc__,
1059"B.count(sub[, start[, end]]) -> int\n\
1060\n\
1061Return the number of non-overlapping occurrences of subsection sub in\n\
1062bytes B[start:end]. Optional arguments start and end are interpreted\n\
1063as in slice notation.");
1064
1065static PyObject *
1066bytes_count(PyBytesObject *self, PyObject *args)
1067{
1068 PyObject *sub_obj;
1069 const char *str = PyBytes_AS_STRING(self), *sub;
1070 Py_ssize_t sub_len;
1071 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1072
1073 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1074 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1075 return NULL;
1076
1077 if (PyBytes_Check(sub_obj)) {
1078 sub = PyBytes_AS_STRING(sub_obj);
1079 sub_len = PyBytes_GET_SIZE(sub_obj);
1080 }
1081 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1082 return NULL;
1083
1084 _adjust_indices(&start, &end, PyString_GET_SIZE(self));
1085
1086 return PyInt_FromSsize_t(
1087 stringlib_count(str + start, end - start, sub, sub_len)
1088 );
1089}
1090
1091
1092PyDoc_STRVAR(index__doc__,
1093"B.index(sub [,start [,end]]) -> int\n\
1094\n\
1095Like B.find() but raise ValueError when the subsection is not found.");
1096
1097static PyObject *
1098bytes_index(PyBytesObject *self, PyObject *args)
1099{
1100 Py_ssize_t result = bytes_find_internal(self, args, +1);
1101 if (result == -2)
1102 return NULL;
1103 if (result == -1) {
1104 PyErr_SetString(PyExc_ValueError,
1105 "subsection not found");
1106 return NULL;
1107 }
1108 return PyInt_FromSsize_t(result);
1109}
1110
1111
1112PyDoc_STRVAR(rfind__doc__,
1113"B.rfind(sub [,start [,end]]) -> int\n\
1114\n\
1115Return the highest index in B where subsection sub is found,\n\
1116such that sub is contained within s[start,end]. Optional\n\
1117arguments start and end are interpreted as in slice notation.\n\
1118\n\
1119Return -1 on failure.");
1120
1121static PyObject *
1122bytes_rfind(PyBytesObject *self, PyObject *args)
1123{
1124 Py_ssize_t result = bytes_find_internal(self, args, -1);
1125 if (result == -2)
1126 return NULL;
1127 return PyInt_FromSsize_t(result);
1128}
1129
1130
1131PyDoc_STRVAR(rindex__doc__,
1132"B.rindex(sub [,start [,end]]) -> int\n\
1133\n\
1134Like B.rfind() but raise ValueError when the subsection is not found.");
1135
1136static PyObject *
1137bytes_rindex(PyBytesObject *self, PyObject *args)
1138{
1139 Py_ssize_t result = bytes_find_internal(self, args, -1);
1140 if (result == -2)
1141 return NULL;
1142 if (result == -1) {
1143 PyErr_SetString(PyExc_ValueError,
1144 "subsection not found");
1145 return NULL;
1146 }
1147 return PyInt_FromSsize_t(result);
1148}
1149
1150
1151/* Matches the end (direction >= 0) or start (direction < 0) of self
1152 * against substr, using the start and end arguments. Returns
1153 * -1 on error, 0 if not found and 1 if found.
1154 */
1155Py_LOCAL(int)
1156_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1157 Py_ssize_t end, int direction)
1158{
1159 Py_ssize_t len = PyBytes_GET_SIZE(self);
1160 Py_ssize_t slen;
1161 const char* sub;
1162 const char* str;
1163
1164 if (PyBytes_Check(substr)) {
1165 sub = PyBytes_AS_STRING(substr);
1166 slen = PyBytes_GET_SIZE(substr);
1167 }
1168 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1169 return -1;
1170 str = PyBytes_AS_STRING(self);
1171
1172 _adjust_indices(&start, &end, len);
1173
1174 if (direction < 0) {
1175 /* startswith */
1176 if (start+slen > len)
1177 return 0;
1178 } else {
1179 /* endswith */
1180 if (end-start < slen || start > len)
1181 return 0;
1182
1183 if (end-slen > start)
1184 start = end - slen;
1185 }
1186 if (end-start >= slen)
1187 return ! memcmp(str+start, sub, slen);
1188 return 0;
1189}
1190
1191
1192PyDoc_STRVAR(startswith__doc__,
1193"B.startswith(prefix[, start[, end]]) -> bool\n\
1194\n\
1195Return True if B starts with the specified prefix, False otherwise.\n\
1196With optional start, test B beginning at that position.\n\
1197With optional end, stop comparing B at that position.\n\
1198prefix can also be a tuple of strings to try.");
1199
1200static PyObject *
1201bytes_startswith(PyBytesObject *self, PyObject *args)
1202{
1203 Py_ssize_t start = 0;
1204 Py_ssize_t end = PY_SSIZE_T_MAX;
1205 PyObject *subobj;
1206 int result;
1207
1208 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1209 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1210 return NULL;
1211 if (PyTuple_Check(subobj)) {
1212 Py_ssize_t i;
1213 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1214 result = _bytes_tailmatch(self,
1215 PyTuple_GET_ITEM(subobj, i),
1216 start, end, -1);
1217 if (result == -1)
1218 return NULL;
1219 else if (result) {
1220 Py_RETURN_TRUE;
1221 }
1222 }
1223 Py_RETURN_FALSE;
1224 }
1225 result = _bytes_tailmatch(self, subobj, start, end, -1);
1226 if (result == -1)
1227 return NULL;
1228 else
1229 return PyBool_FromLong(result);
1230}
1231
1232PyDoc_STRVAR(endswith__doc__,
1233"B.endswith(suffix[, start[, end]]) -> bool\n\
1234\n\
1235Return True if B ends with the specified suffix, False otherwise.\n\
1236With optional start, test B beginning at that position.\n\
1237With optional end, stop comparing B at that position.\n\
1238suffix can also be a tuple of strings to try.");
1239
1240static PyObject *
1241bytes_endswith(PyBytesObject *self, PyObject *args)
1242{
1243 Py_ssize_t start = 0;
1244 Py_ssize_t end = PY_SSIZE_T_MAX;
1245 PyObject *subobj;
1246 int result;
1247
1248 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1249 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1250 return NULL;
1251 if (PyTuple_Check(subobj)) {
1252 Py_ssize_t i;
1253 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1254 result = _bytes_tailmatch(self,
1255 PyTuple_GET_ITEM(subobj, i),
1256 start, end, +1);
1257 if (result == -1)
1258 return NULL;
1259 else if (result) {
1260 Py_RETURN_TRUE;
1261 }
1262 }
1263 Py_RETURN_FALSE;
1264 }
1265 result = _bytes_tailmatch(self, subobj, start, end, +1);
1266 if (result == -1)
1267 return NULL;
1268 else
1269 return PyBool_FromLong(result);
1270}
1271
1272
1273
1274PyDoc_STRVAR(translate__doc__,
1275"B.translate(table [,deletechars]) -> bytes\n\
1276\n\
1277Return a copy of the bytes B, where all characters occurring\n\
1278in the optional argument deletechars are removed, and the\n\
1279remaining characters have been mapped through the given\n\
1280translation table, which must be a bytes of length 256.");
1281
1282static PyObject *
1283bytes_translate(PyBytesObject *self, PyObject *args)
1284{
1285 register char *input, *output;
1286 register const char *table;
1287 register Py_ssize_t i, c, changed = 0;
1288 PyObject *input_obj = (PyObject*)self;
1289 const char *table1, *output_start, *del_table=NULL;
1290 Py_ssize_t inlen, tablen, dellen = 0;
1291 PyObject *result;
1292 int trans_table[256];
1293 PyObject *tableobj, *delobj = NULL;
1294
1295 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1296 &tableobj, &delobj))
1297 return NULL;
1298
1299 if (PyBytes_Check(tableobj)) {
1300 table1 = PyBytes_AS_STRING(tableobj);
1301 tablen = PyBytes_GET_SIZE(tableobj);
1302 }
1303 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1304 return NULL;
1305
1306 if (tablen != 256) {
1307 PyErr_SetString(PyExc_ValueError,
1308 "translation table must be 256 characters long");
1309 return NULL;
1310 }
1311
1312 if (delobj != NULL) {
1313 if (PyBytes_Check(delobj)) {
1314 del_table = PyBytes_AS_STRING(delobj);
1315 dellen = PyBytes_GET_SIZE(delobj);
1316 }
1317 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1318 return NULL;
1319 }
1320 else {
1321 del_table = NULL;
1322 dellen = 0;
1323 }
1324
1325 table = table1;
1326 inlen = PyBytes_GET_SIZE(input_obj);
1327 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1328 if (result == NULL)
1329 return NULL;
1330 output_start = output = PyBytes_AsString(result);
1331 input = PyBytes_AS_STRING(input_obj);
1332
1333 if (dellen == 0) {
1334 /* If no deletions are required, use faster code */
1335 for (i = inlen; --i >= 0; ) {
1336 c = Py_CHARMASK(*input++);
1337 if (Py_CHARMASK((*output++ = table[c])) != c)
1338 changed = 1;
1339 }
1340 if (changed || !PyBytes_CheckExact(input_obj))
1341 return result;
1342 Py_DECREF(result);
1343 Py_INCREF(input_obj);
1344 return input_obj;
1345 }
1346
1347 for (i = 0; i < 256; i++)
1348 trans_table[i] = Py_CHARMASK(table[i]);
1349
1350 for (i = 0; i < dellen; i++)
1351 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1352
1353 for (i = inlen; --i >= 0; ) {
1354 c = Py_CHARMASK(*input++);
1355 if (trans_table[c] != -1)
1356 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1357 continue;
1358 changed = 1;
1359 }
1360 if (!changed && PyBytes_CheckExact(input_obj)) {
1361 Py_DECREF(result);
1362 Py_INCREF(input_obj);
1363 return input_obj;
1364 }
1365 /* Fix the size of the resulting string */
1366 if (inlen > 0)
1367 PyBytes_Resize(result, output - output_start);
1368 return result;
1369}
1370
1371
1372#define FORWARD 1
1373#define REVERSE -1
1374
1375/* find and count characters and substrings */
1376
/* Pointer to the first occurrence of byte c in target[0:target_len], or
   NULL when there is none. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs at target[offset].  The first and
   last bytes are checked before memcmp() is used on the rest.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1385
1386
1387/* Bytes ops must return a string. */
1388/* If the object is subclass of bytes, create a copy */
1389Py_LOCAL(PyBytesObject *)
1390return_self(PyBytesObject *self)
1391{
1392 if (PyBytes_CheckExact(self)) {
1393 Py_INCREF(self);
1394 return (PyBytesObject *)self;
1395 }
1396 return (PyBytesObject *)PyBytes_FromStringAndSize(
1397 PyBytes_AS_STRING(self),
1398 PyBytes_GET_SIZE(self));
1399}
1400
1401Py_LOCAL_INLINE(Py_ssize_t)
1402countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1403{
1404 Py_ssize_t count=0;
1405 const char *start=target;
1406 const char *end=target+target_len;
1407
1408 while ( (start=findchar(start, end-start, c)) != NULL ) {
1409 count++;
1410 if (count >= maxcount)
1411 break;
1412 start += 1;
1413 }
1414 return count;
1415}
1416
1417Py_LOCAL(Py_ssize_t)
1418findstring(const char *target, Py_ssize_t target_len,
1419 const char *pattern, Py_ssize_t pattern_len,
1420 Py_ssize_t start,
1421 Py_ssize_t end,
1422 int direction)
1423{
1424 if (start < 0) {
1425 start += target_len;
1426 if (start < 0)
1427 start = 0;
1428 }
1429 if (end > target_len) {
1430 end = target_len;
1431 } else if (end < 0) {
1432 end += target_len;
1433 if (end < 0)
1434 end = 0;
1435 }
1436
1437 /* zero-length substrings always match at the first attempt */
1438 if (pattern_len == 0)
1439 return (direction > 0) ? start : end;
1440
1441 end -= pattern_len;
1442
1443 if (direction < 0) {
1444 for (; end >= start; end--)
1445 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1446 return end;
1447 } else {
1448 for (; start <= end; start++)
1449 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1450 return start;
1451 }
1452 return -1;
1453}
1454
1455Py_LOCAL_INLINE(Py_ssize_t)
1456countstring(const char *target, Py_ssize_t target_len,
1457 const char *pattern, Py_ssize_t pattern_len,
1458 Py_ssize_t start,
1459 Py_ssize_t end,
1460 int direction, Py_ssize_t maxcount)
1461{
1462 Py_ssize_t count=0;
1463
1464 if (start < 0) {
1465 start += target_len;
1466 if (start < 0)
1467 start = 0;
1468 }
1469 if (end > target_len) {
1470 end = target_len;
1471 } else if (end < 0) {
1472 end += target_len;
1473 if (end < 0)
1474 end = 0;
1475 }
1476
1477 /* zero-length substrings match everywhere */
1478 if (pattern_len == 0 || maxcount == 0) {
1479 if (target_len+1 < maxcount)
1480 return target_len+1;
1481 return maxcount;
1482 }
1483
1484 end -= pattern_len;
1485 if (direction < 0) {
1486 for (; (end >= start); end--)
1487 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1488 count++;
1489 if (--maxcount <= 0) break;
1490 end -= pattern_len-1;
1491 }
1492 } else {
1493 for (; (start <= end); start++)
1494 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1495 count++;
1496 if (--maxcount <= 0)
1497 break;
1498 start += pattern_len-1;
1499 }
1500 }
1501 return count;
1502}
1503
1504
1505/* Algorithms for different cases of string replacement */
1506
1507/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1508Py_LOCAL(PyBytesObject *)
1509replace_interleave(PyBytesObject *self,
1510 const char *to_s, Py_ssize_t to_len,
1511 Py_ssize_t maxcount)
1512{
1513 char *self_s, *result_s;
1514 Py_ssize_t self_len, result_len;
1515 Py_ssize_t count, i, product;
1516 PyBytesObject *result;
1517
1518 self_len = PyBytes_GET_SIZE(self);
1519
1520 /* 1 at the end plus 1 after every character */
1521 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001522 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001523 count = maxcount;
1524
1525 /* Check for overflow */
1526 /* result_len = count * to_len + self_len; */
1527 product = count * to_len;
1528 if (product / to_len != count) {
1529 PyErr_SetString(PyExc_OverflowError,
1530 "replace string is too long");
1531 return NULL;
1532 }
1533 result_len = product + self_len;
1534 if (result_len < 0) {
1535 PyErr_SetString(PyExc_OverflowError,
1536 "replace string is too long");
1537 return NULL;
1538 }
1539
1540 if (! (result = (PyBytesObject *)
1541 PyBytes_FromStringAndSize(NULL, result_len)) )
1542 return NULL;
1543
1544 self_s = PyBytes_AS_STRING(self);
1545 result_s = PyBytes_AS_STRING(result);
1546
1547 /* TODO: special case single character, which doesn't need memcpy */
1548
1549 /* Lay the first one down (guaranteed this will occur) */
1550 Py_MEMCPY(result_s, to_s, to_len);
1551 result_s += to_len;
1552 count -= 1;
1553
1554 for (i=0; i<count; i++) {
1555 *result_s++ = *self_s++;
1556 Py_MEMCPY(result_s, to_s, to_len);
1557 result_s += to_len;
1558 }
1559
1560 /* Copy the rest of the original string */
1561 Py_MEMCPY(result_s, self_s, self_len-i);
1562
1563 return result;
1564}
1565
1566/* Special case for deleting a single character */
1567/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1568Py_LOCAL(PyBytesObject *)
1569replace_delete_single_character(PyBytesObject *self,
1570 char from_c, Py_ssize_t maxcount)
1571{
1572 char *self_s, *result_s;
1573 char *start, *next, *end;
1574 Py_ssize_t self_len, result_len;
1575 Py_ssize_t count;
1576 PyBytesObject *result;
1577
1578 self_len = PyBytes_GET_SIZE(self);
1579 self_s = PyBytes_AS_STRING(self);
1580
1581 count = countchar(self_s, self_len, from_c, maxcount);
1582 if (count == 0) {
1583 return return_self(self);
1584 }
1585
1586 result_len = self_len - count; /* from_len == 1 */
1587 assert(result_len>=0);
1588
1589 if ( (result = (PyBytesObject *)
1590 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1591 return NULL;
1592 result_s = PyBytes_AS_STRING(result);
1593
1594 start = self_s;
1595 end = self_s + self_len;
1596 while (count-- > 0) {
1597 next = findchar(start, end-start, from_c);
1598 if (next == NULL)
1599 break;
1600 Py_MEMCPY(result_s, start, next-start);
1601 result_s += (next-start);
1602 start = next+1;
1603 }
1604 Py_MEMCPY(result_s, start, end-start);
1605
1606 return result;
1607}
1608
1609/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1610
1611Py_LOCAL(PyBytesObject *)
1612replace_delete_substring(PyBytesObject *self,
1613 const char *from_s, Py_ssize_t from_len,
1614 Py_ssize_t maxcount)
1615{
1616 char *self_s, *result_s;
1617 char *start, *next, *end;
1618 Py_ssize_t self_len, result_len;
1619 Py_ssize_t count, offset;
1620 PyBytesObject *result;
1621
1622 self_len = PyBytes_GET_SIZE(self);
1623 self_s = PyBytes_AS_STRING(self);
1624
1625 count = countstring(self_s, self_len,
1626 from_s, from_len,
1627 0, self_len, 1,
1628 maxcount);
1629
1630 if (count == 0) {
1631 /* no matches */
1632 return return_self(self);
1633 }
1634
1635 result_len = self_len - (count * from_len);
1636 assert (result_len>=0);
1637
1638 if ( (result = (PyBytesObject *)
1639 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1640 return NULL;
1641
1642 result_s = PyBytes_AS_STRING(result);
1643
1644 start = self_s;
1645 end = self_s + self_len;
1646 while (count-- > 0) {
1647 offset = findstring(start, end-start,
1648 from_s, from_len,
1649 0, end-start, FORWARD);
1650 if (offset == -1)
1651 break;
1652 next = start + offset;
1653
1654 Py_MEMCPY(result_s, start, next-start);
1655
1656 result_s += (next-start);
1657 start = next+from_len;
1658 }
1659 Py_MEMCPY(result_s, start, end-start);
1660 return result;
1661}
1662
1663/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1664Py_LOCAL(PyBytesObject *)
1665replace_single_character_in_place(PyBytesObject *self,
1666 char from_c, char to_c,
1667 Py_ssize_t maxcount)
1668{
1669 char *self_s, *result_s, *start, *end, *next;
1670 Py_ssize_t self_len;
1671 PyBytesObject *result;
1672
1673 /* The result string will be the same size */
1674 self_s = PyBytes_AS_STRING(self);
1675 self_len = PyBytes_GET_SIZE(self);
1676
1677 next = findchar(self_s, self_len, from_c);
1678
1679 if (next == NULL) {
1680 /* No matches; return the original bytes */
1681 return return_self(self);
1682 }
1683
1684 /* Need to make a new bytes */
1685 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1686 if (result == NULL)
1687 return NULL;
1688 result_s = PyBytes_AS_STRING(result);
1689 Py_MEMCPY(result_s, self_s, self_len);
1690
1691 /* change everything in-place, starting with this one */
1692 start = result_s + (next-self_s);
1693 *start = to_c;
1694 start++;
1695 end = result_s + self_len;
1696
1697 while (--maxcount > 0) {
1698 next = findchar(start, end-start, from_c);
1699 if (next == NULL)
1700 break;
1701 *next = to_c;
1702 start = next+1;
1703 }
1704
1705 return result;
1706}
1707
1708/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1709Py_LOCAL(PyBytesObject *)
1710replace_substring_in_place(PyBytesObject *self,
1711 const char *from_s, Py_ssize_t from_len,
1712 const char *to_s, Py_ssize_t to_len,
1713 Py_ssize_t maxcount)
1714{
1715 char *result_s, *start, *end;
1716 char *self_s;
1717 Py_ssize_t self_len, offset;
1718 PyBytesObject *result;
1719
1720 /* The result bytes will be the same size */
1721
1722 self_s = PyBytes_AS_STRING(self);
1723 self_len = PyBytes_GET_SIZE(self);
1724
1725 offset = findstring(self_s, self_len,
1726 from_s, from_len,
1727 0, self_len, FORWARD);
1728 if (offset == -1) {
1729 /* No matches; return the original bytes */
1730 return return_self(self);
1731 }
1732
1733 /* Need to make a new bytes */
1734 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1735 if (result == NULL)
1736 return NULL;
1737 result_s = PyBytes_AS_STRING(result);
1738 Py_MEMCPY(result_s, self_s, self_len);
1739
1740 /* change everything in-place, starting with this one */
1741 start = result_s + offset;
1742 Py_MEMCPY(start, to_s, from_len);
1743 start += from_len;
1744 end = result_s + self_len;
1745
1746 while ( --maxcount > 0) {
1747 offset = findstring(start, end-start,
1748 from_s, from_len,
1749 0, end-start, FORWARD);
1750 if (offset==-1)
1751 break;
1752 Py_MEMCPY(start+offset, to_s, from_len);
1753 start += offset+from_len;
1754 }
1755
1756 return result;
1757}
1758
1759/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1760Py_LOCAL(PyBytesObject *)
1761replace_single_character(PyBytesObject *self,
1762 char from_c,
1763 const char *to_s, Py_ssize_t to_len,
1764 Py_ssize_t maxcount)
1765{
1766 char *self_s, *result_s;
1767 char *start, *next, *end;
1768 Py_ssize_t self_len, result_len;
1769 Py_ssize_t count, product;
1770 PyBytesObject *result;
1771
1772 self_s = PyBytes_AS_STRING(self);
1773 self_len = PyBytes_GET_SIZE(self);
1774
1775 count = countchar(self_s, self_len, from_c, maxcount);
1776 if (count == 0) {
1777 /* no matches, return unchanged */
1778 return return_self(self);
1779 }
1780
1781 /* use the difference between current and new, hence the "-1" */
1782 /* result_len = self_len + count * (to_len-1) */
1783 product = count * (to_len-1);
1784 if (product / (to_len-1) != count) {
1785 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1786 return NULL;
1787 }
1788 result_len = self_len + product;
1789 if (result_len < 0) {
1790 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1791 return NULL;
1792 }
1793
1794 if ( (result = (PyBytesObject *)
1795 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1796 return NULL;
1797 result_s = PyBytes_AS_STRING(result);
1798
1799 start = self_s;
1800 end = self_s + self_len;
1801 while (count-- > 0) {
1802 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001803 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001804 break;
1805
1806 if (next == start) {
1807 /* replace with the 'to' */
1808 Py_MEMCPY(result_s, to_s, to_len);
1809 result_s += to_len;
1810 start += 1;
1811 } else {
1812 /* copy the unchanged old then the 'to' */
1813 Py_MEMCPY(result_s, start, next-start);
1814 result_s += (next-start);
1815 Py_MEMCPY(result_s, to_s, to_len);
1816 result_s += to_len;
1817 start = next+1;
1818 }
1819 }
1820 /* Copy the remainder of the remaining bytes */
1821 Py_MEMCPY(result_s, start, end-start);
1822
1823 return result;
1824}
1825
1826/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1827Py_LOCAL(PyBytesObject *)
1828replace_substring(PyBytesObject *self,
1829 const char *from_s, Py_ssize_t from_len,
1830 const char *to_s, Py_ssize_t to_len,
1831 Py_ssize_t maxcount)
1832{
1833 char *self_s, *result_s;
1834 char *start, *next, *end;
1835 Py_ssize_t self_len, result_len;
1836 Py_ssize_t count, offset, product;
1837 PyBytesObject *result;
1838
1839 self_s = PyBytes_AS_STRING(self);
1840 self_len = PyBytes_GET_SIZE(self);
1841
1842 count = countstring(self_s, self_len,
1843 from_s, from_len,
1844 0, self_len, FORWARD, maxcount);
1845 if (count == 0) {
1846 /* no matches, return unchanged */
1847 return return_self(self);
1848 }
1849
1850 /* Check for overflow */
1851 /* result_len = self_len + count * (to_len-from_len) */
1852 product = count * (to_len-from_len);
1853 if (product / (to_len-from_len) != count) {
1854 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1855 return NULL;
1856 }
1857 result_len = self_len + product;
1858 if (result_len < 0) {
1859 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1860 return NULL;
1861 }
1862
1863 if ( (result = (PyBytesObject *)
1864 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1865 return NULL;
1866 result_s = PyBytes_AS_STRING(result);
1867
1868 start = self_s;
1869 end = self_s + self_len;
1870 while (count-- > 0) {
1871 offset = findstring(start, end-start,
1872 from_s, from_len,
1873 0, end-start, FORWARD);
1874 if (offset == -1)
1875 break;
1876 next = start+offset;
1877 if (next == start) {
1878 /* replace with the 'to' */
1879 Py_MEMCPY(result_s, to_s, to_len);
1880 result_s += to_len;
1881 start += from_len;
1882 } else {
1883 /* copy the unchanged old then the 'to' */
1884 Py_MEMCPY(result_s, start, next-start);
1885 result_s += (next-start);
1886 Py_MEMCPY(result_s, to_s, to_len);
1887 result_s += to_len;
1888 start = next+from_len;
1889 }
1890 }
1891 /* Copy the remainder of the remaining bytes */
1892 Py_MEMCPY(result_s, start, end-start);
1893
1894 return result;
1895}
1896
1897
1898Py_LOCAL(PyBytesObject *)
1899replace(PyBytesObject *self,
1900 const char *from_s, Py_ssize_t from_len,
1901 const char *to_s, Py_ssize_t to_len,
1902 Py_ssize_t maxcount)
1903{
1904 if (maxcount < 0) {
1905 maxcount = PY_SSIZE_T_MAX;
1906 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1907 /* nothing to do; return the original bytes */
1908 return return_self(self);
1909 }
1910
1911 if (maxcount == 0 ||
1912 (from_len == 0 && to_len == 0)) {
1913 /* nothing to do; return the original bytes */
1914 return return_self(self);
1915 }
1916
1917 /* Handle zero-length special cases */
1918
1919 if (from_len == 0) {
1920 /* insert the 'to' bytes everywhere. */
1921 /* >>> "Python".replace("", ".") */
1922 /* '.P.y.t.h.o.n.' */
1923 return replace_interleave(self, to_s, to_len, maxcount);
1924 }
1925
1926 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1927 /* point for an empty self bytes to generate a non-empty bytes */
1928 /* Special case so the remaining code always gets a non-empty bytes */
1929 if (PyBytes_GET_SIZE(self) == 0) {
1930 return return_self(self);
1931 }
1932
1933 if (to_len == 0) {
1934 /* delete all occurances of 'from' bytes */
1935 if (from_len == 1) {
1936 return replace_delete_single_character(
1937 self, from_s[0], maxcount);
1938 } else {
1939 return replace_delete_substring(self, from_s, from_len, maxcount);
1940 }
1941 }
1942
1943 /* Handle special case where both bytes have the same length */
1944
1945 if (from_len == to_len) {
1946 if (from_len == 1) {
1947 return replace_single_character_in_place(
1948 self,
1949 from_s[0],
1950 to_s[0],
1951 maxcount);
1952 } else {
1953 return replace_substring_in_place(
1954 self, from_s, from_len, to_s, to_len, maxcount);
1955 }
1956 }
1957
1958 /* Otherwise use the more generic algorithms */
1959 if (from_len == 1) {
1960 return replace_single_character(self, from_s[0],
1961 to_s, to_len, maxcount);
1962 } else {
1963 /* len('from')>=2, len('to')>=1 */
1964 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1965 }
1966}
1967
1968PyDoc_STRVAR(replace__doc__,
1969"B.replace (old, new[, count]) -> bytes\n\
1970\n\
1971Return a copy of bytes B with all occurrences of subsection\n\
1972old replaced by new. If the optional argument count is\n\
1973given, only the first count occurrences are replaced.");
1974
1975static PyObject *
1976bytes_replace(PyBytesObject *self, PyObject *args)
1977{
1978 Py_ssize_t count = -1;
1979 PyObject *from, *to;
1980 const char *from_s, *to_s;
1981 Py_ssize_t from_len, to_len;
1982
1983 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1984 return NULL;
1985
1986 if (PyBytes_Check(from)) {
1987 from_s = PyBytes_AS_STRING(from);
1988 from_len = PyBytes_GET_SIZE(from);
1989 }
1990 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1991 return NULL;
1992
1993 if (PyBytes_Check(to)) {
1994 to_s = PyBytes_AS_STRING(to);
1995 to_len = PyBytes_GET_SIZE(to);
1996 }
1997 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1998 return NULL;
1999
2000 return (PyObject *)replace((PyBytesObject *) self,
2001 from_s, from_len,
2002 to_s, to_len, count);
2003}
2004
2005
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16. Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields). For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list length for a split limited to `maxsplit` cuts: n cuts give
   n+1 pieces (5 splits gives 6 elements), capped at MAX_PREALLOC.  The
   argument is parenthesized so that any expression may be passed safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The three macros below expand inside the split functions and rely on names
   provided by the expansion site: `str` (scratch PyObject *), `list` (the
   result list), `count` (pieces stored so far) and the label `onError`.
   Each multi-statement macro is wrapped in do { } while (0) so it behaves as
   a single statement; write the call with a trailing semicolon. */

/* Create bytes from data[left:right] and append it to `list`; jump to
   onError if creating or appending fails. */
#define SPLIT_APPEND(data, left, right) \
    do { \
        str = PyBytes_FromStringAndSize((data) + (left), \
                                        (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        Py_DECREF(str); \
    } while (0)

/* Like SPLIT_APPEND, but while fewer than MAX_PREALLOC pieces have been
   stored the new object is written straight into slot `count` of the
   preallocated list (no DECREF on that path: the slot keeps the reference).
   Afterwards it falls back to PyList_Append.  Always increments `count`. */
#define SPLIT_ADD(data, left, right) \
    do { \
        str = PyBytes_FromStringAndSize((data) + (left), \
                                        (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (count < MAX_PREALLOC) { \
            PyList_SET_ITEM(list, count, str); \
        } \
        else { \
            if (PyList_Append(list, str)) { \
                Py_DECREF(str); \
                goto onError; \
            } \
            Py_DECREF(str); \
        } \
        count++; \
    } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)
2050
2051
2052Py_LOCAL_INLINE(PyObject *)
2053split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2054{
2055 register Py_ssize_t i, j, count=0;
2056 PyObject *str;
2057 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2058
2059 if (list == NULL)
2060 return NULL;
2061
2062 i = j = 0;
2063 while ((j < len) && (maxcount-- > 0)) {
2064 for(; j<len; j++) {
2065 /* I found that using memchr makes no difference */
2066 if (s[j] == ch) {
2067 SPLIT_ADD(s, i, j);
2068 i = j = j + 1;
2069 break;
2070 }
2071 }
2072 }
2073 if (i <= len) {
2074 SPLIT_ADD(s, i, len);
2075 }
2076 FIX_PREALLOC_SIZE(list);
2077 return list;
2078
2079 onError:
2080 Py_DECREF(list);
2081 return NULL;
2082}
2083
2084PyDoc_STRVAR(split__doc__,
2085"B.split(sep [,maxsplit]) -> list of bytes\n\
2086\n\
2087Return a list of the bytes in the string B, using sep as the\n\
2088delimiter. If maxsplit is given, at most maxsplit\n\
2089splits are done.");
2090
2091static PyObject *
2092bytes_split(PyBytesObject *self, PyObject *args)
2093{
2094 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2095 Py_ssize_t maxsplit = -1, count=0;
2096 const char *s = PyBytes_AS_STRING(self), *sub;
2097 PyObject *list, *str, *subobj;
2098#ifdef USE_FAST
2099 Py_ssize_t pos;
2100#endif
2101
2102 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2103 return NULL;
2104 if (maxsplit < 0)
2105 maxsplit = PY_SSIZE_T_MAX;
2106 if (PyBytes_Check(subobj)) {
2107 sub = PyBytes_AS_STRING(subobj);
2108 n = PyBytes_GET_SIZE(subobj);
2109 }
2110 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2111 return NULL;
2112
2113 if (n == 0) {
2114 PyErr_SetString(PyExc_ValueError, "empty separator");
2115 return NULL;
2116 }
2117 else if (n == 1)
2118 return split_char(s, len, sub[0], maxsplit);
2119
2120 list = PyList_New(PREALLOC_SIZE(maxsplit));
2121 if (list == NULL)
2122 return NULL;
2123
2124#ifdef USE_FAST
2125 i = j = 0;
2126 while (maxsplit-- > 0) {
2127 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2128 if (pos < 0)
2129 break;
2130 j = i+pos;
2131 SPLIT_ADD(s, i, j);
2132 i = j + n;
2133 }
2134#else
2135 i = j = 0;
2136 while ((j+n <= len) && (maxsplit-- > 0)) {
2137 for (; j+n <= len; j++) {
2138 if (Py_STRING_MATCH(s, j, sub, n)) {
2139 SPLIT_ADD(s, i, j);
2140 i = j = j + n;
2141 break;
2142 }
2143 }
2144 }
2145#endif
2146 SPLIT_ADD(s, i, len);
2147 FIX_PREALLOC_SIZE(list);
2148 return list;
2149
2150 onError:
2151 Py_DECREF(list);
2152 return NULL;
2153}
2154
2155PyDoc_STRVAR(partition__doc__,
2156"B.partition(sep) -> (head, sep, tail)\n\
2157\n\
2158Searches for the separator sep in B, and returns the part before it,\n\
2159the separator itself, and the part after it. If the separator is not\n\
2160found, returns B and two empty bytes.");
2161
2162static PyObject *
2163bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2164{
2165 PyObject *bytesep, *result;
2166
2167 bytesep = PyBytes_FromObject(sep_obj);
2168 if (! bytesep)
2169 return NULL;
2170
2171 result = stringlib_partition(
2172 (PyObject*) self,
2173 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002174 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002175 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2176 );
2177
2178 Py_DECREF(bytesep);
2179 return result;
2180}
2181
2182PyDoc_STRVAR(rpartition__doc__,
2183"B.rpartition(sep) -> (tail, sep, head)\n\
2184\n\
2185Searches for the separator sep in B, starting at the end of B, and returns\n\
2186the part before it, the separator itself, and the part after it. If the\n\
2187separator is not found, returns two empty bytes and B.");
2188
2189static PyObject *
2190bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2191{
2192 PyObject *bytesep, *result;
2193
2194 bytesep = PyBytes_FromObject(sep_obj);
2195 if (! bytesep)
2196 return NULL;
2197
2198 result = stringlib_rpartition(
2199 (PyObject*) self,
2200 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002201 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002202 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2203 );
2204
2205 Py_DECREF(bytesep);
2206 return result;
2207}
2208
2209Py_LOCAL_INLINE(PyObject *)
2210rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2211{
2212 register Py_ssize_t i, j, count=0;
2213 PyObject *str;
2214 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2215
2216 if (list == NULL)
2217 return NULL;
2218
2219 i = j = len - 1;
2220 while ((i >= 0) && (maxcount-- > 0)) {
2221 for (; i >= 0; i--) {
2222 if (s[i] == ch) {
2223 SPLIT_ADD(s, i + 1, j + 1);
2224 j = i = i - 1;
2225 break;
2226 }
2227 }
2228 }
2229 if (j >= -1) {
2230 SPLIT_ADD(s, 0, j + 1);
2231 }
2232 FIX_PREALLOC_SIZE(list);
2233 if (PyList_Reverse(list) < 0)
2234 goto onError;
2235
2236 return list;
2237
2238 onError:
2239 Py_DECREF(list);
2240 return NULL;
2241}
2242
2243PyDoc_STRVAR(rsplit__doc__,
2244"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2245\n\
2246Return a list of the sections in the byte B, using sep as the\n\
2247delimiter, starting at the end of the bytes and working\n\
2248to the front. If maxsplit is given, at most maxsplit splits are\n\
2249done.");
2250
2251static PyObject *
2252bytes_rsplit(PyBytesObject *self, PyObject *args)
2253{
2254 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2255 Py_ssize_t maxsplit = -1, count=0;
2256 const char *s = PyBytes_AS_STRING(self), *sub;
2257 PyObject *list, *str, *subobj;
2258
2259 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2260 return NULL;
2261 if (maxsplit < 0)
2262 maxsplit = PY_SSIZE_T_MAX;
2263 if (PyBytes_Check(subobj)) {
2264 sub = PyBytes_AS_STRING(subobj);
2265 n = PyBytes_GET_SIZE(subobj);
2266 }
2267 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2268 return NULL;
2269
2270 if (n == 0) {
2271 PyErr_SetString(PyExc_ValueError, "empty separator");
2272 return NULL;
2273 }
2274 else if (n == 1)
2275 return rsplit_char(s, len, sub[0], maxsplit);
2276
2277 list = PyList_New(PREALLOC_SIZE(maxsplit));
2278 if (list == NULL)
2279 return NULL;
2280
2281 j = len;
2282 i = j - n;
2283
2284 while ( (i >= 0) && (maxsplit-- > 0) ) {
2285 for (; i>=0; i--) {
2286 if (Py_STRING_MATCH(s, i, sub, n)) {
2287 SPLIT_ADD(s, i + n, j);
2288 j = i;
2289 i -= n;
2290 break;
2291 }
2292 }
2293 }
2294 SPLIT_ADD(s, 0, j);
2295 FIX_PREALLOC_SIZE(list);
2296 if (PyList_Reverse(list) < 0)
2297 goto onError;
2298 return list;
2299
2300onError:
2301 Py_DECREF(list);
2302 return NULL;
2303}
2304
2305PyDoc_STRVAR(extend__doc__,
2306"B.extend(iterable int) -> None\n\
2307\n\
2308Append all the elements from the iterator or sequence to the\n\
2309end of the bytes.");
2310static PyObject *
2311bytes_extend(PyBytesObject *self, PyObject *arg)
2312{
2313 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2314 return NULL;
2315 Py_RETURN_NONE;
2316}
2317
2318
2319PyDoc_STRVAR(reverse__doc__,
2320"B.reverse() -> None\n\
2321\n\
2322Reverse the order of the values in bytes in place.");
2323static PyObject *
2324bytes_reverse(PyBytesObject *self, PyObject *unused)
2325{
2326 char swap, *head, *tail;
2327 Py_ssize_t i, j, n = self->ob_size;
2328
2329 j = n / 2;
2330 head = self->ob_bytes;
2331 tail = head + n - 1;
2332 for (i = 0; i < j; i++) {
2333 swap = *head;
2334 *head++ = *tail;
2335 *tail-- = swap;
2336 }
2337
2338 Py_RETURN_NONE;
2339}
2340
2341PyDoc_STRVAR(insert__doc__,
2342"B.insert(index, int) -> None\n\
2343\n\
2344Insert a single item into the bytes before the given index.");
2345static PyObject *
2346bytes_insert(PyBytesObject *self, PyObject *args)
2347{
2348 int value;
2349 Py_ssize_t where, n = self->ob_size;
2350
2351 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2352 return NULL;
2353
2354 if (n == PY_SSIZE_T_MAX) {
2355 PyErr_SetString(PyExc_OverflowError,
2356 "cannot add more objects to bytes");
2357 return NULL;
2358 }
2359 if (value < 0 || value >= 256) {
2360 PyErr_SetString(PyExc_ValueError,
2361 "byte must be in range(0, 256)");
2362 return NULL;
2363 }
2364 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2365 return NULL;
2366
2367 if (where < 0) {
2368 where += n;
2369 if (where < 0)
2370 where = 0;
2371 }
2372 if (where > n)
2373 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002374 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002375 self->ob_bytes[where] = value;
2376
2377 Py_RETURN_NONE;
2378}
2379
2380PyDoc_STRVAR(append__doc__,
2381"B.append(int) -> None\n\
2382\n\
2383Append a single item to the end of the bytes.");
2384static PyObject *
2385bytes_append(PyBytesObject *self, PyObject *arg)
2386{
2387 int value;
2388 Py_ssize_t n = self->ob_size;
2389
2390 if (! _getbytevalue(arg, &value))
2391 return NULL;
2392 if (n == PY_SSIZE_T_MAX) {
2393 PyErr_SetString(PyExc_OverflowError,
2394 "cannot add more objects to bytes");
2395 return NULL;
2396 }
2397 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2398 return NULL;
2399
2400 self->ob_bytes[n] = value;
2401
2402 Py_RETURN_NONE;
2403}
2404
2405PyDoc_STRVAR(pop__doc__,
2406"B.pop([index]) -> int\n\
2407\n\
2408Remove and return a single item from the bytes. If no index\n\
2409argument is give, will pop the last value.");
2410static PyObject *
2411bytes_pop(PyBytesObject *self, PyObject *args)
2412{
2413 int value;
2414 Py_ssize_t where = -1, n = self->ob_size;
2415
2416 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2417 return NULL;
2418
2419 if (n == 0) {
2420 PyErr_SetString(PyExc_OverflowError,
2421 "cannot pop an empty bytes");
2422 return NULL;
2423 }
2424 if (where < 0)
2425 where += self->ob_size;
2426 if (where < 0 || where >= self->ob_size) {
2427 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2428 return NULL;
2429 }
2430
2431 value = self->ob_bytes[where];
2432 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2433 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2434 return NULL;
2435
2436 return PyInt_FromLong(value);
2437}
2438
2439PyDoc_STRVAR(remove__doc__,
2440"B.remove(int) -> None\n\
2441\n\
2442Remove the first occurance of a value in bytes");
2443static PyObject *
2444bytes_remove(PyBytesObject *self, PyObject *arg)
2445{
2446 int value;
2447 Py_ssize_t where, n = self->ob_size;
2448
2449 if (! _getbytevalue(arg, &value))
2450 return NULL;
2451
2452 for (where = 0; where < n; where++) {
2453 if (self->ob_bytes[where] == value)
2454 break;
2455 }
2456 if (where == n) {
2457 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2458 return NULL;
2459 }
2460
2461 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2462 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2463 return NULL;
2464
2465 Py_RETURN_NONE;
2466}
2467
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002468/* XXX These two helpers could be optimized if argsize == 1 */
2469
2470Py_ssize_t
2471lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2472 void *argptr, Py_ssize_t argsize)
2473{
2474 Py_ssize_t i = 0;
2475 while (i < mysize && memchr(argptr, myptr[i], argsize))
2476 i++;
2477 return i;
2478}
2479
2480Py_ssize_t
2481rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2482 void *argptr, Py_ssize_t argsize)
2483{
2484 Py_ssize_t i = mysize - 1;
2485 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2486 i--;
2487 return i + 1;
2488}
2489
2490PyDoc_STRVAR(strip__doc__,
2491"B.strip(bytes) -> bytes\n\
2492\n\
2493Strip leading and trailing bytes contained in the argument.");
2494static PyObject *
2495bytes_strip(PyBytesObject *self, PyObject *arg)
2496{
2497 Py_ssize_t left, right, mysize, argsize;
2498 void *myptr, *argptr;
2499 if (arg == NULL || !PyBytes_Check(arg)) {
2500 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2501 return NULL;
2502 }
2503 myptr = self->ob_bytes;
2504 mysize = self->ob_size;
2505 argptr = ((PyBytesObject *)arg)->ob_bytes;
2506 argsize = ((PyBytesObject *)arg)->ob_size;
2507 left = lstrip_helper(myptr, mysize, argptr, argsize);
2508 right = rstrip_helper(myptr, mysize, argptr, argsize);
2509 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2510}
2511
2512PyDoc_STRVAR(lstrip__doc__,
2513"B.lstrip(bytes) -> bytes\n\
2514\n\
2515Strip leading bytes contained in the argument.");
2516static PyObject *
2517bytes_lstrip(PyBytesObject *self, PyObject *arg)
2518{
2519 Py_ssize_t left, right, mysize, argsize;
2520 void *myptr, *argptr;
2521 if (arg == NULL || !PyBytes_Check(arg)) {
2522 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2523 return NULL;
2524 }
2525 myptr = self->ob_bytes;
2526 mysize = self->ob_size;
2527 argptr = ((PyBytesObject *)arg)->ob_bytes;
2528 argsize = ((PyBytesObject *)arg)->ob_size;
2529 left = lstrip_helper(myptr, mysize, argptr, argsize);
2530 right = mysize;
2531 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2532}
2533
2534PyDoc_STRVAR(rstrip__doc__,
2535"B.rstrip(bytes) -> bytes\n\
2536\n\
2537Strip trailing bytes contained in the argument.");
2538static PyObject *
2539bytes_rstrip(PyBytesObject *self, PyObject *arg)
2540{
2541 Py_ssize_t left, right, mysize, argsize;
2542 void *myptr, *argptr;
2543 if (arg == NULL || !PyBytes_Check(arg)) {
2544 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2545 return NULL;
2546 }
2547 myptr = self->ob_bytes;
2548 mysize = self->ob_size;
2549 argptr = ((PyBytesObject *)arg)->ob_bytes;
2550 argsize = ((PyBytesObject *)arg)->ob_size;
2551 left = 0;
2552 right = rstrip_helper(myptr, mysize, argptr, argsize);
2553 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2554}
Neal Norwitz6968b052007-02-27 19:02:19 +00002555
Guido van Rossumd624f182006-04-24 13:47:05 +00002556PyDoc_STRVAR(decode_doc,
2557"B.decode([encoding[,errors]]) -> unicode obect.\n\
2558\n\
2559Decodes B using the codec registered for encoding. encoding defaults\n\
2560to the default encoding. errors may be given to set a different error\n\
2561handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2562a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2563as well as any other name registerd with codecs.register_error that is\n\
2564able to handle UnicodeDecodeErrors.");
2565
2566static PyObject *
2567bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002568{
Guido van Rossumd624f182006-04-24 13:47:05 +00002569 const char *encoding = NULL;
2570 const char *errors = NULL;
2571
2572 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2573 return NULL;
2574 if (encoding == NULL)
2575 encoding = PyUnicode_GetDefaultEncoding();
2576 return PyCodec_Decode(self, encoding, errors);
2577}
2578
Guido van Rossuma0867f72006-05-05 04:34:18 +00002579PyDoc_STRVAR(alloc_doc,
2580"B.__alloc__() -> int\n\
2581\n\
2582Returns the number of bytes actually allocated.");
2583
2584static PyObject *
2585bytes_alloc(PyBytesObject *self)
2586{
2587 return PyInt_FromSsize_t(self->ob_alloc);
2588}
2589
Guido van Rossum20188312006-05-05 15:15:40 +00002590PyDoc_STRVAR(join_doc,
2591"bytes.join(iterable_of_bytes) -> bytes\n\
2592\n\
2593Concatenates any number of bytes objects. Example:\n\
2594bytes.join([bytes('ab'), bytes('pq'), bytes('rs')]) -> bytes('abpqrs').");
2595
2596static PyObject *
2597bytes_join(PyObject *cls, PyObject *it)
2598{
2599 PyObject *seq;
2600 Py_ssize_t i;
2601 Py_ssize_t n;
2602 PyObject **items;
2603 Py_ssize_t totalsize = 0;
2604 PyObject *result;
2605 char *dest;
2606
2607 seq = PySequence_Fast(it, "can only join an iterable");
2608 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002609 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002610 n = PySequence_Fast_GET_SIZE(seq);
2611 items = PySequence_Fast_ITEMS(seq);
2612
2613 /* Compute the total size, and check that they are all bytes */
2614 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002615 PyObject *obj = items[i];
2616 if (!PyBytes_Check(obj)) {
2617 PyErr_Format(PyExc_TypeError,
2618 "can only join an iterable of bytes "
2619 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002620 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002621 (long)i, obj->ob_type->tp_name);
2622 goto error;
2623 }
2624 totalsize += PyBytes_GET_SIZE(obj);
2625 if (totalsize < 0) {
2626 PyErr_NoMemory();
2627 goto error;
2628 }
Guido van Rossum20188312006-05-05 15:15:40 +00002629 }
2630
2631 /* Allocate the result, and copy the bytes */
2632 result = PyBytes_FromStringAndSize(NULL, totalsize);
2633 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002634 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002635 dest = PyBytes_AS_STRING(result);
2636 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002637 PyObject *obj = items[i];
2638 Py_ssize_t size = PyBytes_GET_SIZE(obj);
2639 memcpy(dest, PyBytes_AS_STRING(obj), size);
2640 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002641 }
2642
2643 /* Done */
2644 Py_DECREF(seq);
2645 return result;
2646
2647 /* Error handling */
2648 error:
2649 Py_DECREF(seq);
2650 return NULL;
2651}
2652
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002653PyDoc_STRVAR(fromhex_doc,
2654"bytes.fromhex(string) -> bytes\n\
2655\n\
2656Create a bytes object from a string of hexadecimal numbers.\n\
2657Spaces between two numbers are accepted. Example:\n\
2658bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2659
/* Convert one hexadecimal digit character to its numeric value.
   Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns 0..15, or -1 when `c` is
   not a hexadecimal digit. */
static int
hex_digit_to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    /* Fold upper case to lower case so a single range test is enough. */
    lowered = isupper(c) ? tolower(c) : c;
    if (lowered < 'a' || lowered > 'f')
        return -1;
    return lowered - 'a' + 10;
}
2673
2674static PyObject *
2675bytes_fromhex(PyObject *cls, PyObject *args)
2676{
2677 PyObject *newbytes;
2678 char *hex, *buf;
2679 Py_ssize_t len, byteslen, i, j;
2680 int top, bot;
2681
2682 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2683 return NULL;
2684
2685 byteslen = len / 2; /* max length if there are no spaces */
2686
2687 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2688 if (!newbytes)
2689 return NULL;
2690 buf = PyBytes_AS_STRING(newbytes);
2691
2692 for (i = j = 0; ; i += 2) {
2693 /* skip over spaces in the input */
2694 while (Py_CHARMASK(hex[i]) == ' ')
2695 i++;
2696 if (i >= len)
2697 break;
2698 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2699 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2700 if (top == -1 || bot == -1) {
2701 PyErr_Format(PyExc_ValueError,
2702 "non-hexadecimal number string '%c%c' found in "
2703 "fromhex() arg at position %zd",
2704 hex[i], hex[i+1], i);
2705 goto error;
2706 }
2707 buf[j++] = (top << 4) + bot;
2708 }
2709 if (PyBytes_Resize(newbytes, j) < 0)
2710 goto error;
2711 return newbytes;
2712
2713 error:
2714 Py_DECREF(newbytes);
2715 return NULL;
2716}
2717
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002718PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2719
2720static PyObject *
2721bytes_reduce(PyBytesObject *self)
2722{
2723 return Py_BuildValue("(O(s#))",
2724 self->ob_type,
2725 self->ob_bytes == NULL ? "" : self->ob_bytes,
2726 self->ob_size);
2727}
2728
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002729static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002730 (lenfunc)bytes_length, /* sq_length */
2731 (binaryfunc)bytes_concat, /* sq_concat */
2732 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2733 (ssizeargfunc)bytes_getitem, /* sq_item */
2734 0, /* sq_slice */
2735 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2736 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002737 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002738 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2739 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002740};
2741
2742static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002743 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002744 (binaryfunc)bytes_subscript,
2745 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002746};
2747
2748static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002749 (readbufferproc)bytes_getbuffer,
2750 (writebufferproc)bytes_getbuffer,
2751 (segcountproc)bytes_getsegcount,
2752 /* XXX Bytes are not characters! But we need to implement
2753 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
2754 (charbufferproc)bytes_getbuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002755};
2756
2757static PyMethodDef
2758bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002759 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2760 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2761 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2762 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2763 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2764 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2765 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2766 startswith__doc__},
2767 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2768 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2769 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2770 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2771 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2772 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2773 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2774 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2775 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2776 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2777 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2778 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002779 {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
2780 {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
2781 {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002782 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002783 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002784 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2785 fromhex_doc},
Guido van Rossum20188312006-05-05 15:15:40 +00002786 {"join", (PyCFunction)bytes_join, METH_O|METH_CLASS, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002787 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002788 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002789};
2790
2791PyDoc_STRVAR(bytes_doc,
2792"bytes([iterable]) -> new array of bytes.\n\
2793\n\
2794If an argument is given it must be an iterable yielding ints in range(256).");
2795
2796PyTypeObject PyBytes_Type = {
2797 PyObject_HEAD_INIT(&PyType_Type)
2798 0,
2799 "bytes",
2800 sizeof(PyBytesObject),
2801 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002802 (destructor)bytes_dealloc, /* tp_dealloc */
2803 0, /* tp_print */
2804 0, /* tp_getattr */
2805 0, /* tp_setattr */
2806 0, /* tp_compare */
2807 (reprfunc)bytes_repr, /* tp_repr */
2808 0, /* tp_as_number */
2809 &bytes_as_sequence, /* tp_as_sequence */
2810 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002811 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002812 0, /* tp_call */
2813 (reprfunc)bytes_str, /* tp_str */
2814 PyObject_GenericGetAttr, /* tp_getattro */
2815 0, /* tp_setattro */
2816 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002817 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002818 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002819 bytes_doc, /* tp_doc */
2820 0, /* tp_traverse */
2821 0, /* tp_clear */
2822 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2823 0, /* tp_weaklistoffset */
2824 0, /* tp_iter */
2825 0, /* tp_iternext */
2826 bytes_methods, /* tp_methods */
2827 0, /* tp_members */
2828 0, /* tp_getset */
2829 0, /* tp_base */
2830 0, /* tp_dict */
2831 0, /* tp_descr_get */
2832 0, /* tp_descr_set */
2833 0, /* tp_dictoffset */
2834 (initproc)bytes_init, /* tp_init */
2835 PyType_GenericAlloc, /* tp_alloc */
2836 PyType_GenericNew, /* tp_new */
2837 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002838};