blob: be3da7a62c833dd3b261340edda225fa72ec2fa5 [file] [log] [blame]
/* Bytes object implementation */

/* XXX TODO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
34static int _getbytevalue(PyObject* arg, int *value)
35{
36 PyObject *intarg = PyNumber_Int(arg);
37 if (! intarg)
38 return 0;
39 *value = PyInt_AsLong(intarg);
40 Py_DECREF(intarg);
41 if (*value < 0 || *value >= 256) {
42 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
43 return 0;
44 }
45 return 1;
46}
47
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000048/* Direct API functions */
49
50PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000051PyBytes_FromObject(PyObject *input)
52{
53 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
54 input, NULL);
55}
56
57PyObject *
58PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000059{
60 PyBytesObject *new;
61
Guido van Rossumd624f182006-04-24 13:47:05 +000062 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000063
64 new = PyObject_New(PyBytesObject, &PyBytes_Type);
65 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000066 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000067
Guido van Rossumd624f182006-04-24 13:47:05 +000068 if (size == 0)
69 new->ob_bytes = NULL;
70 else {
71 new->ob_bytes = PyMem_Malloc(size);
72 if (new->ob_bytes == NULL) {
73 Py_DECREF(new);
74 return NULL;
75 }
76 if (bytes != NULL)
77 memcpy(new->ob_bytes, bytes, size);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000078 }
Guido van Rossuma0867f72006-05-05 04:34:18 +000079 new->ob_size = new->ob_alloc = size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080
81 return (PyObject *)new;
82}
83
84Py_ssize_t
85PyBytes_Size(PyObject *self)
86{
87 assert(self != NULL);
88 assert(PyBytes_Check(self));
89
Guido van Rossum20188312006-05-05 15:15:40 +000090 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000091}
92
93char *
94PyBytes_AsString(PyObject *self)
95{
96 assert(self != NULL);
97 assert(PyBytes_Check(self));
98
Guido van Rossum20188312006-05-05 15:15:40 +000099 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000100}
101
102int
103PyBytes_Resize(PyObject *self, Py_ssize_t size)
104{
105 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000106 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000107
108 assert(self != NULL);
109 assert(PyBytes_Check(self));
110 assert(size >= 0);
111
Guido van Rossuma0867f72006-05-05 04:34:18 +0000112 if (size < alloc / 2) {
113 /* Major downsize; resize down to exact size */
114 alloc = size;
115 }
116 else if (size <= alloc) {
117 /* Within allocated size; quick exit */
118 ((PyBytesObject *)self)->ob_size = size;
119 return 0;
120 }
121 else if (size <= alloc * 1.125) {
122 /* Moderate upsize; overallocate similar to list_resize() */
123 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
124 }
125 else {
126 /* Major upsize; resize up to exact size */
127 alloc = size;
128 }
129
130 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000132 PyErr_NoMemory();
133 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134 }
135
Guido van Rossumd624f182006-04-24 13:47:05 +0000136 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000138 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000139
140 return 0;
141}
142
143/* Functions stuffed into the type object */
144
145static Py_ssize_t
146bytes_length(PyBytesObject *self)
147{
148 return self->ob_size;
149}
150
151static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000152bytes_concat(PyBytesObject *self, PyObject *other)
153{
154 PyBytesObject *result;
155 Py_ssize_t mysize;
156 Py_ssize_t size;
157
158 if (!PyBytes_Check(other)) {
159 PyErr_Format(PyExc_TypeError,
160 "can't concat bytes to %.100s", other->ob_type->tp_name);
161 return NULL;
162 }
163
164 mysize = self->ob_size;
165 size = mysize + ((PyBytesObject *)other)->ob_size;
166 if (size < 0)
167 return PyErr_NoMemory();
168 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
169 if (result != NULL) {
170 memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
171 memcpy(result->ob_bytes + self->ob_size,
172 ((PyBytesObject *)other)->ob_bytes,
173 ((PyBytesObject *)other)->ob_size);
174 }
175 return (PyObject *)result;
176}
177
178static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000179bytes_iconcat(PyBytesObject *self, PyObject *other)
180{
181 Py_ssize_t mysize;
182 Py_ssize_t osize;
183 Py_ssize_t size;
184
185 if (!PyBytes_Check(other)) {
186 PyErr_Format(PyExc_TypeError,
187 "can't concat bytes to %.100s", other->ob_type->tp_name);
188 return NULL;
189 }
190
191 mysize = self->ob_size;
192 osize = ((PyBytesObject *)other)->ob_size;
193 size = mysize + osize;
194 if (size < 0)
195 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000196 if (size <= self->ob_alloc)
197 self->ob_size = size;
198 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000199 return NULL;
200 memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
201 Py_INCREF(self);
202 return (PyObject *)self;
203}
204
205static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000206bytes_repeat(PyBytesObject *self, Py_ssize_t count)
207{
208 PyBytesObject *result;
209 Py_ssize_t mysize;
210 Py_ssize_t size;
211
212 if (count < 0)
213 count = 0;
214 mysize = self->ob_size;
215 size = mysize * count;
216 if (count != 0 && size / count != mysize)
217 return PyErr_NoMemory();
218 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
219 if (result != NULL && size != 0) {
220 if (mysize == 1)
221 memset(result->ob_bytes, self->ob_bytes[0], size);
222 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000223 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000224 for (i = 0; i < count; i++)
225 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
226 }
227 }
228 return (PyObject *)result;
229}
230
231static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000232bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
233{
234 Py_ssize_t mysize;
235 Py_ssize_t size;
236
237 if (count < 0)
238 count = 0;
239 mysize = self->ob_size;
240 size = mysize * count;
241 if (count != 0 && size / count != mysize)
242 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000243 if (size <= self->ob_alloc)
244 self->ob_size = size;
245 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000246 return NULL;
247
248 if (mysize == 1)
249 memset(self->ob_bytes, self->ob_bytes[0], size);
250 else {
251 Py_ssize_t i;
252 for (i = 1; i < count; i++)
253 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
254 }
255
256 Py_INCREF(self);
257 return (PyObject *)self;
258}
259
260static int
261bytes_substring(PyBytesObject *self, PyBytesObject *other)
262{
263 Py_ssize_t i;
264
265 if (other->ob_size == 1) {
266 return memchr(self->ob_bytes, other->ob_bytes[0],
267 self->ob_size) != NULL;
268 }
269 if (other->ob_size == 0)
270 return 1; /* Edge case */
271 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
272 /* XXX Yeah, yeah, lots of optimizations possible... */
273 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
274 return 1;
275 }
276 return 0;
277}
278
279static int
280bytes_contains(PyBytesObject *self, PyObject *value)
281{
282 Py_ssize_t ival;
283
284 if (PyBytes_Check(value))
285 return bytes_substring(self, (PyBytesObject *)value);
286
Thomas Woutersd204a712006-08-22 13:41:17 +0000287 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000288 if (ival == -1 && PyErr_Occurred())
289 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000290 if (ival < 0 || ival >= 256) {
291 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
292 return -1;
293 }
294
295 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
296}
297
298static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000299bytes_getitem(PyBytesObject *self, Py_ssize_t i)
300{
301 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000302 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000303 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000304 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
305 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000306 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
308}
309
310static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000311bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000312{
Thomas Wouters376446d2006-12-19 08:30:14 +0000313 if (PyIndex_Check(item)) {
314 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000315
Thomas Wouters376446d2006-12-19 08:30:14 +0000316 if (i == -1 && PyErr_Occurred())
317 return NULL;
318
319 if (i < 0)
320 i += PyBytes_GET_SIZE(self);
321
322 if (i < 0 || i >= self->ob_size) {
323 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
324 return NULL;
325 }
326 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
327 }
328 else if (PySlice_Check(item)) {
329 Py_ssize_t start, stop, step, slicelength, cur, i;
330 if (PySlice_GetIndicesEx((PySliceObject *)item,
331 PyBytes_GET_SIZE(self),
332 &start, &stop, &step, &slicelength) < 0) {
333 return NULL;
334 }
335
336 if (slicelength <= 0)
337 return PyBytes_FromStringAndSize("", 0);
338 else if (step == 1) {
339 return PyBytes_FromStringAndSize(self->ob_bytes + start,
340 slicelength);
341 }
342 else {
343 char *source_buf = PyBytes_AS_STRING(self);
344 char *result_buf = (char *)PyMem_Malloc(slicelength);
345 PyObject *result;
346
347 if (result_buf == NULL)
348 return PyErr_NoMemory();
349
350 for (cur = start, i = 0; i < slicelength;
351 cur += step, i++) {
352 result_buf[i] = source_buf[cur];
353 }
354 result = PyBytes_FromStringAndSize(result_buf, slicelength);
355 PyMem_Free(result_buf);
356 return result;
357 }
358 }
359 else {
360 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
361 return NULL;
362 }
363}
364
Guido van Rossumd624f182006-04-24 13:47:05 +0000365static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000366bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000367 PyObject *values)
368{
369 int avail;
370 int needed;
371 char *bytes;
372
373 if (values == NULL) {
374 bytes = NULL;
375 needed = 0;
376 }
377 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
378 /* Make a copy an call this function recursively */
379 int err;
380 values = PyBytes_FromObject(values);
381 if (values == NULL)
382 return -1;
383 err = bytes_setslice(self, lo, hi, values);
384 Py_DECREF(values);
385 return err;
386 }
387 else {
388 assert(PyBytes_Check(values));
389 bytes = ((PyBytesObject *)values)->ob_bytes;
390 needed = ((PyBytesObject *)values)->ob_size;
391 }
392
393 if (lo < 0)
394 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000395 if (hi < lo)
396 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000397 if (hi > self->ob_size)
398 hi = self->ob_size;
399
400 avail = hi - lo;
401 if (avail < 0)
402 lo = hi = avail = 0;
403
404 if (avail != needed) {
405 if (avail > needed) {
406 /*
407 0 lo hi old_size
408 | |<----avail----->|<-----tomove------>|
409 | |<-needed->|<-----tomove------>|
410 0 lo new_hi new_size
411 */
412 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
413 self->ob_size - hi);
414 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000415 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000416 self->ob_size + needed - avail) < 0)
417 return -1;
418 if (avail < needed) {
419 /*
420 0 lo hi old_size
421 | |<-avail->|<-----tomove------>|
422 | |<----needed---->|<-----tomove------>|
423 0 lo new_hi new_size
424 */
425 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
426 self->ob_size - lo - needed);
427 }
428 }
429
430 if (needed > 0)
431 memcpy(self->ob_bytes + lo, bytes, needed);
432
433 return 0;
434}
435
436static int
437bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
438{
439 Py_ssize_t ival;
440
441 if (i < 0)
442 i += self->ob_size;
443
444 if (i < 0 || i >= self->ob_size) {
445 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
446 return -1;
447 }
448
449 if (value == NULL)
450 return bytes_setslice(self, i, i+1, NULL);
451
Thomas Woutersd204a712006-08-22 13:41:17 +0000452 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000453 if (ival == -1 && PyErr_Occurred())
454 return -1;
455
456 if (ival < 0 || ival >= 256) {
457 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
458 return -1;
459 }
460
461 self->ob_bytes[i] = ival;
462 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000463}
464
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000465static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000466bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
467{
468 Py_ssize_t start, stop, step, slicelen, needed;
469 char *bytes;
470
471 if (PyIndex_Check(item)) {
472 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
473
474 if (i == -1 && PyErr_Occurred())
475 return -1;
476
477 if (i < 0)
478 i += PyBytes_GET_SIZE(self);
479
480 if (i < 0 || i >= self->ob_size) {
481 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
482 return -1;
483 }
484
485 if (values == NULL) {
486 /* Fall through to slice assignment */
487 start = i;
488 stop = i + 1;
489 step = 1;
490 slicelen = 1;
491 }
492 else {
493 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
494 if (ival == -1 && PyErr_Occurred())
495 return -1;
496 if (ival < 0 || ival >= 256) {
497 PyErr_SetString(PyExc_ValueError,
498 "byte must be in range(0, 256)");
499 return -1;
500 }
501 self->ob_bytes[i] = (char)ival;
502 return 0;
503 }
504 }
505 else if (PySlice_Check(item)) {
506 if (PySlice_GetIndicesEx((PySliceObject *)item,
507 PyBytes_GET_SIZE(self),
508 &start, &stop, &step, &slicelen) < 0) {
509 return -1;
510 }
511 }
512 else {
513 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
514 return -1;
515 }
516
517 if (values == NULL) {
518 bytes = NULL;
519 needed = 0;
520 }
521 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
522 /* Make a copy an call this function recursively */
523 int err;
524 values = PyBytes_FromObject(values);
525 if (values == NULL)
526 return -1;
527 err = bytes_ass_subscript(self, item, values);
528 Py_DECREF(values);
529 return err;
530 }
531 else {
532 assert(PyBytes_Check(values));
533 bytes = ((PyBytesObject *)values)->ob_bytes;
534 needed = ((PyBytesObject *)values)->ob_size;
535 }
536 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
537 if ((step < 0 && start < stop) ||
538 (step > 0 && start > stop))
539 stop = start;
540 if (step == 1) {
541 if (slicelen != needed) {
542 if (slicelen > needed) {
543 /*
544 0 start stop old_size
545 | |<---slicelen--->|<-----tomove------>|
546 | |<-needed->|<-----tomove------>|
547 0 lo new_hi new_size
548 */
549 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
550 self->ob_size - stop);
551 }
552 if (PyBytes_Resize((PyObject *)self,
553 self->ob_size + needed - slicelen) < 0)
554 return -1;
555 if (slicelen < needed) {
556 /*
557 0 lo hi old_size
558 | |<-avail->|<-----tomove------>|
559 | |<----needed---->|<-----tomove------>|
560 0 lo new_hi new_size
561 */
562 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
563 self->ob_size - start - needed);
564 }
565 }
566
567 if (needed > 0)
568 memcpy(self->ob_bytes + start, bytes, needed);
569
570 return 0;
571 }
572 else {
573 if (needed == 0) {
574 /* Delete slice */
575 Py_ssize_t cur, i;
576
577 if (step < 0) {
578 stop = start + 1;
579 start = stop + step * (slicelen - 1) - 1;
580 step = -step;
581 }
582 for (cur = start, i = 0;
583 i < slicelen; cur += step, i++) {
584 Py_ssize_t lim = step - 1;
585
586 if (cur + step >= PyBytes_GET_SIZE(self))
587 lim = PyBytes_GET_SIZE(self) - cur - 1;
588
589 memmove(self->ob_bytes + cur - i,
590 self->ob_bytes + cur + 1, lim);
591 }
592 /* Move the tail of the bytes, in one chunk */
593 cur = start + slicelen*step;
594 if (cur < PyBytes_GET_SIZE(self)) {
595 memmove(self->ob_bytes + cur - slicelen,
596 self->ob_bytes + cur,
597 PyBytes_GET_SIZE(self) - cur);
598 }
599 if (PyBytes_Resize((PyObject *)self,
600 PyBytes_GET_SIZE(self) - slicelen) < 0)
601 return -1;
602
603 return 0;
604 }
605 else {
606 /* Assign slice */
607 Py_ssize_t cur, i;
608
609 if (needed != slicelen) {
610 PyErr_Format(PyExc_ValueError,
611 "attempt to assign bytes of size %zd "
612 "to extended slice of size %zd",
613 needed, slicelen);
614 return -1;
615 }
616 for (cur = start, i = 0; i < slicelen; cur += step, i++)
617 self->ob_bytes[cur] = bytes[i];
618 return 0;
619 }
620 }
621}
622
623static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000624bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
625{
Guido van Rossumd624f182006-04-24 13:47:05 +0000626 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000627 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000628 const char *encoding = NULL;
629 const char *errors = NULL;
630 Py_ssize_t count;
631 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000632 PyObject *(*iternext)(PyObject *);
633
Guido van Rossuma0867f72006-05-05 04:34:18 +0000634 if (self->ob_size != 0) {
635 /* Empty previous contents (yes, do this first of all!) */
636 if (PyBytes_Resize((PyObject *)self, 0) < 0)
637 return -1;
638 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000639
Guido van Rossumd624f182006-04-24 13:47:05 +0000640 /* Parse arguments */
641 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
642 &arg, &encoding, &errors))
643 return -1;
644
645 /* Make a quick exit if no first argument */
646 if (arg == NULL) {
647 if (encoding != NULL || errors != NULL) {
648 PyErr_SetString(PyExc_TypeError,
649 "encoding or errors without sequence argument");
650 return -1;
651 }
652 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000653 }
654
Guido van Rossumd624f182006-04-24 13:47:05 +0000655 if (PyUnicode_Check(arg)) {
656 /* Encode via the codec registry */
657 PyObject *encoded;
658 char *bytes;
659 Py_ssize_t size;
660 if (encoding == NULL)
661 encoding = PyUnicode_GetDefaultEncoding();
662 encoded = PyCodec_Encode(arg, encoding, errors);
663 if (encoded == NULL)
664 return -1;
665 if (!PyString_Check(encoded)) {
666 PyErr_Format(PyExc_TypeError,
667 "encoder did not return a string object (type=%.400s)",
668 encoded->ob_type->tp_name);
669 Py_DECREF(encoded);
670 return -1;
671 }
672 bytes = PyString_AS_STRING(encoded);
673 size = PyString_GET_SIZE(encoded);
Guido van Rossuma0867f72006-05-05 04:34:18 +0000674 if (size <= self->ob_alloc)
675 self->ob_size = size;
676 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000677 Py_DECREF(encoded);
678 return -1;
679 }
680 memcpy(self->ob_bytes, bytes, size);
681 Py_DECREF(encoded);
682 return 0;
683 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000684
Guido van Rossumd624f182006-04-24 13:47:05 +0000685 /* If it's not unicode, there can't be encoding or errors */
686 if (encoding != NULL || errors != NULL) {
687 PyErr_SetString(PyExc_TypeError,
688 "encoding or errors without a string argument");
689 return -1;
690 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000691
Guido van Rossumd624f182006-04-24 13:47:05 +0000692 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000693 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000694 if (count == -1 && PyErr_Occurred())
695 PyErr_Clear();
696 else {
697 if (count < 0) {
698 PyErr_SetString(PyExc_ValueError, "negative count");
699 return -1;
700 }
701 if (count > 0) {
702 if (PyBytes_Resize((PyObject *)self, count))
703 return -1;
704 memset(self->ob_bytes, 0, count);
705 }
706 return 0;
707 }
708
709 if (PyObject_CheckReadBuffer(arg)) {
710 const void *bytes;
711 Py_ssize_t size;
712 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
713 return -1;
714 if (PyBytes_Resize((PyObject *)self, size) < 0)
715 return -1;
716 memcpy(self->ob_bytes, bytes, size);
717 return 0;
718 }
719
720 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000721
722 /* Get the iterator */
723 it = PyObject_GetIter(arg);
724 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000725 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000726 iternext = *it->ob_type->tp_iternext;
727
728 /* Run the iterator to exhaustion */
729 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000730 PyObject *item;
731 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000732
Guido van Rossumd624f182006-04-24 13:47:05 +0000733 /* Get the next item */
734 item = iternext(it);
735 if (item == NULL) {
736 if (PyErr_Occurred()) {
737 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
738 goto error;
739 PyErr_Clear();
740 }
741 break;
742 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000743
Guido van Rossumd624f182006-04-24 13:47:05 +0000744 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000745 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000746 Py_DECREF(item);
747 if (value == -1 && PyErr_Occurred())
748 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000749
Guido van Rossumd624f182006-04-24 13:47:05 +0000750 /* Range check */
751 if (value < 0 || value >= 256) {
752 PyErr_SetString(PyExc_ValueError,
753 "bytes must be in range(0, 256)");
754 goto error;
755 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000756
Guido van Rossumd624f182006-04-24 13:47:05 +0000757 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000758 if (self->ob_size < self->ob_alloc)
759 self->ob_size++;
760 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000761 goto error;
762 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000763 }
764
765 /* Clean up and return success */
766 Py_DECREF(it);
767 return 0;
768
769 error:
770 /* Error handling when it != NULL */
771 Py_DECREF(it);
772 return -1;
773}
774
Georg Brandlee91be42007-02-24 19:41:35 +0000775/* Mostly copied from string_repr, but without the
776 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000777static PyObject *
778bytes_repr(PyBytesObject *self)
779{
Georg Brandlee91be42007-02-24 19:41:35 +0000780 size_t newsize = 3 + 4 * self->ob_size;
781 PyObject *v;
782 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
783 PyErr_SetString(PyExc_OverflowError,
784 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000785 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000786 }
Georg Brandlee91be42007-02-24 19:41:35 +0000787 v = PyString_FromStringAndSize((char *)NULL, newsize);
788 if (v == NULL) {
789 return NULL;
790 }
791 else {
792 register Py_ssize_t i;
793 register char c;
794 register char *p;
795 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Georg Brandlee91be42007-02-24 19:41:35 +0000797 p = PyString_AS_STRING(v);
798 *p++ = 'b';
799 *p++ = quote;
800 for (i = 0; i < self->ob_size; i++) {
801 /* There's at least enough room for a hex escape
802 and a closing quote. */
803 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
804 c = self->ob_bytes[i];
805 if (c == quote || c == '\\')
806 *p++ = '\\', *p++ = c;
807 else if (c == '\t')
808 *p++ = '\\', *p++ = 't';
809 else if (c == '\n')
810 *p++ = '\\', *p++ = 'n';
811 else if (c == '\r')
812 *p++ = '\\', *p++ = 'r';
813 else if (c == 0)
814 *p++ = '\\', *p++ = '0';
815 else if (c < ' ' || c >= 0x7f) {
816 /* For performance, we don't want to call
817 PyOS_snprintf here (extra layers of
818 function call). */
819 sprintf(p, "\\x%02x", c & 0xff);
820 p += 4;
821 }
822 else
823 *p++ = c;
824 }
825 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
826 *p++ = quote;
827 *p = '\0';
828 _PyString_Resize(
829 &v, (p - PyString_AS_STRING(v)));
830 return v;
831 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832}
833
834static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000835bytes_str(PyBytesObject *self)
836{
837 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
838}
839
840static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000841bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000842{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000843 PyBufferProcs *self_buffer, *other_buffer;
844 Py_ssize_t self_size, other_size;
845 void *self_bytes, *other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000846 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000847 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000848 int cmp;
849
Guido van Rossum343e97f2007-04-09 00:43:24 +0000850 /* For backwards compatibility, bytes can be compared to anything that
Guido van Rossumebea9be2007-04-09 00:49:13 +0000851 supports the (binary) buffer API. Except Unicode. */
852
853 if (PyUnicode_Check(self) || PyUnicode_Check(other)) {
854 Py_INCREF(Py_NotImplemented);
855 return Py_NotImplemented;
856 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000857
858 self_buffer = self->ob_type->tp_as_buffer;
859 if (self_buffer == NULL ||
860 self_buffer->bf_getreadbuffer == NULL ||
861 self_buffer->bf_getsegcount == NULL ||
862 self_buffer->bf_getsegcount(self, NULL) != 1)
863 {
Guido van Rossumd624f182006-04-24 13:47:05 +0000864 Py_INCREF(Py_NotImplemented);
865 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000866 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000867 self_size = self_buffer->bf_getreadbuffer(self, 0, &self_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000868
Guido van Rossum343e97f2007-04-09 00:43:24 +0000869 other_buffer = other->ob_type->tp_as_buffer;
870 if (other_buffer == NULL ||
871 other_buffer->bf_getreadbuffer == NULL ||
872 other_buffer->bf_getsegcount == NULL ||
873 other_buffer->bf_getsegcount(self, NULL) != 1)
874 {
875 Py_INCREF(Py_NotImplemented);
876 return Py_NotImplemented;
877 }
878 other_size = other_buffer->bf_getreadbuffer(other, 0, &other_bytes);
879
880 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000881 /* Shortcut: if the lengths differ, the objects differ */
882 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000883 }
884 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000885 minsize = self_size;
886 if (other_size < minsize)
887 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888
Guido van Rossum343e97f2007-04-09 00:43:24 +0000889 cmp = memcmp(self_bytes, other_bytes, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000890 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000891
Guido van Rossumd624f182006-04-24 13:47:05 +0000892 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000893 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000894 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000895 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000896 cmp = 1;
897 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898
Guido van Rossumd624f182006-04-24 13:47:05 +0000899 switch (op) {
900 case Py_LT: cmp = cmp < 0; break;
901 case Py_LE: cmp = cmp <= 0; break;
902 case Py_EQ: cmp = cmp == 0; break;
903 case Py_NE: cmp = cmp != 0; break;
904 case Py_GT: cmp = cmp > 0; break;
905 case Py_GE: cmp = cmp >= 0; break;
906 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000907 }
908
909 res = cmp ? Py_True : Py_False;
910 Py_INCREF(res);
911 return res;
912}
913
914static void
915bytes_dealloc(PyBytesObject *self)
916{
Guido van Rossumd624f182006-04-24 13:47:05 +0000917 if (self->ob_bytes != 0) {
918 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000919 }
920 self->ob_type->tp_free((PyObject *)self);
921}
922
Guido van Rossumd624f182006-04-24 13:47:05 +0000923static Py_ssize_t
924bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
925{
926 if (index != 0) {
927 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000928 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000929 return -1;
930 }
931 *ptr = (void *)self->ob_bytes;
932 return self->ob_size;
933}
934
935static Py_ssize_t
936bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
937{
938 if (lenp)
939 *lenp = self->ob_size;
940 return 1;
941}
942
Neal Norwitz6968b052007-02-27 19:02:19 +0000943
944
945/* -------------------------------------------------------------------- */
946/* Methods */
947
/* Configuration for the stringlib headers included below:
   element type, comparison routine, length accessor, constructor and
   the shared empty object. */
#define STRINGLIB_CHAR char
#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyBytes_GET_SIZE
#define STRINGLIB_NEW PyBytes_FromStringAndSize
#define STRINGLIB_EMPTY nullbytes
953
954#include "stringlib/fastsearch.h"
955#include "stringlib/count.h"
956#include "stringlib/find.h"
957#include "stringlib/partition.h"
958
959
/* The following Py_LOCAL_INLINE and Py_LOCAL functions
   were copied from the old char*-style string object. */
962
963Py_LOCAL_INLINE(void)
964_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
965{
966 if (*end > len)
967 *end = len;
968 else if (*end < 0)
969 *end += len;
970 if (*end < 0)
971 *end = 0;
972 if (*start < 0)
973 *start += len;
974 if (*start < 0)
975 *start = 0;
976}
977
978
979Py_LOCAL_INLINE(Py_ssize_t)
980bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
981{
982 PyObject *subobj;
983 const char *sub;
984 Py_ssize_t sub_len;
985 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
986
987 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
988 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
989 return -2;
990 if (PyBytes_Check(subobj)) {
991 sub = PyBytes_AS_STRING(subobj);
992 sub_len = PyBytes_GET_SIZE(subobj);
993 }
994 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
995 /* XXX - the "expected a character buffer object" is pretty
996 confusing for a non-expert. remap to something else ? */
997 return -2;
998
999 if (dir > 0)
1000 return stringlib_find_slice(
1001 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1002 sub, sub_len, start, end);
1003 else
1004 return stringlib_rfind_slice(
1005 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1006 sub, sub_len, start, end);
1007}
1008
1009
1010PyDoc_STRVAR(find__doc__,
1011"B.find(sub [,start [,end]]) -> int\n\
1012\n\
1013Return the lowest index in B where subsection sub is found,\n\
1014such that sub is contained within s[start,end]. Optional\n\
1015arguments start and end are interpreted as in slice notation.\n\
1016\n\
1017Return -1 on failure.");
1018
1019static PyObject *
1020bytes_find(PyBytesObject *self, PyObject *args)
1021{
1022 Py_ssize_t result = bytes_find_internal(self, args, +1);
1023 if (result == -2)
1024 return NULL;
1025 return PyInt_FromSsize_t(result);
1026}
1027
1028PyDoc_STRVAR(count__doc__,
1029"B.count(sub[, start[, end]]) -> int\n\
1030\n\
1031Return the number of non-overlapping occurrences of subsection sub in\n\
1032bytes B[start:end]. Optional arguments start and end are interpreted\n\
1033as in slice notation.");
1034
1035static PyObject *
1036bytes_count(PyBytesObject *self, PyObject *args)
1037{
1038 PyObject *sub_obj;
1039 const char *str = PyBytes_AS_STRING(self), *sub;
1040 Py_ssize_t sub_len;
1041 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1042
1043 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1044 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1045 return NULL;
1046
1047 if (PyBytes_Check(sub_obj)) {
1048 sub = PyBytes_AS_STRING(sub_obj);
1049 sub_len = PyBytes_GET_SIZE(sub_obj);
1050 }
1051 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1052 return NULL;
1053
1054 _adjust_indices(&start, &end, PyString_GET_SIZE(self));
1055
1056 return PyInt_FromSsize_t(
1057 stringlib_count(str + start, end - start, sub, sub_len)
1058 );
1059}
1060
1061
1062PyDoc_STRVAR(index__doc__,
1063"B.index(sub [,start [,end]]) -> int\n\
1064\n\
1065Like B.find() but raise ValueError when the subsection is not found.");
1066
1067static PyObject *
1068bytes_index(PyBytesObject *self, PyObject *args)
1069{
1070 Py_ssize_t result = bytes_find_internal(self, args, +1);
1071 if (result == -2)
1072 return NULL;
1073 if (result == -1) {
1074 PyErr_SetString(PyExc_ValueError,
1075 "subsection not found");
1076 return NULL;
1077 }
1078 return PyInt_FromSsize_t(result);
1079}
1080
1081
1082PyDoc_STRVAR(rfind__doc__,
1083"B.rfind(sub [,start [,end]]) -> int\n\
1084\n\
1085Return the highest index in B where subsection sub is found,\n\
1086such that sub is contained within s[start,end]. Optional\n\
1087arguments start and end are interpreted as in slice notation.\n\
1088\n\
1089Return -1 on failure.");
1090
1091static PyObject *
1092bytes_rfind(PyBytesObject *self, PyObject *args)
1093{
1094 Py_ssize_t result = bytes_find_internal(self, args, -1);
1095 if (result == -2)
1096 return NULL;
1097 return PyInt_FromSsize_t(result);
1098}
1099
1100
1101PyDoc_STRVAR(rindex__doc__,
1102"B.rindex(sub [,start [,end]]) -> int\n\
1103\n\
1104Like B.rfind() but raise ValueError when the subsection is not found.");
1105
1106static PyObject *
1107bytes_rindex(PyBytesObject *self, PyObject *args)
1108{
1109 Py_ssize_t result = bytes_find_internal(self, args, -1);
1110 if (result == -2)
1111 return NULL;
1112 if (result == -1) {
1113 PyErr_SetString(PyExc_ValueError,
1114 "subsection not found");
1115 return NULL;
1116 }
1117 return PyInt_FromSsize_t(result);
1118}
1119
1120
1121/* Matches the end (direction >= 0) or start (direction < 0) of self
1122 * against substr, using the start and end arguments. Returns
1123 * -1 on error, 0 if not found and 1 if found.
1124 */
1125Py_LOCAL(int)
1126_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1127 Py_ssize_t end, int direction)
1128{
1129 Py_ssize_t len = PyBytes_GET_SIZE(self);
1130 Py_ssize_t slen;
1131 const char* sub;
1132 const char* str;
1133
1134 if (PyBytes_Check(substr)) {
1135 sub = PyBytes_AS_STRING(substr);
1136 slen = PyBytes_GET_SIZE(substr);
1137 }
1138 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1139 return -1;
1140 str = PyBytes_AS_STRING(self);
1141
1142 _adjust_indices(&start, &end, len);
1143
1144 if (direction < 0) {
1145 /* startswith */
1146 if (start+slen > len)
1147 return 0;
1148 } else {
1149 /* endswith */
1150 if (end-start < slen || start > len)
1151 return 0;
1152
1153 if (end-slen > start)
1154 start = end - slen;
1155 }
1156 if (end-start >= slen)
1157 return ! memcmp(str+start, sub, slen);
1158 return 0;
1159}
1160
1161
1162PyDoc_STRVAR(startswith__doc__,
1163"B.startswith(prefix[, start[, end]]) -> bool\n\
1164\n\
1165Return True if B starts with the specified prefix, False otherwise.\n\
1166With optional start, test B beginning at that position.\n\
1167With optional end, stop comparing B at that position.\n\
1168prefix can also be a tuple of strings to try.");
1169
1170static PyObject *
1171bytes_startswith(PyBytesObject *self, PyObject *args)
1172{
1173 Py_ssize_t start = 0;
1174 Py_ssize_t end = PY_SSIZE_T_MAX;
1175 PyObject *subobj;
1176 int result;
1177
1178 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1179 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1180 return NULL;
1181 if (PyTuple_Check(subobj)) {
1182 Py_ssize_t i;
1183 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1184 result = _bytes_tailmatch(self,
1185 PyTuple_GET_ITEM(subobj, i),
1186 start, end, -1);
1187 if (result == -1)
1188 return NULL;
1189 else if (result) {
1190 Py_RETURN_TRUE;
1191 }
1192 }
1193 Py_RETURN_FALSE;
1194 }
1195 result = _bytes_tailmatch(self, subobj, start, end, -1);
1196 if (result == -1)
1197 return NULL;
1198 else
1199 return PyBool_FromLong(result);
1200}
1201
1202PyDoc_STRVAR(endswith__doc__,
1203"B.endswith(suffix[, start[, end]]) -> bool\n\
1204\n\
1205Return True if B ends with the specified suffix, False otherwise.\n\
1206With optional start, test B beginning at that position.\n\
1207With optional end, stop comparing B at that position.\n\
1208suffix can also be a tuple of strings to try.");
1209
1210static PyObject *
1211bytes_endswith(PyBytesObject *self, PyObject *args)
1212{
1213 Py_ssize_t start = 0;
1214 Py_ssize_t end = PY_SSIZE_T_MAX;
1215 PyObject *subobj;
1216 int result;
1217
1218 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1219 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1220 return NULL;
1221 if (PyTuple_Check(subobj)) {
1222 Py_ssize_t i;
1223 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1224 result = _bytes_tailmatch(self,
1225 PyTuple_GET_ITEM(subobj, i),
1226 start, end, +1);
1227 if (result == -1)
1228 return NULL;
1229 else if (result) {
1230 Py_RETURN_TRUE;
1231 }
1232 }
1233 Py_RETURN_FALSE;
1234 }
1235 result = _bytes_tailmatch(self, subobj, start, end, +1);
1236 if (result == -1)
1237 return NULL;
1238 else
1239 return PyBool_FromLong(result);
1240}
1241
1242
1243
1244PyDoc_STRVAR(translate__doc__,
1245"B.translate(table [,deletechars]) -> bytes\n\
1246\n\
1247Return a copy of the bytes B, where all characters occurring\n\
1248in the optional argument deletechars are removed, and the\n\
1249remaining characters have been mapped through the given\n\
1250translation table, which must be a bytes of length 256.");
1251
1252static PyObject *
1253bytes_translate(PyBytesObject *self, PyObject *args)
1254{
1255 register char *input, *output;
1256 register const char *table;
1257 register Py_ssize_t i, c, changed = 0;
1258 PyObject *input_obj = (PyObject*)self;
1259 const char *table1, *output_start, *del_table=NULL;
1260 Py_ssize_t inlen, tablen, dellen = 0;
1261 PyObject *result;
1262 int trans_table[256];
1263 PyObject *tableobj, *delobj = NULL;
1264
1265 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1266 &tableobj, &delobj))
1267 return NULL;
1268
1269 if (PyBytes_Check(tableobj)) {
1270 table1 = PyBytes_AS_STRING(tableobj);
1271 tablen = PyBytes_GET_SIZE(tableobj);
1272 }
1273 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1274 return NULL;
1275
1276 if (tablen != 256) {
1277 PyErr_SetString(PyExc_ValueError,
1278 "translation table must be 256 characters long");
1279 return NULL;
1280 }
1281
1282 if (delobj != NULL) {
1283 if (PyBytes_Check(delobj)) {
1284 del_table = PyBytes_AS_STRING(delobj);
1285 dellen = PyBytes_GET_SIZE(delobj);
1286 }
1287 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1288 return NULL;
1289 }
1290 else {
1291 del_table = NULL;
1292 dellen = 0;
1293 }
1294
1295 table = table1;
1296 inlen = PyBytes_GET_SIZE(input_obj);
1297 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1298 if (result == NULL)
1299 return NULL;
1300 output_start = output = PyBytes_AsString(result);
1301 input = PyBytes_AS_STRING(input_obj);
1302
1303 if (dellen == 0) {
1304 /* If no deletions are required, use faster code */
1305 for (i = inlen; --i >= 0; ) {
1306 c = Py_CHARMASK(*input++);
1307 if (Py_CHARMASK((*output++ = table[c])) != c)
1308 changed = 1;
1309 }
1310 if (changed || !PyBytes_CheckExact(input_obj))
1311 return result;
1312 Py_DECREF(result);
1313 Py_INCREF(input_obj);
1314 return input_obj;
1315 }
1316
1317 for (i = 0; i < 256; i++)
1318 trans_table[i] = Py_CHARMASK(table[i]);
1319
1320 for (i = 0; i < dellen; i++)
1321 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1322
1323 for (i = inlen; --i >= 0; ) {
1324 c = Py_CHARMASK(*input++);
1325 if (trans_table[c] != -1)
1326 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1327 continue;
1328 changed = 1;
1329 }
1330 if (!changed && PyBytes_CheckExact(input_obj)) {
1331 Py_DECREF(result);
1332 Py_INCREF(input_obj);
1333 return input_obj;
1334 }
1335 /* Fix the size of the resulting string */
1336 if (inlen > 0)
1337 PyBytes_Resize(result, output - output_start);
1338 return result;
1339}
1340
1341
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c in target[0:target_len]; evaluates
   to a char* to the hit or NULL.  Arguments are parenthesised so that
   expressions such as "end-start" expand safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs in target at offset.  The first
   and last bytes are compared before the interior is handed to memcmp.
   Don't call if length < 2 (the memcmp length would be negative). */
#define Py_STRING_MATCH(target, offset, pattern, length)        \
    ((target)[(offset)] == (pattern)[0] &&                      \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&  \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1355
1356
1357/* Bytes ops must return a string. */
1358/* If the object is subclass of bytes, create a copy */
1359Py_LOCAL(PyBytesObject *)
1360return_self(PyBytesObject *self)
1361{
1362 if (PyBytes_CheckExact(self)) {
1363 Py_INCREF(self);
1364 return (PyBytesObject *)self;
1365 }
1366 return (PyBytesObject *)PyBytes_FromStringAndSize(
1367 PyBytes_AS_STRING(self),
1368 PyBytes_GET_SIZE(self));
1369}
1370
1371Py_LOCAL_INLINE(Py_ssize_t)
1372countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1373{
1374 Py_ssize_t count=0;
1375 const char *start=target;
1376 const char *end=target+target_len;
1377
1378 while ( (start=findchar(start, end-start, c)) != NULL ) {
1379 count++;
1380 if (count >= maxcount)
1381 break;
1382 start += 1;
1383 }
1384 return count;
1385}
1386
1387Py_LOCAL(Py_ssize_t)
1388findstring(const char *target, Py_ssize_t target_len,
1389 const char *pattern, Py_ssize_t pattern_len,
1390 Py_ssize_t start,
1391 Py_ssize_t end,
1392 int direction)
1393{
1394 if (start < 0) {
1395 start += target_len;
1396 if (start < 0)
1397 start = 0;
1398 }
1399 if (end > target_len) {
1400 end = target_len;
1401 } else if (end < 0) {
1402 end += target_len;
1403 if (end < 0)
1404 end = 0;
1405 }
1406
1407 /* zero-length substrings always match at the first attempt */
1408 if (pattern_len == 0)
1409 return (direction > 0) ? start : end;
1410
1411 end -= pattern_len;
1412
1413 if (direction < 0) {
1414 for (; end >= start; end--)
1415 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1416 return end;
1417 } else {
1418 for (; start <= end; start++)
1419 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1420 return start;
1421 }
1422 return -1;
1423}
1424
1425Py_LOCAL_INLINE(Py_ssize_t)
1426countstring(const char *target, Py_ssize_t target_len,
1427 const char *pattern, Py_ssize_t pattern_len,
1428 Py_ssize_t start,
1429 Py_ssize_t end,
1430 int direction, Py_ssize_t maxcount)
1431{
1432 Py_ssize_t count=0;
1433
1434 if (start < 0) {
1435 start += target_len;
1436 if (start < 0)
1437 start = 0;
1438 }
1439 if (end > target_len) {
1440 end = target_len;
1441 } else if (end < 0) {
1442 end += target_len;
1443 if (end < 0)
1444 end = 0;
1445 }
1446
1447 /* zero-length substrings match everywhere */
1448 if (pattern_len == 0 || maxcount == 0) {
1449 if (target_len+1 < maxcount)
1450 return target_len+1;
1451 return maxcount;
1452 }
1453
1454 end -= pattern_len;
1455 if (direction < 0) {
1456 for (; (end >= start); end--)
1457 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1458 count++;
1459 if (--maxcount <= 0) break;
1460 end -= pattern_len-1;
1461 }
1462 } else {
1463 for (; (start <= end); start++)
1464 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1465 count++;
1466 if (--maxcount <= 0)
1467 break;
1468 start += pattern_len-1;
1469 }
1470 }
1471 return count;
1472}
1473
1474
1475/* Algorithms for different cases of string replacement */
1476
1477/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1478Py_LOCAL(PyBytesObject *)
1479replace_interleave(PyBytesObject *self,
1480 const char *to_s, Py_ssize_t to_len,
1481 Py_ssize_t maxcount)
1482{
1483 char *self_s, *result_s;
1484 Py_ssize_t self_len, result_len;
1485 Py_ssize_t count, i, product;
1486 PyBytesObject *result;
1487
1488 self_len = PyBytes_GET_SIZE(self);
1489
1490 /* 1 at the end plus 1 after every character */
1491 count = self_len+1;
1492 if (maxcount < count)
1493 count = maxcount;
1494
1495 /* Check for overflow */
1496 /* result_len = count * to_len + self_len; */
1497 product = count * to_len;
1498 if (product / to_len != count) {
1499 PyErr_SetString(PyExc_OverflowError,
1500 "replace string is too long");
1501 return NULL;
1502 }
1503 result_len = product + self_len;
1504 if (result_len < 0) {
1505 PyErr_SetString(PyExc_OverflowError,
1506 "replace string is too long");
1507 return NULL;
1508 }
1509
1510 if (! (result = (PyBytesObject *)
1511 PyBytes_FromStringAndSize(NULL, result_len)) )
1512 return NULL;
1513
1514 self_s = PyBytes_AS_STRING(self);
1515 result_s = PyBytes_AS_STRING(result);
1516
1517 /* TODO: special case single character, which doesn't need memcpy */
1518
1519 /* Lay the first one down (guaranteed this will occur) */
1520 Py_MEMCPY(result_s, to_s, to_len);
1521 result_s += to_len;
1522 count -= 1;
1523
1524 for (i=0; i<count; i++) {
1525 *result_s++ = *self_s++;
1526 Py_MEMCPY(result_s, to_s, to_len);
1527 result_s += to_len;
1528 }
1529
1530 /* Copy the rest of the original string */
1531 Py_MEMCPY(result_s, self_s, self_len-i);
1532
1533 return result;
1534}
1535
1536/* Special case for deleting a single character */
1537/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1538Py_LOCAL(PyBytesObject *)
1539replace_delete_single_character(PyBytesObject *self,
1540 char from_c, Py_ssize_t maxcount)
1541{
1542 char *self_s, *result_s;
1543 char *start, *next, *end;
1544 Py_ssize_t self_len, result_len;
1545 Py_ssize_t count;
1546 PyBytesObject *result;
1547
1548 self_len = PyBytes_GET_SIZE(self);
1549 self_s = PyBytes_AS_STRING(self);
1550
1551 count = countchar(self_s, self_len, from_c, maxcount);
1552 if (count == 0) {
1553 return return_self(self);
1554 }
1555
1556 result_len = self_len - count; /* from_len == 1 */
1557 assert(result_len>=0);
1558
1559 if ( (result = (PyBytesObject *)
1560 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1561 return NULL;
1562 result_s = PyBytes_AS_STRING(result);
1563
1564 start = self_s;
1565 end = self_s + self_len;
1566 while (count-- > 0) {
1567 next = findchar(start, end-start, from_c);
1568 if (next == NULL)
1569 break;
1570 Py_MEMCPY(result_s, start, next-start);
1571 result_s += (next-start);
1572 start = next+1;
1573 }
1574 Py_MEMCPY(result_s, start, end-start);
1575
1576 return result;
1577}
1578
1579/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1580
1581Py_LOCAL(PyBytesObject *)
1582replace_delete_substring(PyBytesObject *self,
1583 const char *from_s, Py_ssize_t from_len,
1584 Py_ssize_t maxcount)
1585{
1586 char *self_s, *result_s;
1587 char *start, *next, *end;
1588 Py_ssize_t self_len, result_len;
1589 Py_ssize_t count, offset;
1590 PyBytesObject *result;
1591
1592 self_len = PyBytes_GET_SIZE(self);
1593 self_s = PyBytes_AS_STRING(self);
1594
1595 count = countstring(self_s, self_len,
1596 from_s, from_len,
1597 0, self_len, 1,
1598 maxcount);
1599
1600 if (count == 0) {
1601 /* no matches */
1602 return return_self(self);
1603 }
1604
1605 result_len = self_len - (count * from_len);
1606 assert (result_len>=0);
1607
1608 if ( (result = (PyBytesObject *)
1609 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1610 return NULL;
1611
1612 result_s = PyBytes_AS_STRING(result);
1613
1614 start = self_s;
1615 end = self_s + self_len;
1616 while (count-- > 0) {
1617 offset = findstring(start, end-start,
1618 from_s, from_len,
1619 0, end-start, FORWARD);
1620 if (offset == -1)
1621 break;
1622 next = start + offset;
1623
1624 Py_MEMCPY(result_s, start, next-start);
1625
1626 result_s += (next-start);
1627 start = next+from_len;
1628 }
1629 Py_MEMCPY(result_s, start, end-start);
1630 return result;
1631}
1632
1633/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1634Py_LOCAL(PyBytesObject *)
1635replace_single_character_in_place(PyBytesObject *self,
1636 char from_c, char to_c,
1637 Py_ssize_t maxcount)
1638{
1639 char *self_s, *result_s, *start, *end, *next;
1640 Py_ssize_t self_len;
1641 PyBytesObject *result;
1642
1643 /* The result string will be the same size */
1644 self_s = PyBytes_AS_STRING(self);
1645 self_len = PyBytes_GET_SIZE(self);
1646
1647 next = findchar(self_s, self_len, from_c);
1648
1649 if (next == NULL) {
1650 /* No matches; return the original bytes */
1651 return return_self(self);
1652 }
1653
1654 /* Need to make a new bytes */
1655 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1656 if (result == NULL)
1657 return NULL;
1658 result_s = PyBytes_AS_STRING(result);
1659 Py_MEMCPY(result_s, self_s, self_len);
1660
1661 /* change everything in-place, starting with this one */
1662 start = result_s + (next-self_s);
1663 *start = to_c;
1664 start++;
1665 end = result_s + self_len;
1666
1667 while (--maxcount > 0) {
1668 next = findchar(start, end-start, from_c);
1669 if (next == NULL)
1670 break;
1671 *next = to_c;
1672 start = next+1;
1673 }
1674
1675 return result;
1676}
1677
1678/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1679Py_LOCAL(PyBytesObject *)
1680replace_substring_in_place(PyBytesObject *self,
1681 const char *from_s, Py_ssize_t from_len,
1682 const char *to_s, Py_ssize_t to_len,
1683 Py_ssize_t maxcount)
1684{
1685 char *result_s, *start, *end;
1686 char *self_s;
1687 Py_ssize_t self_len, offset;
1688 PyBytesObject *result;
1689
1690 /* The result bytes will be the same size */
1691
1692 self_s = PyBytes_AS_STRING(self);
1693 self_len = PyBytes_GET_SIZE(self);
1694
1695 offset = findstring(self_s, self_len,
1696 from_s, from_len,
1697 0, self_len, FORWARD);
1698 if (offset == -1) {
1699 /* No matches; return the original bytes */
1700 return return_self(self);
1701 }
1702
1703 /* Need to make a new bytes */
1704 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1705 if (result == NULL)
1706 return NULL;
1707 result_s = PyBytes_AS_STRING(result);
1708 Py_MEMCPY(result_s, self_s, self_len);
1709
1710 /* change everything in-place, starting with this one */
1711 start = result_s + offset;
1712 Py_MEMCPY(start, to_s, from_len);
1713 start += from_len;
1714 end = result_s + self_len;
1715
1716 while ( --maxcount > 0) {
1717 offset = findstring(start, end-start,
1718 from_s, from_len,
1719 0, end-start, FORWARD);
1720 if (offset==-1)
1721 break;
1722 Py_MEMCPY(start+offset, to_s, from_len);
1723 start += offset+from_len;
1724 }
1725
1726 return result;
1727}
1728
1729/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1730Py_LOCAL(PyBytesObject *)
1731replace_single_character(PyBytesObject *self,
1732 char from_c,
1733 const char *to_s, Py_ssize_t to_len,
1734 Py_ssize_t maxcount)
1735{
1736 char *self_s, *result_s;
1737 char *start, *next, *end;
1738 Py_ssize_t self_len, result_len;
1739 Py_ssize_t count, product;
1740 PyBytesObject *result;
1741
1742 self_s = PyBytes_AS_STRING(self);
1743 self_len = PyBytes_GET_SIZE(self);
1744
1745 count = countchar(self_s, self_len, from_c, maxcount);
1746 if (count == 0) {
1747 /* no matches, return unchanged */
1748 return return_self(self);
1749 }
1750
1751 /* use the difference between current and new, hence the "-1" */
1752 /* result_len = self_len + count * (to_len-1) */
1753 product = count * (to_len-1);
1754 if (product / (to_len-1) != count) {
1755 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1756 return NULL;
1757 }
1758 result_len = self_len + product;
1759 if (result_len < 0) {
1760 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1761 return NULL;
1762 }
1763
1764 if ( (result = (PyBytesObject *)
1765 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1766 return NULL;
1767 result_s = PyBytes_AS_STRING(result);
1768
1769 start = self_s;
1770 end = self_s + self_len;
1771 while (count-- > 0) {
1772 next = findchar(start, end-start, from_c);
1773 if (next == NULL)
1774 break;
1775
1776 if (next == start) {
1777 /* replace with the 'to' */
1778 Py_MEMCPY(result_s, to_s, to_len);
1779 result_s += to_len;
1780 start += 1;
1781 } else {
1782 /* copy the unchanged old then the 'to' */
1783 Py_MEMCPY(result_s, start, next-start);
1784 result_s += (next-start);
1785 Py_MEMCPY(result_s, to_s, to_len);
1786 result_s += to_len;
1787 start = next+1;
1788 }
1789 }
1790 /* Copy the remainder of the remaining bytes */
1791 Py_MEMCPY(result_s, start, end-start);
1792
1793 return result;
1794}
1795
1796/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1797Py_LOCAL(PyBytesObject *)
1798replace_substring(PyBytesObject *self,
1799 const char *from_s, Py_ssize_t from_len,
1800 const char *to_s, Py_ssize_t to_len,
1801 Py_ssize_t maxcount)
1802{
1803 char *self_s, *result_s;
1804 char *start, *next, *end;
1805 Py_ssize_t self_len, result_len;
1806 Py_ssize_t count, offset, product;
1807 PyBytesObject *result;
1808
1809 self_s = PyBytes_AS_STRING(self);
1810 self_len = PyBytes_GET_SIZE(self);
1811
1812 count = countstring(self_s, self_len,
1813 from_s, from_len,
1814 0, self_len, FORWARD, maxcount);
1815 if (count == 0) {
1816 /* no matches, return unchanged */
1817 return return_self(self);
1818 }
1819
1820 /* Check for overflow */
1821 /* result_len = self_len + count * (to_len-from_len) */
1822 product = count * (to_len-from_len);
1823 if (product / (to_len-from_len) != count) {
1824 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1825 return NULL;
1826 }
1827 result_len = self_len + product;
1828 if (result_len < 0) {
1829 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1830 return NULL;
1831 }
1832
1833 if ( (result = (PyBytesObject *)
1834 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1835 return NULL;
1836 result_s = PyBytes_AS_STRING(result);
1837
1838 start = self_s;
1839 end = self_s + self_len;
1840 while (count-- > 0) {
1841 offset = findstring(start, end-start,
1842 from_s, from_len,
1843 0, end-start, FORWARD);
1844 if (offset == -1)
1845 break;
1846 next = start+offset;
1847 if (next == start) {
1848 /* replace with the 'to' */
1849 Py_MEMCPY(result_s, to_s, to_len);
1850 result_s += to_len;
1851 start += from_len;
1852 } else {
1853 /* copy the unchanged old then the 'to' */
1854 Py_MEMCPY(result_s, start, next-start);
1855 result_s += (next-start);
1856 Py_MEMCPY(result_s, to_s, to_len);
1857 result_s += to_len;
1858 start = next+from_len;
1859 }
1860 }
1861 /* Copy the remainder of the remaining bytes */
1862 Py_MEMCPY(result_s, start, end-start);
1863
1864 return result;
1865}
1866
1867
1868Py_LOCAL(PyBytesObject *)
1869replace(PyBytesObject *self,
1870 const char *from_s, Py_ssize_t from_len,
1871 const char *to_s, Py_ssize_t to_len,
1872 Py_ssize_t maxcount)
1873{
1874 if (maxcount < 0) {
1875 maxcount = PY_SSIZE_T_MAX;
1876 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1877 /* nothing to do; return the original bytes */
1878 return return_self(self);
1879 }
1880
1881 if (maxcount == 0 ||
1882 (from_len == 0 && to_len == 0)) {
1883 /* nothing to do; return the original bytes */
1884 return return_self(self);
1885 }
1886
1887 /* Handle zero-length special cases */
1888
1889 if (from_len == 0) {
1890 /* insert the 'to' bytes everywhere. */
1891 /* >>> "Python".replace("", ".") */
1892 /* '.P.y.t.h.o.n.' */
1893 return replace_interleave(self, to_s, to_len, maxcount);
1894 }
1895
1896 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1897 /* point for an empty self bytes to generate a non-empty bytes */
1898 /* Special case so the remaining code always gets a non-empty bytes */
1899 if (PyBytes_GET_SIZE(self) == 0) {
1900 return return_self(self);
1901 }
1902
1903 if (to_len == 0) {
1904 /* delete all occurances of 'from' bytes */
1905 if (from_len == 1) {
1906 return replace_delete_single_character(
1907 self, from_s[0], maxcount);
1908 } else {
1909 return replace_delete_substring(self, from_s, from_len, maxcount);
1910 }
1911 }
1912
1913 /* Handle special case where both bytes have the same length */
1914
1915 if (from_len == to_len) {
1916 if (from_len == 1) {
1917 return replace_single_character_in_place(
1918 self,
1919 from_s[0],
1920 to_s[0],
1921 maxcount);
1922 } else {
1923 return replace_substring_in_place(
1924 self, from_s, from_len, to_s, to_len, maxcount);
1925 }
1926 }
1927
1928 /* Otherwise use the more generic algorithms */
1929 if (from_len == 1) {
1930 return replace_single_character(self, from_s[0],
1931 to_s, to_len, maxcount);
1932 } else {
1933 /* len('from')>=2, len('to')>=1 */
1934 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1935 }
1936}
1937
1938PyDoc_STRVAR(replace__doc__,
1939"B.replace (old, new[, count]) -> bytes\n\
1940\n\
1941Return a copy of bytes B with all occurrences of subsection\n\
1942old replaced by new. If the optional argument count is\n\
1943given, only the first count occurrences are replaced.");
1944
1945static PyObject *
1946bytes_replace(PyBytesObject *self, PyObject *args)
1947{
1948 Py_ssize_t count = -1;
1949 PyObject *from, *to;
1950 const char *from_s, *to_s;
1951 Py_ssize_t from_len, to_len;
1952
1953 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1954 return NULL;
1955
1956 if (PyBytes_Check(from)) {
1957 from_s = PyBytes_AS_STRING(from);
1958 from_len = PyBytes_GET_SIZE(from);
1959 }
1960 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1961 return NULL;
1962
1963 if (PyBytes_Check(to)) {
1964 to_s = PyBytes_AS_STRING(to);
1965 to_len = PyBytes_GET_SIZE(to);
1966 }
1967 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1968 return NULL;
1969
1970 return (PyObject *)replace((PyBytesObject *) self,
1971 from_s, from_len,
1972 to_s, to_len, count);
1973}
1974
1975
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesised so that an expression argument expands
   with the intended precedence. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The two macros below expect the enclosing function to provide the
   locals `str' and `list' and the label `onError'; SPLIT_ADD also needs
   `count'.  Each builds a bytes object from data[left:right] and stores
   it in list, jumping to onError on failure.

   SPLIT_APPEND expands to several statements that are not wrapped in a
   block, so it must not be used as the unbraced body of an if/else or
   loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* While count is below MAX_PREALLOC the new item is stored directly in
   the slot at index count; after that it is appended.  count is
   incremented either way. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
2020
2021
2022Py_LOCAL_INLINE(PyObject *)
2023split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2024{
2025 register Py_ssize_t i, j, count=0;
2026 PyObject *str;
2027 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2028
2029 if (list == NULL)
2030 return NULL;
2031
2032 i = j = 0;
2033 while ((j < len) && (maxcount-- > 0)) {
2034 for(; j<len; j++) {
2035 /* I found that using memchr makes no difference */
2036 if (s[j] == ch) {
2037 SPLIT_ADD(s, i, j);
2038 i = j = j + 1;
2039 break;
2040 }
2041 }
2042 }
2043 if (i <= len) {
2044 SPLIT_ADD(s, i, len);
2045 }
2046 FIX_PREALLOC_SIZE(list);
2047 return list;
2048
2049 onError:
2050 Py_DECREF(list);
2051 return NULL;
2052}
2053
2054PyDoc_STRVAR(split__doc__,
2055"B.split(sep [,maxsplit]) -> list of bytes\n\
2056\n\
2057Return a list of the bytes in the string B, using sep as the\n\
2058delimiter. If maxsplit is given, at most maxsplit\n\
2059splits are done.");
2060
2061static PyObject *
2062bytes_split(PyBytesObject *self, PyObject *args)
2063{
2064 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2065 Py_ssize_t maxsplit = -1, count=0;
2066 const char *s = PyBytes_AS_STRING(self), *sub;
2067 PyObject *list, *str, *subobj;
2068#ifdef USE_FAST
2069 Py_ssize_t pos;
2070#endif
2071
2072 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2073 return NULL;
2074 if (maxsplit < 0)
2075 maxsplit = PY_SSIZE_T_MAX;
2076 if (PyBytes_Check(subobj)) {
2077 sub = PyBytes_AS_STRING(subobj);
2078 n = PyBytes_GET_SIZE(subobj);
2079 }
2080 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2081 return NULL;
2082
2083 if (n == 0) {
2084 PyErr_SetString(PyExc_ValueError, "empty separator");
2085 return NULL;
2086 }
2087 else if (n == 1)
2088 return split_char(s, len, sub[0], maxsplit);
2089
2090 list = PyList_New(PREALLOC_SIZE(maxsplit));
2091 if (list == NULL)
2092 return NULL;
2093
2094#ifdef USE_FAST
2095 i = j = 0;
2096 while (maxsplit-- > 0) {
2097 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2098 if (pos < 0)
2099 break;
2100 j = i+pos;
2101 SPLIT_ADD(s, i, j);
2102 i = j + n;
2103 }
2104#else
2105 i = j = 0;
2106 while ((j+n <= len) && (maxsplit-- > 0)) {
2107 for (; j+n <= len; j++) {
2108 if (Py_STRING_MATCH(s, j, sub, n)) {
2109 SPLIT_ADD(s, i, j);
2110 i = j = j + n;
2111 break;
2112 }
2113 }
2114 }
2115#endif
2116 SPLIT_ADD(s, i, len);
2117 FIX_PREALLOC_SIZE(list);
2118 return list;
2119
2120 onError:
2121 Py_DECREF(list);
2122 return NULL;
2123}
2124
2125PyDoc_STRVAR(partition__doc__,
2126"B.partition(sep) -> (head, sep, tail)\n\
2127\n\
2128Searches for the separator sep in B, and returns the part before it,\n\
2129the separator itself, and the part after it. If the separator is not\n\
2130found, returns B and two empty bytes.");
2131
2132static PyObject *
2133bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2134{
2135 PyObject *bytesep, *result;
2136
2137 bytesep = PyBytes_FromObject(sep_obj);
2138 if (! bytesep)
2139 return NULL;
2140
2141 result = stringlib_partition(
2142 (PyObject*) self,
2143 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2144 bytesep,
2145 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2146 );
2147
2148 Py_DECREF(bytesep);
2149 return result;
2150}
2151
2152PyDoc_STRVAR(rpartition__doc__,
2153"B.rpartition(sep) -> (tail, sep, head)\n\
2154\n\
2155Searches for the separator sep in B, starting at the end of B, and returns\n\
2156the part before it, the separator itself, and the part after it. If the\n\
2157separator is not found, returns two empty bytes and B.");
2158
2159static PyObject *
2160bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2161{
2162 PyObject *bytesep, *result;
2163
2164 bytesep = PyBytes_FromObject(sep_obj);
2165 if (! bytesep)
2166 return NULL;
2167
2168 result = stringlib_rpartition(
2169 (PyObject*) self,
2170 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2171 bytesep,
2172 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2173 );
2174
2175 Py_DECREF(bytesep);
2176 return result;
2177}
2178
2179Py_LOCAL_INLINE(PyObject *)
2180rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2181{
2182 register Py_ssize_t i, j, count=0;
2183 PyObject *str;
2184 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2185
2186 if (list == NULL)
2187 return NULL;
2188
2189 i = j = len - 1;
2190 while ((i >= 0) && (maxcount-- > 0)) {
2191 for (; i >= 0; i--) {
2192 if (s[i] == ch) {
2193 SPLIT_ADD(s, i + 1, j + 1);
2194 j = i = i - 1;
2195 break;
2196 }
2197 }
2198 }
2199 if (j >= -1) {
2200 SPLIT_ADD(s, 0, j + 1);
2201 }
2202 FIX_PREALLOC_SIZE(list);
2203 if (PyList_Reverse(list) < 0)
2204 goto onError;
2205
2206 return list;
2207
2208 onError:
2209 Py_DECREF(list);
2210 return NULL;
2211}
2212
2213PyDoc_STRVAR(rsplit__doc__,
2214"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2215\n\
2216Return a list of the sections in the byte B, using sep as the\n\
2217delimiter, starting at the end of the bytes and working\n\
2218to the front. If maxsplit is given, at most maxsplit splits are\n\
2219done.");
2220
2221static PyObject *
2222bytes_rsplit(PyBytesObject *self, PyObject *args)
2223{
2224 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2225 Py_ssize_t maxsplit = -1, count=0;
2226 const char *s = PyBytes_AS_STRING(self), *sub;
2227 PyObject *list, *str, *subobj;
2228
2229 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2230 return NULL;
2231 if (maxsplit < 0)
2232 maxsplit = PY_SSIZE_T_MAX;
2233 if (PyBytes_Check(subobj)) {
2234 sub = PyBytes_AS_STRING(subobj);
2235 n = PyBytes_GET_SIZE(subobj);
2236 }
2237 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2238 return NULL;
2239
2240 if (n == 0) {
2241 PyErr_SetString(PyExc_ValueError, "empty separator");
2242 return NULL;
2243 }
2244 else if (n == 1)
2245 return rsplit_char(s, len, sub[0], maxsplit);
2246
2247 list = PyList_New(PREALLOC_SIZE(maxsplit));
2248 if (list == NULL)
2249 return NULL;
2250
2251 j = len;
2252 i = j - n;
2253
2254 while ( (i >= 0) && (maxsplit-- > 0) ) {
2255 for (; i>=0; i--) {
2256 if (Py_STRING_MATCH(s, i, sub, n)) {
2257 SPLIT_ADD(s, i + n, j);
2258 j = i;
2259 i -= n;
2260 break;
2261 }
2262 }
2263 }
2264 SPLIT_ADD(s, 0, j);
2265 FIX_PREALLOC_SIZE(list);
2266 if (PyList_Reverse(list) < 0)
2267 goto onError;
2268 return list;
2269
2270onError:
2271 Py_DECREF(list);
2272 return NULL;
2273}
2274
2275PyDoc_STRVAR(extend__doc__,
2276"B.extend(iterable int) -> None\n\
2277\n\
2278Append all the elements from the iterator or sequence to the\n\
2279end of the bytes.");
2280static PyObject *
2281bytes_extend(PyBytesObject *self, PyObject *arg)
2282{
2283 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2284 return NULL;
2285 Py_RETURN_NONE;
2286}
2287
2288
2289PyDoc_STRVAR(reverse__doc__,
2290"B.reverse() -> None\n\
2291\n\
2292Reverse the order of the values in bytes in place.");
2293static PyObject *
2294bytes_reverse(PyBytesObject *self, PyObject *unused)
2295{
2296 char swap, *head, *tail;
2297 Py_ssize_t i, j, n = self->ob_size;
2298
2299 j = n / 2;
2300 head = self->ob_bytes;
2301 tail = head + n - 1;
2302 for (i = 0; i < j; i++) {
2303 swap = *head;
2304 *head++ = *tail;
2305 *tail-- = swap;
2306 }
2307
2308 Py_RETURN_NONE;
2309}
2310
2311PyDoc_STRVAR(insert__doc__,
2312"B.insert(index, int) -> None\n\
2313\n\
2314Insert a single item into the bytes before the given index.");
2315static PyObject *
2316bytes_insert(PyBytesObject *self, PyObject *args)
2317{
2318 int value;
2319 Py_ssize_t where, n = self->ob_size;
2320
2321 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2322 return NULL;
2323
2324 if (n == PY_SSIZE_T_MAX) {
2325 PyErr_SetString(PyExc_OverflowError,
2326 "cannot add more objects to bytes");
2327 return NULL;
2328 }
2329 if (value < 0 || value >= 256) {
2330 PyErr_SetString(PyExc_ValueError,
2331 "byte must be in range(0, 256)");
2332 return NULL;
2333 }
2334 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2335 return NULL;
2336
2337 if (where < 0) {
2338 where += n;
2339 if (where < 0)
2340 where = 0;
2341 }
2342 if (where > n)
2343 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002344 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002345 self->ob_bytes[where] = value;
2346
2347 Py_RETURN_NONE;
2348}
2349
2350PyDoc_STRVAR(append__doc__,
2351"B.append(int) -> None\n\
2352\n\
2353Append a single item to the end of the bytes.");
2354static PyObject *
2355bytes_append(PyBytesObject *self, PyObject *arg)
2356{
2357 int value;
2358 Py_ssize_t n = self->ob_size;
2359
2360 if (! _getbytevalue(arg, &value))
2361 return NULL;
2362 if (n == PY_SSIZE_T_MAX) {
2363 PyErr_SetString(PyExc_OverflowError,
2364 "cannot add more objects to bytes");
2365 return NULL;
2366 }
2367 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2368 return NULL;
2369
2370 self->ob_bytes[n] = value;
2371
2372 Py_RETURN_NONE;
2373}
2374
2375PyDoc_STRVAR(pop__doc__,
2376"B.pop([index]) -> int\n\
2377\n\
2378Remove and return a single item from the bytes. If no index\n\
2379argument is give, will pop the last value.");
2380static PyObject *
2381bytes_pop(PyBytesObject *self, PyObject *args)
2382{
2383 int value;
2384 Py_ssize_t where = -1, n = self->ob_size;
2385
2386 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2387 return NULL;
2388
2389 if (n == 0) {
2390 PyErr_SetString(PyExc_OverflowError,
2391 "cannot pop an empty bytes");
2392 return NULL;
2393 }
2394 if (where < 0)
2395 where += self->ob_size;
2396 if (where < 0 || where >= self->ob_size) {
2397 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2398 return NULL;
2399 }
2400
2401 value = self->ob_bytes[where];
2402 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2403 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2404 return NULL;
2405
2406 return PyInt_FromLong(value);
2407}
2408
2409PyDoc_STRVAR(remove__doc__,
2410"B.remove(int) -> None\n\
2411\n\
2412Remove the first occurance of a value in bytes");
2413static PyObject *
2414bytes_remove(PyBytesObject *self, PyObject *arg)
2415{
2416 int value;
2417 Py_ssize_t where, n = self->ob_size;
2418
2419 if (! _getbytevalue(arg, &value))
2420 return NULL;
2421
2422 for (where = 0; where < n; where++) {
2423 if (self->ob_bytes[where] == value)
2424 break;
2425 }
2426 if (where == n) {
2427 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2428 return NULL;
2429 }
2430
2431 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2432 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2433 return NULL;
2434
2435 Py_RETURN_NONE;
2436}
2437
2438
Guido van Rossumd624f182006-04-24 13:47:05 +00002439PyDoc_STRVAR(decode_doc,
2440"B.decode([encoding[,errors]]) -> unicode obect.\n\
2441\n\
2442Decodes B using the codec registered for encoding. encoding defaults\n\
2443to the default encoding. errors may be given to set a different error\n\
2444handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2445a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2446as well as any other name registerd with codecs.register_error that is\n\
2447able to handle UnicodeDecodeErrors.");
2448
2449static PyObject *
2450bytes_decode(PyObject *self, PyObject *args)
2451{
2452 const char *encoding = NULL;
2453 const char *errors = NULL;
2454
2455 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2456 return NULL;
2457 if (encoding == NULL)
2458 encoding = PyUnicode_GetDefaultEncoding();
2459 return PyCodec_Decode(self, encoding, errors);
2460}
2461
Guido van Rossuma0867f72006-05-05 04:34:18 +00002462PyDoc_STRVAR(alloc_doc,
2463"B.__alloc__() -> int\n\
2464\n\
2465Returns the number of bytes actually allocated.");
2466
2467static PyObject *
2468bytes_alloc(PyBytesObject *self)
2469{
2470 return PyInt_FromSsize_t(self->ob_alloc);
2471}
2472
Guido van Rossum20188312006-05-05 15:15:40 +00002473PyDoc_STRVAR(join_doc,
2474"bytes.join(iterable_of_bytes) -> bytes\n\
2475\n\
2476Concatenates any number of bytes objects. Example:\n\
2477bytes.join([bytes('ab'), bytes('pq'), bytes('rs')]) -> bytes('abpqrs').");
2478
2479static PyObject *
2480bytes_join(PyObject *cls, PyObject *it)
2481{
2482 PyObject *seq;
2483 Py_ssize_t i;
2484 Py_ssize_t n;
2485 PyObject **items;
2486 Py_ssize_t totalsize = 0;
2487 PyObject *result;
2488 char *dest;
2489
2490 seq = PySequence_Fast(it, "can only join an iterable");
2491 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002492 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002493 n = PySequence_Fast_GET_SIZE(seq);
2494 items = PySequence_Fast_ITEMS(seq);
2495
2496 /* Compute the total size, and check that they are all bytes */
2497 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002498 PyObject *obj = items[i];
2499 if (!PyBytes_Check(obj)) {
2500 PyErr_Format(PyExc_TypeError,
2501 "can only join an iterable of bytes "
2502 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002503 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002504 (long)i, obj->ob_type->tp_name);
2505 goto error;
2506 }
2507 totalsize += PyBytes_GET_SIZE(obj);
2508 if (totalsize < 0) {
2509 PyErr_NoMemory();
2510 goto error;
2511 }
Guido van Rossum20188312006-05-05 15:15:40 +00002512 }
2513
2514 /* Allocate the result, and copy the bytes */
2515 result = PyBytes_FromStringAndSize(NULL, totalsize);
2516 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002517 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002518 dest = PyBytes_AS_STRING(result);
2519 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002520 PyObject *obj = items[i];
2521 Py_ssize_t size = PyBytes_GET_SIZE(obj);
2522 memcpy(dest, PyBytes_AS_STRING(obj), size);
2523 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002524 }
2525
2526 /* Done */
2527 Py_DECREF(seq);
2528 return result;
2529
2530 /* Error handling */
2531 error:
2532 Py_DECREF(seq);
2533 return NULL;
2534}
2535
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002536PyDoc_STRVAR(fromhex_doc,
2537"bytes.fromhex(string) -> bytes\n\
2538\n\
2539Create a bytes object from a string of hexadecimal numbers.\n\
2540Spaces between two numbers are accepted. Example:\n\
2541bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2542
/* Convert one hexadecimal digit character to its numeric value.
 *
 * c is a character code as accepted by the <ctype.h> functions.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else.
 */
static int
hex_digit_to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    /* Fold upper case onto lower case so one range test covers both. */
    lowered = isupper(c) ? tolower(c) : c;
    if ('a' <= lowered && lowered <= 'f')
        return lowered - 'a' + 10;

    return -1;
}
2556
2557static PyObject *
2558bytes_fromhex(PyObject *cls, PyObject *args)
2559{
2560 PyObject *newbytes;
2561 char *hex, *buf;
2562 Py_ssize_t len, byteslen, i, j;
2563 int top, bot;
2564
2565 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2566 return NULL;
2567
2568 byteslen = len / 2; /* max length if there are no spaces */
2569
2570 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2571 if (!newbytes)
2572 return NULL;
2573 buf = PyBytes_AS_STRING(newbytes);
2574
2575 for (i = j = 0; ; i += 2) {
2576 /* skip over spaces in the input */
2577 while (Py_CHARMASK(hex[i]) == ' ')
2578 i++;
2579 if (i >= len)
2580 break;
2581 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2582 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2583 if (top == -1 || bot == -1) {
2584 PyErr_Format(PyExc_ValueError,
2585 "non-hexadecimal number string '%c%c' found in "
2586 "fromhex() arg at position %zd",
2587 hex[i], hex[i+1], i);
2588 goto error;
2589 }
2590 buf[j++] = (top << 4) + bot;
2591 }
2592 if (PyBytes_Resize(newbytes, j) < 0)
2593 goto error;
2594 return newbytes;
2595
2596 error:
2597 Py_DECREF(newbytes);
2598 return NULL;
2599}
2600
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002601PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2602
2603static PyObject *
2604bytes_reduce(PyBytesObject *self)
2605{
2606 return Py_BuildValue("(O(s#))",
2607 self->ob_type,
2608 self->ob_bytes == NULL ? "" : self->ob_bytes,
2609 self->ob_size);
2610}
2611
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002612static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002613 (lenfunc)bytes_length, /* sq_length */
2614 (binaryfunc)bytes_concat, /* sq_concat */
2615 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2616 (ssizeargfunc)bytes_getitem, /* sq_item */
2617 0, /* sq_slice */
2618 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2619 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002620 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002621 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2622 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002623};
2624
2625static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002626 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002627 (binaryfunc)bytes_subscript,
2628 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002629};
2630
2631static PyBufferProcs bytes_as_buffer = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002632 (readbufferproc)bytes_getbuffer,
2633 (writebufferproc)bytes_getbuffer,
2634 (segcountproc)bytes_getsegcount,
2635 /* XXX Bytes are not characters! But we need to implement
2636 bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
2637 (charbufferproc)bytes_getbuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002638};
2639
2640static PyMethodDef
2641bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002642 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2643 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2644 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2645 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2646 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2647 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2648 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2649 startswith__doc__},
2650 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2651 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2652 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2653 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2654 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2655 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2656 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2657 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2658 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2659 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
2660 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
2661 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00002662 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002663 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002664 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2665 fromhex_doc},
Guido van Rossum20188312006-05-05 15:15:40 +00002666 {"join", (PyCFunction)bytes_join, METH_O|METH_CLASS, join_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002667 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00002668 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002669};
2670
2671PyDoc_STRVAR(bytes_doc,
2672"bytes([iterable]) -> new array of bytes.\n\
2673\n\
2674If an argument is given it must be an iterable yielding ints in range(256).");
2675
2676PyTypeObject PyBytes_Type = {
2677 PyObject_HEAD_INIT(&PyType_Type)
2678 0,
2679 "bytes",
2680 sizeof(PyBytesObject),
2681 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00002682 (destructor)bytes_dealloc, /* tp_dealloc */
2683 0, /* tp_print */
2684 0, /* tp_getattr */
2685 0, /* tp_setattr */
2686 0, /* tp_compare */
2687 (reprfunc)bytes_repr, /* tp_repr */
2688 0, /* tp_as_number */
2689 &bytes_as_sequence, /* tp_as_sequence */
2690 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002691 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00002692 0, /* tp_call */
2693 (reprfunc)bytes_str, /* tp_str */
2694 PyObject_GenericGetAttr, /* tp_getattro */
2695 0, /* tp_setattro */
2696 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002697 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002698 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00002699 bytes_doc, /* tp_doc */
2700 0, /* tp_traverse */
2701 0, /* tp_clear */
2702 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2703 0, /* tp_weaklistoffset */
2704 0, /* tp_iter */
2705 0, /* tp_iternext */
2706 bytes_methods, /* tp_methods */
2707 0, /* tp_members */
2708 0, /* tp_getset */
2709 0, /* tp_base */
2710 0, /* tp_dict */
2711 0, /* tp_descr_get */
2712 0, /* tp_descr_set */
2713 0, /* tp_dictoffset */
2714 (initproc)bytes_init, /* tp_init */
2715 PyType_GenericAlloc, /* tp_alloc */
2716 PyType_GenericNew, /* tp_new */
2717 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002718};