blob: f7de8bdd867990aca01c92501d96a9e2bfedd959 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
28 nullbytes->ob_size = nullbytes->ob_alloc = 0;
29 return 1;
30}
31
32/* end nullbytes support */
33
34static int _getbytevalue(PyObject* arg, int *value)
35{
36 PyObject *intarg = PyNumber_Int(arg);
37 if (! intarg)
38 return 0;
39 *value = PyInt_AsLong(intarg);
40 Py_DECREF(intarg);
41 if (*value < 0 || *value >= 256) {
42 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
43 return 0;
44 }
45 return 1;
46}
47
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000048/* Direct API functions */
49
50PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000051PyBytes_FromObject(PyObject *input)
52{
53 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
54 input, NULL);
55}
56
57PyObject *
58PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000059{
60 PyBytesObject *new;
61
Guido van Rossumd624f182006-04-24 13:47:05 +000062 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000063
64 new = PyObject_New(PyBytesObject, &PyBytes_Type);
65 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +000066 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000067
Guido van Rossumd624f182006-04-24 13:47:05 +000068 if (size == 0)
69 new->ob_bytes = NULL;
70 else {
71 new->ob_bytes = PyMem_Malloc(size);
72 if (new->ob_bytes == NULL) {
73 Py_DECREF(new);
74 return NULL;
75 }
76 if (bytes != NULL)
77 memcpy(new->ob_bytes, bytes, size);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000078 }
Guido van Rossuma0867f72006-05-05 04:34:18 +000079 new->ob_size = new->ob_alloc = size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000080
81 return (PyObject *)new;
82}
83
84Py_ssize_t
85PyBytes_Size(PyObject *self)
86{
87 assert(self != NULL);
88 assert(PyBytes_Check(self));
89
Guido van Rossum20188312006-05-05 15:15:40 +000090 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000091}
92
93char *
94PyBytes_AsString(PyObject *self)
95{
96 assert(self != NULL);
97 assert(PyBytes_Check(self));
98
Guido van Rossum20188312006-05-05 15:15:40 +000099 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000100}
101
102int
103PyBytes_Resize(PyObject *self, Py_ssize_t size)
104{
105 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000106 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000107
108 assert(self != NULL);
109 assert(PyBytes_Check(self));
110 assert(size >= 0);
111
Guido van Rossuma0867f72006-05-05 04:34:18 +0000112 if (size < alloc / 2) {
113 /* Major downsize; resize down to exact size */
114 alloc = size;
115 }
116 else if (size <= alloc) {
117 /* Within allocated size; quick exit */
118 ((PyBytesObject *)self)->ob_size = size;
119 return 0;
120 }
121 else if (size <= alloc * 1.125) {
122 /* Moderate upsize; overallocate similar to list_resize() */
123 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
124 }
125 else {
126 /* Major upsize; resize up to exact size */
127 alloc = size;
128 }
129
130 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000132 PyErr_NoMemory();
133 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134 }
135
Guido van Rossumd624f182006-04-24 13:47:05 +0000136 ((PyBytesObject *)self)->ob_bytes = sval;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137 ((PyBytesObject *)self)->ob_size = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000138 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000139
140 return 0;
141}
142
143/* Functions stuffed into the type object */
144
145static Py_ssize_t
146bytes_length(PyBytesObject *self)
147{
148 return self->ob_size;
149}
150
151static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000152bytes_concat(PyBytesObject *self, PyObject *other)
153{
154 PyBytesObject *result;
155 Py_ssize_t mysize;
156 Py_ssize_t size;
157
158 if (!PyBytes_Check(other)) {
159 PyErr_Format(PyExc_TypeError,
160 "can't concat bytes to %.100s", other->ob_type->tp_name);
161 return NULL;
162 }
163
164 mysize = self->ob_size;
165 size = mysize + ((PyBytesObject *)other)->ob_size;
166 if (size < 0)
167 return PyErr_NoMemory();
168 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
169 if (result != NULL) {
170 memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
171 memcpy(result->ob_bytes + self->ob_size,
172 ((PyBytesObject *)other)->ob_bytes,
173 ((PyBytesObject *)other)->ob_size);
174 }
175 return (PyObject *)result;
176}
177
178static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000179bytes_iconcat(PyBytesObject *self, PyObject *other)
180{
181 Py_ssize_t mysize;
182 Py_ssize_t osize;
183 Py_ssize_t size;
184
185 if (!PyBytes_Check(other)) {
186 PyErr_Format(PyExc_TypeError,
187 "can't concat bytes to %.100s", other->ob_type->tp_name);
188 return NULL;
189 }
190
191 mysize = self->ob_size;
192 osize = ((PyBytesObject *)other)->ob_size;
193 size = mysize + osize;
194 if (size < 0)
195 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000196 if (size <= self->ob_alloc)
197 self->ob_size = size;
198 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000199 return NULL;
200 memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
201 Py_INCREF(self);
202 return (PyObject *)self;
203}
204
205static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000206bytes_repeat(PyBytesObject *self, Py_ssize_t count)
207{
208 PyBytesObject *result;
209 Py_ssize_t mysize;
210 Py_ssize_t size;
211
212 if (count < 0)
213 count = 0;
214 mysize = self->ob_size;
215 size = mysize * count;
216 if (count != 0 && size / count != mysize)
217 return PyErr_NoMemory();
218 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
219 if (result != NULL && size != 0) {
220 if (mysize == 1)
221 memset(result->ob_bytes, self->ob_bytes[0], size);
222 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000223 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000224 for (i = 0; i < count; i++)
225 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
226 }
227 }
228 return (PyObject *)result;
229}
230
231static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000232bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
233{
234 Py_ssize_t mysize;
235 Py_ssize_t size;
236
237 if (count < 0)
238 count = 0;
239 mysize = self->ob_size;
240 size = mysize * count;
241 if (count != 0 && size / count != mysize)
242 return PyErr_NoMemory();
Guido van Rossuma0867f72006-05-05 04:34:18 +0000243 if (size <= self->ob_alloc)
244 self->ob_size = size;
245 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000246 return NULL;
247
248 if (mysize == 1)
249 memset(self->ob_bytes, self->ob_bytes[0], size);
250 else {
251 Py_ssize_t i;
252 for (i = 1; i < count; i++)
253 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
254 }
255
256 Py_INCREF(self);
257 return (PyObject *)self;
258}
259
260static int
261bytes_substring(PyBytesObject *self, PyBytesObject *other)
262{
263 Py_ssize_t i;
264
265 if (other->ob_size == 1) {
266 return memchr(self->ob_bytes, other->ob_bytes[0],
267 self->ob_size) != NULL;
268 }
269 if (other->ob_size == 0)
270 return 1; /* Edge case */
271 for (i = 0; i + other->ob_size <= self->ob_size; i++) {
272 /* XXX Yeah, yeah, lots of optimizations possible... */
273 if (memcmp(self->ob_bytes + i, other->ob_bytes, other->ob_size) == 0)
274 return 1;
275 }
276 return 0;
277}
278
279static int
280bytes_contains(PyBytesObject *self, PyObject *value)
281{
282 Py_ssize_t ival;
283
284 if (PyBytes_Check(value))
285 return bytes_substring(self, (PyBytesObject *)value);
286
Thomas Woutersd204a712006-08-22 13:41:17 +0000287 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000288 if (ival == -1 && PyErr_Occurred())
289 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000290 if (ival < 0 || ival >= 256) {
291 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
292 return -1;
293 }
294
295 return memchr(self->ob_bytes, ival, self->ob_size) != NULL;
296}
297
298static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000299bytes_getitem(PyBytesObject *self, Py_ssize_t i)
300{
301 if (i < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000302 i += self->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000303 if (i < 0 || i >= self->ob_size) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000304 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
305 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000306 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
308}
309
310static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000311bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000312{
Thomas Wouters376446d2006-12-19 08:30:14 +0000313 if (PyIndex_Check(item)) {
314 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000315
Thomas Wouters376446d2006-12-19 08:30:14 +0000316 if (i == -1 && PyErr_Occurred())
317 return NULL;
318
319 if (i < 0)
320 i += PyBytes_GET_SIZE(self);
321
322 if (i < 0 || i >= self->ob_size) {
323 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
324 return NULL;
325 }
326 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
327 }
328 else if (PySlice_Check(item)) {
329 Py_ssize_t start, stop, step, slicelength, cur, i;
330 if (PySlice_GetIndicesEx((PySliceObject *)item,
331 PyBytes_GET_SIZE(self),
332 &start, &stop, &step, &slicelength) < 0) {
333 return NULL;
334 }
335
336 if (slicelength <= 0)
337 return PyBytes_FromStringAndSize("", 0);
338 else if (step == 1) {
339 return PyBytes_FromStringAndSize(self->ob_bytes + start,
340 slicelength);
341 }
342 else {
343 char *source_buf = PyBytes_AS_STRING(self);
344 char *result_buf = (char *)PyMem_Malloc(slicelength);
345 PyObject *result;
346
347 if (result_buf == NULL)
348 return PyErr_NoMemory();
349
350 for (cur = start, i = 0; i < slicelength;
351 cur += step, i++) {
352 result_buf[i] = source_buf[cur];
353 }
354 result = PyBytes_FromStringAndSize(result_buf, slicelength);
355 PyMem_Free(result_buf);
356 return result;
357 }
358 }
359 else {
360 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
361 return NULL;
362 }
363}
364
Guido van Rossumd624f182006-04-24 13:47:05 +0000365static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000366bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000367 PyObject *values)
368{
369 int avail;
370 int needed;
371 char *bytes;
372
373 if (values == NULL) {
374 bytes = NULL;
375 needed = 0;
376 }
377 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
378 /* Make a copy an call this function recursively */
379 int err;
380 values = PyBytes_FromObject(values);
381 if (values == NULL)
382 return -1;
383 err = bytes_setslice(self, lo, hi, values);
384 Py_DECREF(values);
385 return err;
386 }
387 else {
388 assert(PyBytes_Check(values));
389 bytes = ((PyBytesObject *)values)->ob_bytes;
390 needed = ((PyBytesObject *)values)->ob_size;
391 }
392
393 if (lo < 0)
394 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000395 if (hi < lo)
396 hi = lo;
Guido van Rossumd624f182006-04-24 13:47:05 +0000397 if (hi > self->ob_size)
398 hi = self->ob_size;
399
400 avail = hi - lo;
401 if (avail < 0)
402 lo = hi = avail = 0;
403
404 if (avail != needed) {
405 if (avail > needed) {
406 /*
407 0 lo hi old_size
408 | |<----avail----->|<-----tomove------>|
409 | |<-needed->|<-----tomove------>|
410 0 lo new_hi new_size
411 */
412 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
413 self->ob_size - hi);
414 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000415 if (PyBytes_Resize((PyObject *)self,
Guido van Rossumd624f182006-04-24 13:47:05 +0000416 self->ob_size + needed - avail) < 0)
417 return -1;
418 if (avail < needed) {
419 /*
420 0 lo hi old_size
421 | |<-avail->|<-----tomove------>|
422 | |<----needed---->|<-----tomove------>|
423 0 lo new_hi new_size
424 */
425 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
426 self->ob_size - lo - needed);
427 }
428 }
429
430 if (needed > 0)
431 memcpy(self->ob_bytes + lo, bytes, needed);
432
433 return 0;
434}
435
436static int
437bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
438{
439 Py_ssize_t ival;
440
441 if (i < 0)
442 i += self->ob_size;
443
444 if (i < 0 || i >= self->ob_size) {
445 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
446 return -1;
447 }
448
449 if (value == NULL)
450 return bytes_setslice(self, i, i+1, NULL);
451
Thomas Woutersd204a712006-08-22 13:41:17 +0000452 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000453 if (ival == -1 && PyErr_Occurred())
454 return -1;
455
456 if (ival < 0 || ival >= 256) {
457 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
458 return -1;
459 }
460
461 self->ob_bytes[i] = ival;
462 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000463}
464
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000465static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000466bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
467{
468 Py_ssize_t start, stop, step, slicelen, needed;
469 char *bytes;
470
471 if (PyIndex_Check(item)) {
472 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
473
474 if (i == -1 && PyErr_Occurred())
475 return -1;
476
477 if (i < 0)
478 i += PyBytes_GET_SIZE(self);
479
480 if (i < 0 || i >= self->ob_size) {
481 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
482 return -1;
483 }
484
485 if (values == NULL) {
486 /* Fall through to slice assignment */
487 start = i;
488 stop = i + 1;
489 step = 1;
490 slicelen = 1;
491 }
492 else {
493 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
494 if (ival == -1 && PyErr_Occurred())
495 return -1;
496 if (ival < 0 || ival >= 256) {
497 PyErr_SetString(PyExc_ValueError,
498 "byte must be in range(0, 256)");
499 return -1;
500 }
501 self->ob_bytes[i] = (char)ival;
502 return 0;
503 }
504 }
505 else if (PySlice_Check(item)) {
506 if (PySlice_GetIndicesEx((PySliceObject *)item,
507 PyBytes_GET_SIZE(self),
508 &start, &stop, &step, &slicelen) < 0) {
509 return -1;
510 }
511 }
512 else {
513 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
514 return -1;
515 }
516
517 if (values == NULL) {
518 bytes = NULL;
519 needed = 0;
520 }
521 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
522 /* Make a copy an call this function recursively */
523 int err;
524 values = PyBytes_FromObject(values);
525 if (values == NULL)
526 return -1;
527 err = bytes_ass_subscript(self, item, values);
528 Py_DECREF(values);
529 return err;
530 }
531 else {
532 assert(PyBytes_Check(values));
533 bytes = ((PyBytesObject *)values)->ob_bytes;
534 needed = ((PyBytesObject *)values)->ob_size;
535 }
536 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
537 if ((step < 0 && start < stop) ||
538 (step > 0 && start > stop))
539 stop = start;
540 if (step == 1) {
541 if (slicelen != needed) {
542 if (slicelen > needed) {
543 /*
544 0 start stop old_size
545 | |<---slicelen--->|<-----tomove------>|
546 | |<-needed->|<-----tomove------>|
547 0 lo new_hi new_size
548 */
549 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
550 self->ob_size - stop);
551 }
552 if (PyBytes_Resize((PyObject *)self,
553 self->ob_size + needed - slicelen) < 0)
554 return -1;
555 if (slicelen < needed) {
556 /*
557 0 lo hi old_size
558 | |<-avail->|<-----tomove------>|
559 | |<----needed---->|<-----tomove------>|
560 0 lo new_hi new_size
561 */
562 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
563 self->ob_size - start - needed);
564 }
565 }
566
567 if (needed > 0)
568 memcpy(self->ob_bytes + start, bytes, needed);
569
570 return 0;
571 }
572 else {
573 if (needed == 0) {
574 /* Delete slice */
575 Py_ssize_t cur, i;
576
577 if (step < 0) {
578 stop = start + 1;
579 start = stop + step * (slicelen - 1) - 1;
580 step = -step;
581 }
582 for (cur = start, i = 0;
583 i < slicelen; cur += step, i++) {
584 Py_ssize_t lim = step - 1;
585
586 if (cur + step >= PyBytes_GET_SIZE(self))
587 lim = PyBytes_GET_SIZE(self) - cur - 1;
588
589 memmove(self->ob_bytes + cur - i,
590 self->ob_bytes + cur + 1, lim);
591 }
592 /* Move the tail of the bytes, in one chunk */
593 cur = start + slicelen*step;
594 if (cur < PyBytes_GET_SIZE(self)) {
595 memmove(self->ob_bytes + cur - slicelen,
596 self->ob_bytes + cur,
597 PyBytes_GET_SIZE(self) - cur);
598 }
599 if (PyBytes_Resize((PyObject *)self,
600 PyBytes_GET_SIZE(self) - slicelen) < 0)
601 return -1;
602
603 return 0;
604 }
605 else {
606 /* Assign slice */
607 Py_ssize_t cur, i;
608
609 if (needed != slicelen) {
610 PyErr_Format(PyExc_ValueError,
611 "attempt to assign bytes of size %zd "
612 "to extended slice of size %zd",
613 needed, slicelen);
614 return -1;
615 }
616 for (cur = start, i = 0; i < slicelen; cur += step, i++)
617 self->ob_bytes[cur] = bytes[i];
618 return 0;
619 }
620 }
621}
622
623static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000624bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
625{
Guido van Rossumd624f182006-04-24 13:47:05 +0000626 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000627 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000628 const char *encoding = NULL;
629 const char *errors = NULL;
630 Py_ssize_t count;
631 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000632 PyObject *(*iternext)(PyObject *);
633
Guido van Rossuma0867f72006-05-05 04:34:18 +0000634 if (self->ob_size != 0) {
635 /* Empty previous contents (yes, do this first of all!) */
636 if (PyBytes_Resize((PyObject *)self, 0) < 0)
637 return -1;
638 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000639
Guido van Rossumd624f182006-04-24 13:47:05 +0000640 /* Parse arguments */
641 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
642 &arg, &encoding, &errors))
643 return -1;
644
645 /* Make a quick exit if no first argument */
646 if (arg == NULL) {
647 if (encoding != NULL || errors != NULL) {
648 PyErr_SetString(PyExc_TypeError,
649 "encoding or errors without sequence argument");
650 return -1;
651 }
652 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000653 }
654
Guido van Rossumd624f182006-04-24 13:47:05 +0000655 if (PyUnicode_Check(arg)) {
656 /* Encode via the codec registry */
657 PyObject *encoded;
658 char *bytes;
659 Py_ssize_t size;
660 if (encoding == NULL)
661 encoding = PyUnicode_GetDefaultEncoding();
662 encoded = PyCodec_Encode(arg, encoding, errors);
663 if (encoded == NULL)
664 return -1;
665 if (!PyString_Check(encoded)) {
666 PyErr_Format(PyExc_TypeError,
667 "encoder did not return a string object (type=%.400s)",
668 encoded->ob_type->tp_name);
669 Py_DECREF(encoded);
670 return -1;
671 }
672 bytes = PyString_AS_STRING(encoded);
673 size = PyString_GET_SIZE(encoded);
Guido van Rossuma0867f72006-05-05 04:34:18 +0000674 if (size <= self->ob_alloc)
675 self->ob_size = size;
676 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000677 Py_DECREF(encoded);
678 return -1;
679 }
680 memcpy(self->ob_bytes, bytes, size);
681 Py_DECREF(encoded);
682 return 0;
683 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000684
Guido van Rossumd624f182006-04-24 13:47:05 +0000685 /* If it's not unicode, there can't be encoding or errors */
686 if (encoding != NULL || errors != NULL) {
687 PyErr_SetString(PyExc_TypeError,
688 "encoding or errors without a string argument");
689 return -1;
690 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000691
Guido van Rossumd624f182006-04-24 13:47:05 +0000692 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000693 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000694 if (count == -1 && PyErr_Occurred())
695 PyErr_Clear();
696 else {
697 if (count < 0) {
698 PyErr_SetString(PyExc_ValueError, "negative count");
699 return -1;
700 }
701 if (count > 0) {
702 if (PyBytes_Resize((PyObject *)self, count))
703 return -1;
704 memset(self->ob_bytes, 0, count);
705 }
706 return 0;
707 }
708
709 if (PyObject_CheckReadBuffer(arg)) {
710 const void *bytes;
711 Py_ssize_t size;
712 if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
713 return -1;
714 if (PyBytes_Resize((PyObject *)self, size) < 0)
715 return -1;
716 memcpy(self->ob_bytes, bytes, size);
717 return 0;
718 }
719
720 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000721
722 /* Get the iterator */
723 it = PyObject_GetIter(arg);
724 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000725 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000726 iternext = *it->ob_type->tp_iternext;
727
728 /* Run the iterator to exhaustion */
729 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000730 PyObject *item;
731 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000732
Guido van Rossumd624f182006-04-24 13:47:05 +0000733 /* Get the next item */
734 item = iternext(it);
735 if (item == NULL) {
736 if (PyErr_Occurred()) {
737 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
738 goto error;
739 PyErr_Clear();
740 }
741 break;
742 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000743
Guido van Rossumd624f182006-04-24 13:47:05 +0000744 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000745 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000746 Py_DECREF(item);
747 if (value == -1 && PyErr_Occurred())
748 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000749
Guido van Rossumd624f182006-04-24 13:47:05 +0000750 /* Range check */
751 if (value < 0 || value >= 256) {
752 PyErr_SetString(PyExc_ValueError,
753 "bytes must be in range(0, 256)");
754 goto error;
755 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000756
Guido van Rossumd624f182006-04-24 13:47:05 +0000757 /* Append the byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000758 if (self->ob_size < self->ob_alloc)
759 self->ob_size++;
760 else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000761 goto error;
762 self->ob_bytes[self->ob_size-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000763 }
764
765 /* Clean up and return success */
766 Py_DECREF(it);
767 return 0;
768
769 error:
770 /* Error handling when it != NULL */
771 Py_DECREF(it);
772 return -1;
773}
774
Georg Brandlee91be42007-02-24 19:41:35 +0000775/* Mostly copied from string_repr, but without the
776 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000777static PyObject *
778bytes_repr(PyBytesObject *self)
779{
Georg Brandlee91be42007-02-24 19:41:35 +0000780 size_t newsize = 3 + 4 * self->ob_size;
781 PyObject *v;
782 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != self->ob_size) {
783 PyErr_SetString(PyExc_OverflowError,
784 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000785 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000786 }
Georg Brandlee91be42007-02-24 19:41:35 +0000787 v = PyString_FromStringAndSize((char *)NULL, newsize);
788 if (v == NULL) {
789 return NULL;
790 }
791 else {
792 register Py_ssize_t i;
793 register char c;
794 register char *p;
795 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Georg Brandlee91be42007-02-24 19:41:35 +0000797 p = PyString_AS_STRING(v);
798 *p++ = 'b';
799 *p++ = quote;
800 for (i = 0; i < self->ob_size; i++) {
801 /* There's at least enough room for a hex escape
802 and a closing quote. */
803 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
804 c = self->ob_bytes[i];
805 if (c == quote || c == '\\')
806 *p++ = '\\', *p++ = c;
807 else if (c == '\t')
808 *p++ = '\\', *p++ = 't';
809 else if (c == '\n')
810 *p++ = '\\', *p++ = 'n';
811 else if (c == '\r')
812 *p++ = '\\', *p++ = 'r';
813 else if (c == 0)
814 *p++ = '\\', *p++ = '0';
815 else if (c < ' ' || c >= 0x7f) {
816 /* For performance, we don't want to call
817 PyOS_snprintf here (extra layers of
818 function call). */
819 sprintf(p, "\\x%02x", c & 0xff);
820 p += 4;
821 }
822 else
823 *p++ = c;
824 }
825 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
826 *p++ = quote;
827 *p = '\0';
828 _PyString_Resize(
829 &v, (p - PyString_AS_STRING(v)));
830 return v;
831 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832}
833
834static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000835bytes_str(PyBytesObject *self)
836{
837 return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
838}
839
840static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000841bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
842{
843 PyObject *res;
844 int minsize;
845 int cmp;
846
847 if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000848 Py_INCREF(Py_NotImplemented);
849 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000850 }
851
852 if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000853 /* Shortcut: if the lengths differ, the objects differ */
854 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000855 }
856 else {
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 minsize = self->ob_size;
858 if (other->ob_size < minsize)
859 minsize = other->ob_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000860
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 cmp = memcmp(self->ob_bytes, other->ob_bytes, minsize);
862 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000863
Guido van Rossumd624f182006-04-24 13:47:05 +0000864 if (cmp == 0) {
865 if (self->ob_size < other->ob_size)
866 cmp = -1;
867 else if (self->ob_size > other->ob_size)
868 cmp = 1;
869 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000870
Guido van Rossumd624f182006-04-24 13:47:05 +0000871 switch (op) {
872 case Py_LT: cmp = cmp < 0; break;
873 case Py_LE: cmp = cmp <= 0; break;
874 case Py_EQ: cmp = cmp == 0; break;
875 case Py_NE: cmp = cmp != 0; break;
876 case Py_GT: cmp = cmp > 0; break;
877 case Py_GE: cmp = cmp >= 0; break;
878 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000879 }
880
881 res = cmp ? Py_True : Py_False;
882 Py_INCREF(res);
883 return res;
884}
885
886static void
887bytes_dealloc(PyBytesObject *self)
888{
Guido van Rossumd624f182006-04-24 13:47:05 +0000889 if (self->ob_bytes != 0) {
890 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000891 }
892 self->ob_type->tp_free((PyObject *)self);
893}
894
Guido van Rossumd624f182006-04-24 13:47:05 +0000895static Py_ssize_t
896bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
897{
898 if (index != 0) {
899 PyErr_SetString(PyExc_SystemError,
Neal Norwitz6968b052007-02-27 19:02:19 +0000900 "accessing non-existent bytes segment");
Guido van Rossumd624f182006-04-24 13:47:05 +0000901 return -1;
902 }
903 *ptr = (void *)self->ob_bytes;
904 return self->ob_size;
905}
906
907static Py_ssize_t
908bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
909{
910 if (lenp)
911 *lenp = self->ob_size;
912 return 1;
913}
914
Neal Norwitz6968b052007-02-27 19:02:19 +0000915
916
917/* -------------------------------------------------------------------- */
918/* Methods */
919
920#define STRINGLIB_CHAR char
921#define STRINGLIB_CMP memcmp
922#define STRINGLIB_LEN PyBytes_GET_SIZE
923#define STRINGLIB_NEW PyBytes_FromStringAndSize
924#define STRINGLIB_EMPTY nullbytes
925
926#include "stringlib/fastsearch.h"
927#include "stringlib/count.h"
928#include "stringlib/find.h"
929#include "stringlib/partition.h"
930
931
932/* The following Py_LOCAL_INLINE and Py_LOCAL functions
933were copied from the old char* style string object. */
934
935Py_LOCAL_INLINE(void)
936_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
937{
938 if (*end > len)
939 *end = len;
940 else if (*end < 0)
941 *end += len;
942 if (*end < 0)
943 *end = 0;
944 if (*start < 0)
945 *start += len;
946 if (*start < 0)
947 *start = 0;
948}
949
950
951Py_LOCAL_INLINE(Py_ssize_t)
952bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
953{
954 PyObject *subobj;
955 const char *sub;
956 Py_ssize_t sub_len;
957 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
958
959 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
960 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
961 return -2;
962 if (PyBytes_Check(subobj)) {
963 sub = PyBytes_AS_STRING(subobj);
964 sub_len = PyBytes_GET_SIZE(subobj);
965 }
966 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
967 /* XXX - the "expected a character buffer object" is pretty
968 confusing for a non-expert. remap to something else ? */
969 return -2;
970
971 if (dir > 0)
972 return stringlib_find_slice(
973 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
974 sub, sub_len, start, end);
975 else
976 return stringlib_rfind_slice(
977 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
978 sub, sub_len, start, end);
979}
980
981
982PyDoc_STRVAR(find__doc__,
983"B.find(sub [,start [,end]]) -> int\n\
984\n\
985Return the lowest index in B where subsection sub is found,\n\
986such that sub is contained within s[start,end]. Optional\n\
987arguments start and end are interpreted as in slice notation.\n\
988\n\
989Return -1 on failure.");
990
991static PyObject *
992bytes_find(PyBytesObject *self, PyObject *args)
993{
994 Py_ssize_t result = bytes_find_internal(self, args, +1);
995 if (result == -2)
996 return NULL;
997 return PyInt_FromSsize_t(result);
998}
999
1000PyDoc_STRVAR(count__doc__,
1001"B.count(sub[, start[, end]]) -> int\n\
1002\n\
1003Return the number of non-overlapping occurrences of subsection sub in\n\
1004bytes B[start:end]. Optional arguments start and end are interpreted\n\
1005as in slice notation.");
1006
1007static PyObject *
1008bytes_count(PyBytesObject *self, PyObject *args)
1009{
1010 PyObject *sub_obj;
1011 const char *str = PyBytes_AS_STRING(self), *sub;
1012 Py_ssize_t sub_len;
1013 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1014
1015 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1016 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1017 return NULL;
1018
1019 if (PyBytes_Check(sub_obj)) {
1020 sub = PyBytes_AS_STRING(sub_obj);
1021 sub_len = PyBytes_GET_SIZE(sub_obj);
1022 }
1023 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1024 return NULL;
1025
1026 _adjust_indices(&start, &end, PyString_GET_SIZE(self));
1027
1028 return PyInt_FromSsize_t(
1029 stringlib_count(str + start, end - start, sub, sub_len)
1030 );
1031}
1032
1033
1034PyDoc_STRVAR(index__doc__,
1035"B.index(sub [,start [,end]]) -> int\n\
1036\n\
1037Like B.find() but raise ValueError when the subsection is not found.");
1038
1039static PyObject *
1040bytes_index(PyBytesObject *self, PyObject *args)
1041{
1042 Py_ssize_t result = bytes_find_internal(self, args, +1);
1043 if (result == -2)
1044 return NULL;
1045 if (result == -1) {
1046 PyErr_SetString(PyExc_ValueError,
1047 "subsection not found");
1048 return NULL;
1049 }
1050 return PyInt_FromSsize_t(result);
1051}
1052
1053
1054PyDoc_STRVAR(rfind__doc__,
1055"B.rfind(sub [,start [,end]]) -> int\n\
1056\n\
1057Return the highest index in B where subsection sub is found,\n\
1058such that sub is contained within s[start,end]. Optional\n\
1059arguments start and end are interpreted as in slice notation.\n\
1060\n\
1061Return -1 on failure.");
1062
1063static PyObject *
1064bytes_rfind(PyBytesObject *self, PyObject *args)
1065{
1066 Py_ssize_t result = bytes_find_internal(self, args, -1);
1067 if (result == -2)
1068 return NULL;
1069 return PyInt_FromSsize_t(result);
1070}
1071
1072
1073PyDoc_STRVAR(rindex__doc__,
1074"B.rindex(sub [,start [,end]]) -> int\n\
1075\n\
1076Like B.rfind() but raise ValueError when the subsection is not found.");
1077
1078static PyObject *
1079bytes_rindex(PyBytesObject *self, PyObject *args)
1080{
1081 Py_ssize_t result = bytes_find_internal(self, args, -1);
1082 if (result == -2)
1083 return NULL;
1084 if (result == -1) {
1085 PyErr_SetString(PyExc_ValueError,
1086 "subsection not found");
1087 return NULL;
1088 }
1089 return PyInt_FromSsize_t(result);
1090}
1091
1092
1093/* Matches the end (direction >= 0) or start (direction < 0) of self
1094 * against substr, using the start and end arguments. Returns
1095 * -1 on error, 0 if not found and 1 if found.
1096 */
1097Py_LOCAL(int)
1098_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1099 Py_ssize_t end, int direction)
1100{
1101 Py_ssize_t len = PyBytes_GET_SIZE(self);
1102 Py_ssize_t slen;
1103 const char* sub;
1104 const char* str;
1105
1106 if (PyBytes_Check(substr)) {
1107 sub = PyBytes_AS_STRING(substr);
1108 slen = PyBytes_GET_SIZE(substr);
1109 }
1110 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1111 return -1;
1112 str = PyBytes_AS_STRING(self);
1113
1114 _adjust_indices(&start, &end, len);
1115
1116 if (direction < 0) {
1117 /* startswith */
1118 if (start+slen > len)
1119 return 0;
1120 } else {
1121 /* endswith */
1122 if (end-start < slen || start > len)
1123 return 0;
1124
1125 if (end-slen > start)
1126 start = end - slen;
1127 }
1128 if (end-start >= slen)
1129 return ! memcmp(str+start, sub, slen);
1130 return 0;
1131}
1132
1133
1134PyDoc_STRVAR(startswith__doc__,
1135"B.startswith(prefix[, start[, end]]) -> bool\n\
1136\n\
1137Return True if B starts with the specified prefix, False otherwise.\n\
1138With optional start, test B beginning at that position.\n\
1139With optional end, stop comparing B at that position.\n\
1140prefix can also be a tuple of strings to try.");
1141
1142static PyObject *
1143bytes_startswith(PyBytesObject *self, PyObject *args)
1144{
1145 Py_ssize_t start = 0;
1146 Py_ssize_t end = PY_SSIZE_T_MAX;
1147 PyObject *subobj;
1148 int result;
1149
1150 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1151 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1152 return NULL;
1153 if (PyTuple_Check(subobj)) {
1154 Py_ssize_t i;
1155 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1156 result = _bytes_tailmatch(self,
1157 PyTuple_GET_ITEM(subobj, i),
1158 start, end, -1);
1159 if (result == -1)
1160 return NULL;
1161 else if (result) {
1162 Py_RETURN_TRUE;
1163 }
1164 }
1165 Py_RETURN_FALSE;
1166 }
1167 result = _bytes_tailmatch(self, subobj, start, end, -1);
1168 if (result == -1)
1169 return NULL;
1170 else
1171 return PyBool_FromLong(result);
1172}
1173
1174PyDoc_STRVAR(endswith__doc__,
1175"B.endswith(suffix[, start[, end]]) -> bool\n\
1176\n\
1177Return True if B ends with the specified suffix, False otherwise.\n\
1178With optional start, test B beginning at that position.\n\
1179With optional end, stop comparing B at that position.\n\
1180suffix can also be a tuple of strings to try.");
1181
1182static PyObject *
1183bytes_endswith(PyBytesObject *self, PyObject *args)
1184{
1185 Py_ssize_t start = 0;
1186 Py_ssize_t end = PY_SSIZE_T_MAX;
1187 PyObject *subobj;
1188 int result;
1189
1190 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1191 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1192 return NULL;
1193 if (PyTuple_Check(subobj)) {
1194 Py_ssize_t i;
1195 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1196 result = _bytes_tailmatch(self,
1197 PyTuple_GET_ITEM(subobj, i),
1198 start, end, +1);
1199 if (result == -1)
1200 return NULL;
1201 else if (result) {
1202 Py_RETURN_TRUE;
1203 }
1204 }
1205 Py_RETURN_FALSE;
1206 }
1207 result = _bytes_tailmatch(self, subobj, start, end, +1);
1208 if (result == -1)
1209 return NULL;
1210 else
1211 return PyBool_FromLong(result);
1212}
1213
1214
1215
1216PyDoc_STRVAR(translate__doc__,
1217"B.translate(table [,deletechars]) -> bytes\n\
1218\n\
1219Return a copy of the bytes B, where all characters occurring\n\
1220in the optional argument deletechars are removed, and the\n\
1221remaining characters have been mapped through the given\n\
1222translation table, which must be a bytes of length 256.");
1223
1224static PyObject *
1225bytes_translate(PyBytesObject *self, PyObject *args)
1226{
1227 register char *input, *output;
1228 register const char *table;
1229 register Py_ssize_t i, c, changed = 0;
1230 PyObject *input_obj = (PyObject*)self;
1231 const char *table1, *output_start, *del_table=NULL;
1232 Py_ssize_t inlen, tablen, dellen = 0;
1233 PyObject *result;
1234 int trans_table[256];
1235 PyObject *tableobj, *delobj = NULL;
1236
1237 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1238 &tableobj, &delobj))
1239 return NULL;
1240
1241 if (PyBytes_Check(tableobj)) {
1242 table1 = PyBytes_AS_STRING(tableobj);
1243 tablen = PyBytes_GET_SIZE(tableobj);
1244 }
1245 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1246 return NULL;
1247
1248 if (tablen != 256) {
1249 PyErr_SetString(PyExc_ValueError,
1250 "translation table must be 256 characters long");
1251 return NULL;
1252 }
1253
1254 if (delobj != NULL) {
1255 if (PyBytes_Check(delobj)) {
1256 del_table = PyBytes_AS_STRING(delobj);
1257 dellen = PyBytes_GET_SIZE(delobj);
1258 }
1259 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1260 return NULL;
1261 }
1262 else {
1263 del_table = NULL;
1264 dellen = 0;
1265 }
1266
1267 table = table1;
1268 inlen = PyBytes_GET_SIZE(input_obj);
1269 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1270 if (result == NULL)
1271 return NULL;
1272 output_start = output = PyBytes_AsString(result);
1273 input = PyBytes_AS_STRING(input_obj);
1274
1275 if (dellen == 0) {
1276 /* If no deletions are required, use faster code */
1277 for (i = inlen; --i >= 0; ) {
1278 c = Py_CHARMASK(*input++);
1279 if (Py_CHARMASK((*output++ = table[c])) != c)
1280 changed = 1;
1281 }
1282 if (changed || !PyBytes_CheckExact(input_obj))
1283 return result;
1284 Py_DECREF(result);
1285 Py_INCREF(input_obj);
1286 return input_obj;
1287 }
1288
1289 for (i = 0; i < 256; i++)
1290 trans_table[i] = Py_CHARMASK(table[i]);
1291
1292 for (i = 0; i < dellen; i++)
1293 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1294
1295 for (i = inlen; --i >= 0; ) {
1296 c = Py_CHARMASK(*input++);
1297 if (trans_table[c] != -1)
1298 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1299 continue;
1300 changed = 1;
1301 }
1302 if (!changed && PyBytes_CheckExact(input_obj)) {
1303 Py_DECREF(result);
1304 Py_INCREF(input_obj);
1305 return input_obj;
1306 }
1307 /* Fix the size of the resulting string */
1308 if (inlen > 0)
1309 PyBytes_Resize(result, output - output_start);
1310 return result;
1311}
1312
1313
/* Search directions understood by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within target[0:target_len],
   or NULL when there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs at target[offset].  The first and
   last bytes are compared before falling back to memcmp().
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1327
1328
1329/* Bytes ops must return a string. */
1330/* If the object is subclass of bytes, create a copy */
1331Py_LOCAL(PyBytesObject *)
1332return_self(PyBytesObject *self)
1333{
1334 if (PyBytes_CheckExact(self)) {
1335 Py_INCREF(self);
1336 return (PyBytesObject *)self;
1337 }
1338 return (PyBytesObject *)PyBytes_FromStringAndSize(
1339 PyBytes_AS_STRING(self),
1340 PyBytes_GET_SIZE(self));
1341}
1342
1343Py_LOCAL_INLINE(Py_ssize_t)
1344countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1345{
1346 Py_ssize_t count=0;
1347 const char *start=target;
1348 const char *end=target+target_len;
1349
1350 while ( (start=findchar(start, end-start, c)) != NULL ) {
1351 count++;
1352 if (count >= maxcount)
1353 break;
1354 start += 1;
1355 }
1356 return count;
1357}
1358
1359Py_LOCAL(Py_ssize_t)
1360findstring(const char *target, Py_ssize_t target_len,
1361 const char *pattern, Py_ssize_t pattern_len,
1362 Py_ssize_t start,
1363 Py_ssize_t end,
1364 int direction)
1365{
1366 if (start < 0) {
1367 start += target_len;
1368 if (start < 0)
1369 start = 0;
1370 }
1371 if (end > target_len) {
1372 end = target_len;
1373 } else if (end < 0) {
1374 end += target_len;
1375 if (end < 0)
1376 end = 0;
1377 }
1378
1379 /* zero-length substrings always match at the first attempt */
1380 if (pattern_len == 0)
1381 return (direction > 0) ? start : end;
1382
1383 end -= pattern_len;
1384
1385 if (direction < 0) {
1386 for (; end >= start; end--)
1387 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1388 return end;
1389 } else {
1390 for (; start <= end; start++)
1391 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1392 return start;
1393 }
1394 return -1;
1395}
1396
1397Py_LOCAL_INLINE(Py_ssize_t)
1398countstring(const char *target, Py_ssize_t target_len,
1399 const char *pattern, Py_ssize_t pattern_len,
1400 Py_ssize_t start,
1401 Py_ssize_t end,
1402 int direction, Py_ssize_t maxcount)
1403{
1404 Py_ssize_t count=0;
1405
1406 if (start < 0) {
1407 start += target_len;
1408 if (start < 0)
1409 start = 0;
1410 }
1411 if (end > target_len) {
1412 end = target_len;
1413 } else if (end < 0) {
1414 end += target_len;
1415 if (end < 0)
1416 end = 0;
1417 }
1418
1419 /* zero-length substrings match everywhere */
1420 if (pattern_len == 0 || maxcount == 0) {
1421 if (target_len+1 < maxcount)
1422 return target_len+1;
1423 return maxcount;
1424 }
1425
1426 end -= pattern_len;
1427 if (direction < 0) {
1428 for (; (end >= start); end--)
1429 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1430 count++;
1431 if (--maxcount <= 0) break;
1432 end -= pattern_len-1;
1433 }
1434 } else {
1435 for (; (start <= end); start++)
1436 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1437 count++;
1438 if (--maxcount <= 0)
1439 break;
1440 start += pattern_len-1;
1441 }
1442 }
1443 return count;
1444}
1445
1446
1447/* Algorithms for different cases of string replacement */
1448
1449/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1450Py_LOCAL(PyBytesObject *)
1451replace_interleave(PyBytesObject *self,
1452 const char *to_s, Py_ssize_t to_len,
1453 Py_ssize_t maxcount)
1454{
1455 char *self_s, *result_s;
1456 Py_ssize_t self_len, result_len;
1457 Py_ssize_t count, i, product;
1458 PyBytesObject *result;
1459
1460 self_len = PyBytes_GET_SIZE(self);
1461
1462 /* 1 at the end plus 1 after every character */
1463 count = self_len+1;
1464 if (maxcount < count)
1465 count = maxcount;
1466
1467 /* Check for overflow */
1468 /* result_len = count * to_len + self_len; */
1469 product = count * to_len;
1470 if (product / to_len != count) {
1471 PyErr_SetString(PyExc_OverflowError,
1472 "replace string is too long");
1473 return NULL;
1474 }
1475 result_len = product + self_len;
1476 if (result_len < 0) {
1477 PyErr_SetString(PyExc_OverflowError,
1478 "replace string is too long");
1479 return NULL;
1480 }
1481
1482 if (! (result = (PyBytesObject *)
1483 PyBytes_FromStringAndSize(NULL, result_len)) )
1484 return NULL;
1485
1486 self_s = PyBytes_AS_STRING(self);
1487 result_s = PyBytes_AS_STRING(result);
1488
1489 /* TODO: special case single character, which doesn't need memcpy */
1490
1491 /* Lay the first one down (guaranteed this will occur) */
1492 Py_MEMCPY(result_s, to_s, to_len);
1493 result_s += to_len;
1494 count -= 1;
1495
1496 for (i=0; i<count; i++) {
1497 *result_s++ = *self_s++;
1498 Py_MEMCPY(result_s, to_s, to_len);
1499 result_s += to_len;
1500 }
1501
1502 /* Copy the rest of the original string */
1503 Py_MEMCPY(result_s, self_s, self_len-i);
1504
1505 return result;
1506}
1507
1508/* Special case for deleting a single character */
1509/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1510Py_LOCAL(PyBytesObject *)
1511replace_delete_single_character(PyBytesObject *self,
1512 char from_c, Py_ssize_t maxcount)
1513{
1514 char *self_s, *result_s;
1515 char *start, *next, *end;
1516 Py_ssize_t self_len, result_len;
1517 Py_ssize_t count;
1518 PyBytesObject *result;
1519
1520 self_len = PyBytes_GET_SIZE(self);
1521 self_s = PyBytes_AS_STRING(self);
1522
1523 count = countchar(self_s, self_len, from_c, maxcount);
1524 if (count == 0) {
1525 return return_self(self);
1526 }
1527
1528 result_len = self_len - count; /* from_len == 1 */
1529 assert(result_len>=0);
1530
1531 if ( (result = (PyBytesObject *)
1532 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1533 return NULL;
1534 result_s = PyBytes_AS_STRING(result);
1535
1536 start = self_s;
1537 end = self_s + self_len;
1538 while (count-- > 0) {
1539 next = findchar(start, end-start, from_c);
1540 if (next == NULL)
1541 break;
1542 Py_MEMCPY(result_s, start, next-start);
1543 result_s += (next-start);
1544 start = next+1;
1545 }
1546 Py_MEMCPY(result_s, start, end-start);
1547
1548 return result;
1549}
1550
1551/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1552
1553Py_LOCAL(PyBytesObject *)
1554replace_delete_substring(PyBytesObject *self,
1555 const char *from_s, Py_ssize_t from_len,
1556 Py_ssize_t maxcount)
1557{
1558 char *self_s, *result_s;
1559 char *start, *next, *end;
1560 Py_ssize_t self_len, result_len;
1561 Py_ssize_t count, offset;
1562 PyBytesObject *result;
1563
1564 self_len = PyBytes_GET_SIZE(self);
1565 self_s = PyBytes_AS_STRING(self);
1566
1567 count = countstring(self_s, self_len,
1568 from_s, from_len,
1569 0, self_len, 1,
1570 maxcount);
1571
1572 if (count == 0) {
1573 /* no matches */
1574 return return_self(self);
1575 }
1576
1577 result_len = self_len - (count * from_len);
1578 assert (result_len>=0);
1579
1580 if ( (result = (PyBytesObject *)
1581 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1582 return NULL;
1583
1584 result_s = PyBytes_AS_STRING(result);
1585
1586 start = self_s;
1587 end = self_s + self_len;
1588 while (count-- > 0) {
1589 offset = findstring(start, end-start,
1590 from_s, from_len,
1591 0, end-start, FORWARD);
1592 if (offset == -1)
1593 break;
1594 next = start + offset;
1595
1596 Py_MEMCPY(result_s, start, next-start);
1597
1598 result_s += (next-start);
1599 start = next+from_len;
1600 }
1601 Py_MEMCPY(result_s, start, end-start);
1602 return result;
1603}
1604
1605/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1606Py_LOCAL(PyBytesObject *)
1607replace_single_character_in_place(PyBytesObject *self,
1608 char from_c, char to_c,
1609 Py_ssize_t maxcount)
1610{
1611 char *self_s, *result_s, *start, *end, *next;
1612 Py_ssize_t self_len;
1613 PyBytesObject *result;
1614
1615 /* The result string will be the same size */
1616 self_s = PyBytes_AS_STRING(self);
1617 self_len = PyBytes_GET_SIZE(self);
1618
1619 next = findchar(self_s, self_len, from_c);
1620
1621 if (next == NULL) {
1622 /* No matches; return the original bytes */
1623 return return_self(self);
1624 }
1625
1626 /* Need to make a new bytes */
1627 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1628 if (result == NULL)
1629 return NULL;
1630 result_s = PyBytes_AS_STRING(result);
1631 Py_MEMCPY(result_s, self_s, self_len);
1632
1633 /* change everything in-place, starting with this one */
1634 start = result_s + (next-self_s);
1635 *start = to_c;
1636 start++;
1637 end = result_s + self_len;
1638
1639 while (--maxcount > 0) {
1640 next = findchar(start, end-start, from_c);
1641 if (next == NULL)
1642 break;
1643 *next = to_c;
1644 start = next+1;
1645 }
1646
1647 return result;
1648}
1649
1650/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1651Py_LOCAL(PyBytesObject *)
1652replace_substring_in_place(PyBytesObject *self,
1653 const char *from_s, Py_ssize_t from_len,
1654 const char *to_s, Py_ssize_t to_len,
1655 Py_ssize_t maxcount)
1656{
1657 char *result_s, *start, *end;
1658 char *self_s;
1659 Py_ssize_t self_len, offset;
1660 PyBytesObject *result;
1661
1662 /* The result bytes will be the same size */
1663
1664 self_s = PyBytes_AS_STRING(self);
1665 self_len = PyBytes_GET_SIZE(self);
1666
1667 offset = findstring(self_s, self_len,
1668 from_s, from_len,
1669 0, self_len, FORWARD);
1670 if (offset == -1) {
1671 /* No matches; return the original bytes */
1672 return return_self(self);
1673 }
1674
1675 /* Need to make a new bytes */
1676 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1677 if (result == NULL)
1678 return NULL;
1679 result_s = PyBytes_AS_STRING(result);
1680 Py_MEMCPY(result_s, self_s, self_len);
1681
1682 /* change everything in-place, starting with this one */
1683 start = result_s + offset;
1684 Py_MEMCPY(start, to_s, from_len);
1685 start += from_len;
1686 end = result_s + self_len;
1687
1688 while ( --maxcount > 0) {
1689 offset = findstring(start, end-start,
1690 from_s, from_len,
1691 0, end-start, FORWARD);
1692 if (offset==-1)
1693 break;
1694 Py_MEMCPY(start+offset, to_s, from_len);
1695 start += offset+from_len;
1696 }
1697
1698 return result;
1699}
1700
1701/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1702Py_LOCAL(PyBytesObject *)
1703replace_single_character(PyBytesObject *self,
1704 char from_c,
1705 const char *to_s, Py_ssize_t to_len,
1706 Py_ssize_t maxcount)
1707{
1708 char *self_s, *result_s;
1709 char *start, *next, *end;
1710 Py_ssize_t self_len, result_len;
1711 Py_ssize_t count, product;
1712 PyBytesObject *result;
1713
1714 self_s = PyBytes_AS_STRING(self);
1715 self_len = PyBytes_GET_SIZE(self);
1716
1717 count = countchar(self_s, self_len, from_c, maxcount);
1718 if (count == 0) {
1719 /* no matches, return unchanged */
1720 return return_self(self);
1721 }
1722
1723 /* use the difference between current and new, hence the "-1" */
1724 /* result_len = self_len + count * (to_len-1) */
1725 product = count * (to_len-1);
1726 if (product / (to_len-1) != count) {
1727 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1728 return NULL;
1729 }
1730 result_len = self_len + product;
1731 if (result_len < 0) {
1732 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1733 return NULL;
1734 }
1735
1736 if ( (result = (PyBytesObject *)
1737 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1738 return NULL;
1739 result_s = PyBytes_AS_STRING(result);
1740
1741 start = self_s;
1742 end = self_s + self_len;
1743 while (count-- > 0) {
1744 next = findchar(start, end-start, from_c);
1745 if (next == NULL)
1746 break;
1747
1748 if (next == start) {
1749 /* replace with the 'to' */
1750 Py_MEMCPY(result_s, to_s, to_len);
1751 result_s += to_len;
1752 start += 1;
1753 } else {
1754 /* copy the unchanged old then the 'to' */
1755 Py_MEMCPY(result_s, start, next-start);
1756 result_s += (next-start);
1757 Py_MEMCPY(result_s, to_s, to_len);
1758 result_s += to_len;
1759 start = next+1;
1760 }
1761 }
1762 /* Copy the remainder of the remaining bytes */
1763 Py_MEMCPY(result_s, start, end-start);
1764
1765 return result;
1766}
1767
1768/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1769Py_LOCAL(PyBytesObject *)
1770replace_substring(PyBytesObject *self,
1771 const char *from_s, Py_ssize_t from_len,
1772 const char *to_s, Py_ssize_t to_len,
1773 Py_ssize_t maxcount)
1774{
1775 char *self_s, *result_s;
1776 char *start, *next, *end;
1777 Py_ssize_t self_len, result_len;
1778 Py_ssize_t count, offset, product;
1779 PyBytesObject *result;
1780
1781 self_s = PyBytes_AS_STRING(self);
1782 self_len = PyBytes_GET_SIZE(self);
1783
1784 count = countstring(self_s, self_len,
1785 from_s, from_len,
1786 0, self_len, FORWARD, maxcount);
1787 if (count == 0) {
1788 /* no matches, return unchanged */
1789 return return_self(self);
1790 }
1791
1792 /* Check for overflow */
1793 /* result_len = self_len + count * (to_len-from_len) */
1794 product = count * (to_len-from_len);
1795 if (product / (to_len-from_len) != count) {
1796 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1797 return NULL;
1798 }
1799 result_len = self_len + product;
1800 if (result_len < 0) {
1801 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1802 return NULL;
1803 }
1804
1805 if ( (result = (PyBytesObject *)
1806 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1807 return NULL;
1808 result_s = PyBytes_AS_STRING(result);
1809
1810 start = self_s;
1811 end = self_s + self_len;
1812 while (count-- > 0) {
1813 offset = findstring(start, end-start,
1814 from_s, from_len,
1815 0, end-start, FORWARD);
1816 if (offset == -1)
1817 break;
1818 next = start+offset;
1819 if (next == start) {
1820 /* replace with the 'to' */
1821 Py_MEMCPY(result_s, to_s, to_len);
1822 result_s += to_len;
1823 start += from_len;
1824 } else {
1825 /* copy the unchanged old then the 'to' */
1826 Py_MEMCPY(result_s, start, next-start);
1827 result_s += (next-start);
1828 Py_MEMCPY(result_s, to_s, to_len);
1829 result_s += to_len;
1830 start = next+from_len;
1831 }
1832 }
1833 /* Copy the remainder of the remaining bytes */
1834 Py_MEMCPY(result_s, start, end-start);
1835
1836 return result;
1837}
1838
1839
1840Py_LOCAL(PyBytesObject *)
1841replace(PyBytesObject *self,
1842 const char *from_s, Py_ssize_t from_len,
1843 const char *to_s, Py_ssize_t to_len,
1844 Py_ssize_t maxcount)
1845{
1846 if (maxcount < 0) {
1847 maxcount = PY_SSIZE_T_MAX;
1848 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1849 /* nothing to do; return the original bytes */
1850 return return_self(self);
1851 }
1852
1853 if (maxcount == 0 ||
1854 (from_len == 0 && to_len == 0)) {
1855 /* nothing to do; return the original bytes */
1856 return return_self(self);
1857 }
1858
1859 /* Handle zero-length special cases */
1860
1861 if (from_len == 0) {
1862 /* insert the 'to' bytes everywhere. */
1863 /* >>> "Python".replace("", ".") */
1864 /* '.P.y.t.h.o.n.' */
1865 return replace_interleave(self, to_s, to_len, maxcount);
1866 }
1867
1868 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1869 /* point for an empty self bytes to generate a non-empty bytes */
1870 /* Special case so the remaining code always gets a non-empty bytes */
1871 if (PyBytes_GET_SIZE(self) == 0) {
1872 return return_self(self);
1873 }
1874
1875 if (to_len == 0) {
1876 /* delete all occurances of 'from' bytes */
1877 if (from_len == 1) {
1878 return replace_delete_single_character(
1879 self, from_s[0], maxcount);
1880 } else {
1881 return replace_delete_substring(self, from_s, from_len, maxcount);
1882 }
1883 }
1884
1885 /* Handle special case where both bytes have the same length */
1886
1887 if (from_len == to_len) {
1888 if (from_len == 1) {
1889 return replace_single_character_in_place(
1890 self,
1891 from_s[0],
1892 to_s[0],
1893 maxcount);
1894 } else {
1895 return replace_substring_in_place(
1896 self, from_s, from_len, to_s, to_len, maxcount);
1897 }
1898 }
1899
1900 /* Otherwise use the more generic algorithms */
1901 if (from_len == 1) {
1902 return replace_single_character(self, from_s[0],
1903 to_s, to_len, maxcount);
1904 } else {
1905 /* len('from')>=2, len('to')>=1 */
1906 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
1907 }
1908}
1909
1910PyDoc_STRVAR(replace__doc__,
1911"B.replace (old, new[, count]) -> bytes\n\
1912\n\
1913Return a copy of bytes B with all occurrences of subsection\n\
1914old replaced by new. If the optional argument count is\n\
1915given, only the first count occurrences are replaced.");
1916
1917static PyObject *
1918bytes_replace(PyBytesObject *self, PyObject *args)
1919{
1920 Py_ssize_t count = -1;
1921 PyObject *from, *to;
1922 const char *from_s, *to_s;
1923 Py_ssize_t from_len, to_len;
1924
1925 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
1926 return NULL;
1927
1928 if (PyBytes_Check(from)) {
1929 from_s = PyBytes_AS_STRING(from);
1930 from_len = PyBytes_GET_SIZE(from);
1931 }
1932 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
1933 return NULL;
1934
1935 if (PyBytes_Check(to)) {
1936 to_s = PyBytes_AS_STRING(to);
1937 to_len = PyBytes_GET_SIZE(to);
1938 }
1939 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
1940 return NULL;
1941
1942 return (PyObject *)replace((PyBytesObject *) self,
1943 from_s, from_len,
1944 to_s, to_len, count);
1945}
1946
1947
1948/* Overallocate the initial list to reduce the number of reallocs for small
1949 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
1950 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
1951 text (roughly 11 words per line) and field delimited data (usually 1-10
1952 fields). For large strings the split algorithms are bandwidth limited
1953 so increasing the preallocation likely will not improve things.*/
1954
#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to the list as a new bytes object.
   Relies on the caller's locals `str` and `list` and on an `onError`
   label in the enclosing function. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Add data[left:right] to the list as a new bytes object: while `count`
   is below MAX_PREALLOC the object is stored directly into slot `count`,
   afterwards it is appended.  Increments `count`.  Relies on the
   caller's locals `str`, `list` and `count` and on an `onError` label in
   the enclosing function. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
1992
1993
1994Py_LOCAL_INLINE(PyObject *)
1995split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1996{
1997 register Py_ssize_t i, j, count=0;
1998 PyObject *str;
1999 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2000
2001 if (list == NULL)
2002 return NULL;
2003
2004 i = j = 0;
2005 while ((j < len) && (maxcount-- > 0)) {
2006 for(; j<len; j++) {
2007 /* I found that using memchr makes no difference */
2008 if (s[j] == ch) {
2009 SPLIT_ADD(s, i, j);
2010 i = j = j + 1;
2011 break;
2012 }
2013 }
2014 }
2015 if (i <= len) {
2016 SPLIT_ADD(s, i, len);
2017 }
2018 FIX_PREALLOC_SIZE(list);
2019 return list;
2020
2021 onError:
2022 Py_DECREF(list);
2023 return NULL;
2024}
2025
2026PyDoc_STRVAR(split__doc__,
2027"B.split(sep [,maxsplit]) -> list of bytes\n\
2028\n\
2029Return a list of the bytes in the string B, using sep as the\n\
2030delimiter. If maxsplit is given, at most maxsplit\n\
2031splits are done.");
2032
2033static PyObject *
2034bytes_split(PyBytesObject *self, PyObject *args)
2035{
2036 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2037 Py_ssize_t maxsplit = -1, count=0;
2038 const char *s = PyBytes_AS_STRING(self), *sub;
2039 PyObject *list, *str, *subobj;
2040#ifdef USE_FAST
2041 Py_ssize_t pos;
2042#endif
2043
2044 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2045 return NULL;
2046 if (maxsplit < 0)
2047 maxsplit = PY_SSIZE_T_MAX;
2048 if (PyBytes_Check(subobj)) {
2049 sub = PyBytes_AS_STRING(subobj);
2050 n = PyBytes_GET_SIZE(subobj);
2051 }
2052 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2053 return NULL;
2054
2055 if (n == 0) {
2056 PyErr_SetString(PyExc_ValueError, "empty separator");
2057 return NULL;
2058 }
2059 else if (n == 1)
2060 return split_char(s, len, sub[0], maxsplit);
2061
2062 list = PyList_New(PREALLOC_SIZE(maxsplit));
2063 if (list == NULL)
2064 return NULL;
2065
2066#ifdef USE_FAST
2067 i = j = 0;
2068 while (maxsplit-- > 0) {
2069 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2070 if (pos < 0)
2071 break;
2072 j = i+pos;
2073 SPLIT_ADD(s, i, j);
2074 i = j + n;
2075 }
2076#else
2077 i = j = 0;
2078 while ((j+n <= len) && (maxsplit-- > 0)) {
2079 for (; j+n <= len; j++) {
2080 if (Py_STRING_MATCH(s, j, sub, n)) {
2081 SPLIT_ADD(s, i, j);
2082 i = j = j + n;
2083 break;
2084 }
2085 }
2086 }
2087#endif
2088 SPLIT_ADD(s, i, len);
2089 FIX_PREALLOC_SIZE(list);
2090 return list;
2091
2092 onError:
2093 Py_DECREF(list);
2094 return NULL;
2095}
2096
2097PyDoc_STRVAR(partition__doc__,
2098"B.partition(sep) -> (head, sep, tail)\n\
2099\n\
2100Searches for the separator sep in B, and returns the part before it,\n\
2101the separator itself, and the part after it. If the separator is not\n\
2102found, returns B and two empty bytes.");
2103
2104static PyObject *
2105bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2106{
2107 PyObject *bytesep, *result;
2108
2109 bytesep = PyBytes_FromObject(sep_obj);
2110 if (! bytesep)
2111 return NULL;
2112
2113 result = stringlib_partition(
2114 (PyObject*) self,
2115 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2116 bytesep,
2117 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2118 );
2119
2120 Py_DECREF(bytesep);
2121 return result;
2122}
2123
2124PyDoc_STRVAR(rpartition__doc__,
2125"B.rpartition(sep) -> (tail, sep, head)\n\
2126\n\
2127Searches for the separator sep in B, starting at the end of B, and returns\n\
2128the part before it, the separator itself, and the part after it. If the\n\
2129separator is not found, returns two empty bytes and B.");
2130
2131static PyObject *
2132bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2133{
2134 PyObject *bytesep, *result;
2135
2136 bytesep = PyBytes_FromObject(sep_obj);
2137 if (! bytesep)
2138 return NULL;
2139
2140 result = stringlib_rpartition(
2141 (PyObject*) self,
2142 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2143 bytesep,
2144 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2145 );
2146
2147 Py_DECREF(bytesep);
2148 return result;
2149}
2150
2151Py_LOCAL_INLINE(PyObject *)
2152rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2153{
2154 register Py_ssize_t i, j, count=0;
2155 PyObject *str;
2156 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2157
2158 if (list == NULL)
2159 return NULL;
2160
2161 i = j = len - 1;
2162 while ((i >= 0) && (maxcount-- > 0)) {
2163 for (; i >= 0; i--) {
2164 if (s[i] == ch) {
2165 SPLIT_ADD(s, i + 1, j + 1);
2166 j = i = i - 1;
2167 break;
2168 }
2169 }
2170 }
2171 if (j >= -1) {
2172 SPLIT_ADD(s, 0, j + 1);
2173 }
2174 FIX_PREALLOC_SIZE(list);
2175 if (PyList_Reverse(list) < 0)
2176 goto onError;
2177
2178 return list;
2179
2180 onError:
2181 Py_DECREF(list);
2182 return NULL;
2183}
2184
PyDoc_STRVAR(rsplit__doc__,
"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
\n\
Return a list of the sections in the byte B, using sep as the\n\
delimiter, starting at the end of the bytes and working\n\
to the front. If maxsplit is given, at most maxsplit splits are\n\
done.");

/* bytes.rsplit(sep[, maxsplit]).
 *
 * The separator may be a bytes object or anything accepted by
 * PyObject_AsCharBuffer.  An empty separator raises ValueError; a one-byte
 * separator is delegated to rsplit_char.  Returns a list, or NULL with an
 * exception set.
 *
 * NOTE(review): SPLIT_ADD, PREALLOC_SIZE and FIX_PREALLOC_SIZE are macros
 * defined outside this function; they presumably rely on the locals `str`,
 * `list` and `count` and on the `onError` label -- confirm before renaming.
 */
static PyObject *
bytes_rsplit(PyBytesObject *self, PyObject *args)
{
    Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
    Py_ssize_t maxsplit = -1, count=0;
    const char *s = PyBytes_AS_STRING(self), *sub;
    PyObject *list, *str, *subobj;

    if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
        return NULL;
    /* A negative (or omitted) maxsplit means "no limit". */
    if (maxsplit < 0)
        maxsplit = PY_SSIZE_T_MAX;
    if (PyBytes_Check(subobj)) {
        sub = PyBytes_AS_STRING(subobj);
        n = PyBytes_GET_SIZE(subobj);
    }
    else if (PyObject_AsCharBuffer(subobj, &sub, &n))
        return NULL;

    if (n == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    else if (n == 1)
        return rsplit_char(s, len, sub[0], maxsplit);

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL)
        return NULL;

    /* j is the exclusive end of the piece not yet emitted; i is the
       rightmost position at which a separator of length n could start. */
    j = len;
    i = j - n;

    while ( (i >= 0) && (maxsplit-- > 0) ) {
        for (; i>=0; i--) {
            if (Py_STRING_MATCH(s, i, sub, n)) {
                SPLIT_ADD(s, i + n, j);
                j = i;
                /* Step over the whole separator so matches do not overlap. */
                i -= n;
                break;
            }
        }
    }
    /* Emit what remains in front of the last separator found. */
    SPLIT_ADD(s, 0, j);
    FIX_PREALLOC_SIZE(list);
    /* Pieces were collected right-to-left; reverse the list in place. */
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
2246
2247PyDoc_STRVAR(extend__doc__,
2248"B.extend(iterable int) -> None\n\
2249\n\
2250Append all the elements from the iterator or sequence to the\n\
2251end of the bytes.");
2252static PyObject *
2253bytes_extend(PyBytesObject *self, PyObject *arg)
2254{
2255 if (bytes_setslice(self, self->ob_size, self->ob_size, arg) == -1)
2256 return NULL;
2257 Py_RETURN_NONE;
2258}
2259
2260
2261PyDoc_STRVAR(reverse__doc__,
2262"B.reverse() -> None\n\
2263\n\
2264Reverse the order of the values in bytes in place.");
2265static PyObject *
2266bytes_reverse(PyBytesObject *self, PyObject *unused)
2267{
2268 char swap, *head, *tail;
2269 Py_ssize_t i, j, n = self->ob_size;
2270
2271 j = n / 2;
2272 head = self->ob_bytes;
2273 tail = head + n - 1;
2274 for (i = 0; i < j; i++) {
2275 swap = *head;
2276 *head++ = *tail;
2277 *tail-- = swap;
2278 }
2279
2280 Py_RETURN_NONE;
2281}
2282
2283PyDoc_STRVAR(insert__doc__,
2284"B.insert(index, int) -> None\n\
2285\n\
2286Insert a single item into the bytes before the given index.");
2287static PyObject *
2288bytes_insert(PyBytesObject *self, PyObject *args)
2289{
2290 int value;
2291 Py_ssize_t where, n = self->ob_size;
2292
2293 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2294 return NULL;
2295
2296 if (n == PY_SSIZE_T_MAX) {
2297 PyErr_SetString(PyExc_OverflowError,
2298 "cannot add more objects to bytes");
2299 return NULL;
2300 }
2301 if (value < 0 || value >= 256) {
2302 PyErr_SetString(PyExc_ValueError,
2303 "byte must be in range(0, 256)");
2304 return NULL;
2305 }
2306 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2307 return NULL;
2308
2309 if (where < 0) {
2310 where += n;
2311 if (where < 0)
2312 where = 0;
2313 }
2314 if (where > n)
2315 where = n;
2316 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where + 1);
2317 self->ob_bytes[where] = value;
2318
2319 Py_RETURN_NONE;
2320}
2321
2322PyDoc_STRVAR(append__doc__,
2323"B.append(int) -> None\n\
2324\n\
2325Append a single item to the end of the bytes.");
2326static PyObject *
2327bytes_append(PyBytesObject *self, PyObject *arg)
2328{
2329 int value;
2330 Py_ssize_t n = self->ob_size;
2331
2332 if (! _getbytevalue(arg, &value))
2333 return NULL;
2334 if (n == PY_SSIZE_T_MAX) {
2335 PyErr_SetString(PyExc_OverflowError,
2336 "cannot add more objects to bytes");
2337 return NULL;
2338 }
2339 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2340 return NULL;
2341
2342 self->ob_bytes[n] = value;
2343
2344 Py_RETURN_NONE;
2345}
2346
2347PyDoc_STRVAR(pop__doc__,
2348"B.pop([index]) -> int\n\
2349\n\
2350Remove and return a single item from the bytes. If no index\n\
2351argument is give, will pop the last value.");
2352static PyObject *
2353bytes_pop(PyBytesObject *self, PyObject *args)
2354{
2355 int value;
2356 Py_ssize_t where = -1, n = self->ob_size;
2357
2358 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2359 return NULL;
2360
2361 if (n == 0) {
2362 PyErr_SetString(PyExc_OverflowError,
2363 "cannot pop an empty bytes");
2364 return NULL;
2365 }
2366 if (where < 0)
2367 where += self->ob_size;
2368 if (where < 0 || where >= self->ob_size) {
2369 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2370 return NULL;
2371 }
2372
2373 value = self->ob_bytes[where];
2374 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2375 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2376 return NULL;
2377
2378 return PyInt_FromLong(value);
2379}
2380
2381PyDoc_STRVAR(remove__doc__,
2382"B.remove(int) -> None\n\
2383\n\
2384Remove the first occurance of a value in bytes");
2385static PyObject *
2386bytes_remove(PyBytesObject *self, PyObject *arg)
2387{
2388 int value;
2389 Py_ssize_t where, n = self->ob_size;
2390
2391 if (! _getbytevalue(arg, &value))
2392 return NULL;
2393
2394 for (where = 0; where < n; where++) {
2395 if (self->ob_bytes[where] == value)
2396 break;
2397 }
2398 if (where == n) {
2399 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2400 return NULL;
2401 }
2402
2403 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2404 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2405 return NULL;
2406
2407 Py_RETURN_NONE;
2408}
2409
2410
Guido van Rossumd624f182006-04-24 13:47:05 +00002411PyDoc_STRVAR(decode_doc,
2412"B.decode([encoding[,errors]]) -> unicode obect.\n\
2413\n\
2414Decodes B using the codec registered for encoding. encoding defaults\n\
2415to the default encoding. errors may be given to set a different error\n\
2416handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2417a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2418as well as any other name registerd with codecs.register_error that is\n\
2419able to handle UnicodeDecodeErrors.");
2420
2421static PyObject *
2422bytes_decode(PyObject *self, PyObject *args)
2423{
2424 const char *encoding = NULL;
2425 const char *errors = NULL;
2426
2427 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2428 return NULL;
2429 if (encoding == NULL)
2430 encoding = PyUnicode_GetDefaultEncoding();
2431 return PyCodec_Decode(self, encoding, errors);
2432}
2433
Guido van Rossuma0867f72006-05-05 04:34:18 +00002434PyDoc_STRVAR(alloc_doc,
2435"B.__alloc__() -> int\n\
2436\n\
2437Returns the number of bytes actually allocated.");
2438
2439static PyObject *
2440bytes_alloc(PyBytesObject *self)
2441{
2442 return PyInt_FromSsize_t(self->ob_alloc);
2443}
2444
Guido van Rossum20188312006-05-05 15:15:40 +00002445PyDoc_STRVAR(join_doc,
2446"bytes.join(iterable_of_bytes) -> bytes\n\
2447\n\
2448Concatenates any number of bytes objects. Example:\n\
2449bytes.join([bytes('ab'), bytes('pq'), bytes('rs')]) -> bytes('abpqrs').");
2450
2451static PyObject *
2452bytes_join(PyObject *cls, PyObject *it)
2453{
2454 PyObject *seq;
2455 Py_ssize_t i;
2456 Py_ssize_t n;
2457 PyObject **items;
2458 Py_ssize_t totalsize = 0;
2459 PyObject *result;
2460 char *dest;
2461
2462 seq = PySequence_Fast(it, "can only join an iterable");
2463 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002464 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002465 n = PySequence_Fast_GET_SIZE(seq);
2466 items = PySequence_Fast_ITEMS(seq);
2467
2468 /* Compute the total size, and check that they are all bytes */
2469 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002470 PyObject *obj = items[i];
2471 if (!PyBytes_Check(obj)) {
2472 PyErr_Format(PyExc_TypeError,
2473 "can only join an iterable of bytes "
2474 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002475 /* XXX %ld isn't right on Win64 */
Georg Brandlb3f568f2007-02-27 08:49:18 +00002476 (long)i, obj->ob_type->tp_name);
2477 goto error;
2478 }
2479 totalsize += PyBytes_GET_SIZE(obj);
2480 if (totalsize < 0) {
2481 PyErr_NoMemory();
2482 goto error;
2483 }
Guido van Rossum20188312006-05-05 15:15:40 +00002484 }
2485
2486 /* Allocate the result, and copy the bytes */
2487 result = PyBytes_FromStringAndSize(NULL, totalsize);
2488 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002489 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002490 dest = PyBytes_AS_STRING(result);
2491 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002492 PyObject *obj = items[i];
2493 Py_ssize_t size = PyBytes_GET_SIZE(obj);
2494 memcpy(dest, PyBytes_AS_STRING(obj), size);
2495 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002496 }
2497
2498 /* Done */
2499 Py_DECREF(seq);
2500 return result;
2501
2502 /* Error handling */
2503 error:
2504 Py_DECREF(seq);
2505 return NULL;
2506}
2507
/* Docstring for the bytes.fromhex() class method (see bytes_methods). */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted. Example:\n\
bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2514
/* Map one hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F') to its
 * numeric value 0..15.  Any other character yields -1.
 */
static int
hex_digit_to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
2528
2529static PyObject *
2530bytes_fromhex(PyObject *cls, PyObject *args)
2531{
2532 PyObject *newbytes;
2533 char *hex, *buf;
2534 Py_ssize_t len, byteslen, i, j;
2535 int top, bot;
2536
2537 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2538 return NULL;
2539
2540 byteslen = len / 2; /* max length if there are no spaces */
2541
2542 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2543 if (!newbytes)
2544 return NULL;
2545 buf = PyBytes_AS_STRING(newbytes);
2546
2547 for (i = j = 0; ; i += 2) {
2548 /* skip over spaces in the input */
2549 while (Py_CHARMASK(hex[i]) == ' ')
2550 i++;
2551 if (i >= len)
2552 break;
2553 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2554 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2555 if (top == -1 || bot == -1) {
2556 PyErr_Format(PyExc_ValueError,
2557 "non-hexadecimal number string '%c%c' found in "
2558 "fromhex() arg at position %zd",
2559 hex[i], hex[i+1], i);
2560 goto error;
2561 }
2562 buf[j++] = (top << 4) + bot;
2563 }
2564 if (PyBytes_Resize(newbytes, j) < 0)
2565 goto error;
2566 return newbytes;
2567
2568 error:
2569 Py_DECREF(newbytes);
2570 return NULL;
2571}
2572
/* Sequence protocol slots for the bytes type.  The two slice slots are left
 * empty here.
 * NOTE(review): slicing is presumably served by bytes_subscript and
 * bytes_ass_subscript in bytes_as_mapping -- confirm.
 */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)bytes_concat,           /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
2585
/* Mapping protocol slots for the bytes type. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,              /* mp_length */
    (binaryfunc)bytes_subscript,        /* mp_subscript */
    (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
};
2591
/* Buffer protocol slots for the bytes type.  The same function,
 * bytes_getbuffer, serves the read, write and character buffer slots.
 */
static PyBufferProcs bytes_as_buffer = {
    (readbufferproc)bytes_getbuffer,    /* bf_getreadbuffer */
    (writebufferproc)bytes_getbuffer,   /* bf_getwritebuffer */
    (segcountproc)bytes_getsegcount,    /* bf_getsegcount */
    /* XXX Bytes are not characters! But we need to implement
       bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
    (charbufferproc)bytes_getbuffer,    /* bf_getcharbuffer */
};
2600
/* Method table for the bytes type.  Each entry gives the Python-visible
 * name, the C implementation, its calling convention flags and its
 * docstring.  fromhex and join carry METH_CLASS and are therefore class
 * methods; the table ends with a NULL sentinel entry.
 */
static PyMethodDef
bytes_methods[] = {
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, fromhex_doc},
    {"join", (PyCFunction)bytes_join, METH_O|METH_CLASS, join_doc},
    {NULL}      /* sentinel */
};
2629
PyDoc_STRVAR(bytes_doc,
"bytes([iterable]) -> new array of bytes.\n\
\n\
If an argument is given it must be an iterable yielding ints in range(256).");

/* Type object for bytes.  It wires up the sequence, mapping and buffer
 * slot tables and the method table defined above.  Hashing, iteration,
 * members and getsets are left unset (0) in this table.
 */
PyTypeObject PyBytes_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                  /* ob_size */
    "bytes",                            /* tp_name */
    sizeof(PyBytesObject),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    (reprfunc)bytes_str,                /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    /* bytes is 'final' or 'sealed' */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};