blob: e6ab440caa952d74c05d19d42fb4d996652aa054 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroua57aae72010-06-09 16:58:35 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE is the basic size of a bytes object; any memory
   allocation for a bytes object holding n bytes of data should request
   PyBytesObject_SIZE + n bytes.  The "+ 1" leaves room for the null byte
   that is stored right after the data.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178#else
179#ifdef __va_copy
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000180 __va_copy(count, vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000182 count = vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183#endif
184#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000239 expand:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000345
346 end:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000354 PyObject* ret;
355 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000358 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000360 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000368bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000407
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200498 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505 failed:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000506 Py_DECREF(v);
507 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000537 register char **s,
538 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000544
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
Eric Smith0923d1d2009-04-16 20:16:10 +0000565#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000567
Neal Norwitz6968b052007-02-27 19:02:19 +0000568#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000569#define STRINGLIB_LEN PyBytes_GET_SIZE
570#define STRINGLIB_NEW PyBytes_FromStringAndSize
571#define STRINGLIB_STR PyBytes_AS_STRING
572/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
573
574#define STRINGLIB_EMPTY nullstring
575#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
576#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000577
578#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579
Neal Norwitz6968b052007-02-27 19:02:19 +0000580#include "stringlib/count.h"
581#include "stringlib/find.h"
582#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000583#include "stringlib/ctype.h"
584#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000585
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000586#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
Eric Smitha3b1ac82009-04-03 14:45:06 +0000587#define _Py_InsertThousandsGroupingLocale _PyBytes_InsertThousandsGroupingLocale
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000588#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000589
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000590PyObject *
591PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000592{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000593 static const char *hexdigits = "0123456789abcdef";
594 register PyBytesObject* op = (PyBytesObject*) obj;
595 Py_ssize_t length = Py_SIZE(op);
596 size_t newsize = 3 + 4 * length;
597 PyObject *v;
598 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
599 PyErr_SetString(PyExc_OverflowError,
600 "bytes object is too large to make repr");
601 return NULL;
602 }
603 v = PyUnicode_FromUnicode(NULL, newsize);
604 if (v == NULL) {
605 return NULL;
606 }
607 else {
608 register Py_ssize_t i;
609 register Py_UNICODE c;
610 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
611 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000612
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000613 /* Figure out which quote to use; single is preferred */
614 quote = '\'';
615 if (smartquotes) {
616 char *test, *start;
617 start = PyBytes_AS_STRING(op);
618 for (test = start; test < start+length; ++test) {
619 if (*test == '"') {
620 quote = '\''; /* back to single */
621 goto decided;
622 }
623 else if (*test == '\'')
624 quote = '"';
625 }
626 decided:
627 ;
628 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000629
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000630 *p++ = 'b', *p++ = quote;
631 for (i = 0; i < length; i++) {
632 /* There's at least enough room for a hex escape
633 and a closing quote. */
634 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
635 c = op->ob_sval[i];
636 if (c == quote || c == '\\')
637 *p++ = '\\', *p++ = c;
638 else if (c == '\t')
639 *p++ = '\\', *p++ = 't';
640 else if (c == '\n')
641 *p++ = '\\', *p++ = 'n';
642 else if (c == '\r')
643 *p++ = '\\', *p++ = 'r';
644 else if (c < ' ' || c >= 0x7f) {
645 *p++ = '\\';
646 *p++ = 'x';
647 *p++ = hexdigits[(c & 0xf0) >> 4];
648 *p++ = hexdigits[c & 0xf];
649 }
650 else
651 *p++ = c;
652 }
653 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
654 *p++ = quote;
655 *p = '\0';
656 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
657 Py_DECREF(v);
658 return NULL;
659 }
660 return v;
661 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Neal Norwitz6968b052007-02-27 19:02:19 +0000664static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000665bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000666{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000667 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000668}
669
Neal Norwitz6968b052007-02-27 19:02:19 +0000670static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000671bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000672{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000673 if (Py_BytesWarningFlag) {
674 if (PyErr_WarnEx(PyExc_BytesWarning,
675 "str() on a bytes instance", 1))
676 return NULL;
677 }
678 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000679}
680
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000681static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000682bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000684 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000685}
Neal Norwitz6968b052007-02-27 19:02:19 +0000686
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687/* This is also used by PyBytes_Concat() */
688static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000689bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000691 Py_ssize_t size;
692 Py_buffer va, vb;
693 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000695 va.len = -1;
696 vb.len = -1;
697 if (_getbuffer(a, &va) < 0 ||
698 _getbuffer(b, &vb) < 0) {
699 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
700 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
701 goto done;
702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000704 /* Optimize end cases */
705 if (va.len == 0 && PyBytes_CheckExact(b)) {
706 result = b;
707 Py_INCREF(result);
708 goto done;
709 }
710 if (vb.len == 0 && PyBytes_CheckExact(a)) {
711 result = a;
712 Py_INCREF(result);
713 goto done;
714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000715
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000716 size = va.len + vb.len;
717 if (size < 0) {
718 PyErr_NoMemory();
719 goto done;
720 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000721
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000722 result = PyBytes_FromStringAndSize(NULL, size);
723 if (result != NULL) {
724 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
725 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000727
728 done:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000729 if (va.len != -1)
730 PyBuffer_Release(&va);
731 if (vb.len != -1)
732 PyBuffer_Release(&vb);
733 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000734}
Neal Norwitz6968b052007-02-27 19:02:19 +0000735
736static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000737bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000738{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000739 register Py_ssize_t i;
740 register Py_ssize_t j;
741 register Py_ssize_t size;
742 register PyBytesObject *op;
743 size_t nbytes;
744 if (n < 0)
745 n = 0;
746 /* watch out for overflows: the size can overflow int,
747 * and the # of bytes needed can overflow size_t
748 */
749 size = Py_SIZE(a) * n;
750 if (n && size / n != Py_SIZE(a)) {
751 PyErr_SetString(PyExc_OverflowError,
752 "repeated bytes are too long");
753 return NULL;
754 }
755 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
756 Py_INCREF(a);
757 return (PyObject *)a;
758 }
759 nbytes = (size_t)size;
760 if (nbytes + PyBytesObject_SIZE <= nbytes) {
761 PyErr_SetString(PyExc_OverflowError,
762 "repeated bytes are too long");
763 return NULL;
764 }
765 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
766 if (op == NULL)
767 return PyErr_NoMemory();
768 PyObject_INIT_VAR(op, &PyBytes_Type, size);
769 op->ob_shash = -1;
770 op->ob_sval[size] = '\0';
771 if (Py_SIZE(a) == 1 && n > 0) {
772 memset(op->ob_sval, a->ob_sval[0] , n);
773 return (PyObject *) op;
774 }
775 i = 0;
776 if (i < size) {
777 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
778 i = Py_SIZE(a);
779 }
780 while (i < size) {
781 j = (i <= size-i) ? i : size-i;
782 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
783 i += j;
784 }
785 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000786}
787
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000789bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790{
791 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
792 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000793 Py_buffer varg;
Antoine Pitroubc760d92010-08-15 17:46:50 +0000794 Py_ssize_t pos;
Antoine Pitroua57aae72010-06-09 16:58:35 +0000795 PyErr_Clear();
796 if (_getbuffer(arg, &varg) < 0)
797 return -1;
798 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
799 varg.buf, varg.len, 0);
800 PyBuffer_Release(&varg);
801 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000802 }
803 if (ival < 0 || ival >= 256) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000804 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
805 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000806 }
807
Antoine Pitroubc760d92010-08-15 17:46:50 +0000808 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000809}
810
Neal Norwitz6968b052007-02-27 19:02:19 +0000811static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000812bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000813{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000814 if (i < 0 || i >= Py_SIZE(a)) {
815 PyErr_SetString(PyExc_IndexError, "index out of range");
816 return NULL;
817 }
818 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000819}
820
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000821static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000822bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000823{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000824 int c;
825 Py_ssize_t len_a, len_b;
826 Py_ssize_t min_len;
827 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000828
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000829 /* Make sure both arguments are strings. */
830 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
831 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
832 (PyObject_IsInstance((PyObject*)a,
833 (PyObject*)&PyUnicode_Type) ||
834 PyObject_IsInstance((PyObject*)b,
835 (PyObject*)&PyUnicode_Type))) {
836 if (PyErr_WarnEx(PyExc_BytesWarning,
837 "Comparison between bytes and string", 1))
838 return NULL;
839 }
840 result = Py_NotImplemented;
841 goto out;
842 }
843 if (a == b) {
844 switch (op) {
845 case Py_EQ:case Py_LE:case Py_GE:
846 result = Py_True;
847 goto out;
848 case Py_NE:case Py_LT:case Py_GT:
849 result = Py_False;
850 goto out;
851 }
852 }
853 if (op == Py_EQ) {
854 /* Supporting Py_NE here as well does not save
855 much time, since Py_NE is rarely used. */
856 if (Py_SIZE(a) == Py_SIZE(b)
857 && (a->ob_sval[0] == b->ob_sval[0]
858 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
859 result = Py_True;
860 } else {
861 result = Py_False;
862 }
863 goto out;
864 }
865 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
866 min_len = (len_a < len_b) ? len_a : len_b;
867 if (min_len > 0) {
868 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
869 if (c==0)
870 c = memcmp(a->ob_sval, b->ob_sval, min_len);
871 } else
872 c = 0;
873 if (c == 0)
874 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
875 switch (op) {
876 case Py_LT: c = c < 0; break;
877 case Py_LE: c = c <= 0; break;
878 case Py_EQ: assert(0); break; /* unreachable */
879 case Py_NE: c = c != 0; break;
880 case Py_GT: c = c > 0; break;
881 case Py_GE: c = c >= 0; break;
882 default:
883 result = Py_NotImplemented;
884 goto out;
885 }
886 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000887 out:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000888 Py_INCREF(result);
889 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000890}
891
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000892static long
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000893bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000894{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000895 register Py_ssize_t len;
896 register unsigned char *p;
897 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000898
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000899 if (a->ob_shash != -1)
900 return a->ob_shash;
901 len = Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100902 /*
903 We make the hash of the empty string be 0, rather than using
904 (prefix ^ suffix), since this slightly obfuscates the hash secret
905 */
906 if (len == 0) {
907 a->ob_shash = 0;
908 return 0;
909 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000910 p = (unsigned char *) a->ob_sval;
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100911 x = _Py_HashSecret.prefix;
912 x ^= *p << 7;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000913 while (--len >= 0)
914 x = (1000003*x) ^ *p++;
915 x ^= Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100916 x ^= _Py_HashSecret.suffix;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000917 if (x == -1)
918 x = -2;
919 a->ob_shash = x;
920 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000921}
922
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000923static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000924bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000925{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000926 if (PyIndex_Check(item)) {
927 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
928 if (i == -1 && PyErr_Occurred())
929 return NULL;
930 if (i < 0)
931 i += PyBytes_GET_SIZE(self);
932 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
933 PyErr_SetString(PyExc_IndexError,
934 "index out of range");
935 return NULL;
936 }
937 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
938 }
939 else if (PySlice_Check(item)) {
940 Py_ssize_t start, stop, step, slicelength, cur, i;
941 char* source_buf;
942 char* result_buf;
943 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000944
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000945 if (PySlice_GetIndicesEx((PySliceObject*)item,
946 PyBytes_GET_SIZE(self),
947 &start, &stop, &step, &slicelength) < 0) {
948 return NULL;
949 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000950
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000951 if (slicelength <= 0) {
952 return PyBytes_FromStringAndSize("", 0);
953 }
954 else if (start == 0 && step == 1 &&
955 slicelength == PyBytes_GET_SIZE(self) &&
956 PyBytes_CheckExact(self)) {
957 Py_INCREF(self);
958 return (PyObject *)self;
959 }
960 else if (step == 1) {
961 return PyBytes_FromStringAndSize(
962 PyBytes_AS_STRING(self) + start,
963 slicelength);
964 }
965 else {
966 source_buf = PyBytes_AS_STRING(self);
967 result = PyBytes_FromStringAndSize(NULL, slicelength);
968 if (result == NULL)
969 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000970
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000971 result_buf = PyBytes_AS_STRING(result);
972 for (cur = start, i = 0; i < slicelength;
973 cur += step, i++) {
974 result_buf[i] = source_buf[cur];
975 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000976
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000977 return result;
978 }
979 }
980 else {
981 PyErr_Format(PyExc_TypeError,
982 "byte indices must be integers, not %.200s",
983 Py_TYPE(item)->tp_name);
984 return NULL;
985 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000986}
987
988static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000989bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000990{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000991 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
992 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993}
994
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000995static PySequenceMethods bytes_as_sequence = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000996 (lenfunc)bytes_length, /*sq_length*/
997 (binaryfunc)bytes_concat, /*sq_concat*/
998 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
999 (ssizeargfunc)bytes_item, /*sq_item*/
1000 0, /*sq_slice*/
1001 0, /*sq_ass_item*/
1002 0, /*sq_ass_slice*/
1003 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001004};
1005
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001006static PyMappingMethods bytes_as_mapping = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001007 (lenfunc)bytes_length,
1008 (binaryfunc)bytes_subscript,
1009 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001010};
1011
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001012static PyBufferProcs bytes_as_buffer = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001013 (getbufferproc)bytes_buffer_getbuffer,
1014 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001015};
1016
1017
/* Selector values for the three strip variants; they double as indices
   into stripformat[] below. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2

/* Argument-format strings, indexed by the selector values above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Bare method name for selector i: the format string without its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1026
Neal Norwitz6968b052007-02-27 19:02:19 +00001027
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly and memcmp() handles the rest.
   Don't call if length < 2: the memcmp() count is length-2.
   Every argument is parenthesized so that expression arguments (such as
   `p + 1` or `cond ? a : b`) expand correctly; arguments are evaluated
   more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)              \
    ((target)[(offset)] == (pattern)[0] &&                            \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&        \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1033
1034
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements.  The argument is parenthesized
   so that expression arguments expand correctly; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Neal Norwitz6968b052007-02-27 19:02:19 +00001047
/* Append the bytes data[left:right] to the split result.
 *
 * Relies on names in the expanding function: `str` (scratch PyObject *),
 * `list` (the result list), `count` (items stored so far) and an `onError`
 * label that is jumped to when allocation or appending fails.
 * While count is below MAX_PREALLOC the item is stored directly into the
 * list slot, which takes over the reference; after that it is appended and
 * our own reference is dropped.
 *
 * Wrapped in do { } while (0) so that `SPLIT_ADD(...);` is exactly one
 * statement and stays safe as the body of an if/else.
 */
#define SPLIT_ADD(data, left, right) do {                           \
    str = PyBytes_FromStringAndSize((data) + (left),                \
                                    (right) - (left));              \
    if (str == NULL)                                                \
        goto onError;                                               \
    if (count < MAX_PREALLOC) {                                     \
        PyList_SET_ITEM(list, count, str);                          \
    } else {                                                        \
        if (PyList_Append(list, str)) {                             \
            Py_DECREF(str);                                         \
            goto onError;                                           \
        }                                                           \
        else                                                        \
            Py_DECREF(str);                                         \
    }                                                               \
    count++; } while (0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001064
/* Always force the list to the expected size.  Uses the `count` variable
   of the expanding function. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Cursor helpers for whitespace splitting.  `i` must be a modifiable index
   variable; `s` and `len` may be arbitrary expressions (they are
   parenthesized in the expansion) but are evaluated repeatedly.
   SKIP_*  advance i towards len while the byte at s[i] is / is not
   whitespace; RSKIP_* move i down towards -1 in the same way. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[i])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
1073Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001074split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001075{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001076 const char *s = PyBytes_AS_STRING(self);
1077 Py_ssize_t i, j, count=0;
1078 PyObject *str;
1079 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001080
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001081 if (list == NULL)
1082 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001083
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001084 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001085
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001086 while (maxsplit-- > 0) {
1087 SKIP_SPACE(s, i, len);
1088 if (i==len) break;
1089 j = i; i++;
1090 SKIP_NONSPACE(s, i, len);
1091 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1092 /* No whitespace in self, so just use it as list[0] */
1093 Py_INCREF(self);
1094 PyList_SET_ITEM(list, 0, (PyObject *)self);
1095 count++;
1096 break;
1097 }
1098 SPLIT_ADD(s, j, i);
1099 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001100
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001101 if (i < len) {
1102 /* Only occurs when maxsplit was reached */
1103 /* Skip any remaining whitespace and copy to end of string */
1104 SKIP_SPACE(s, i, len);
1105 if (i != len)
1106 SPLIT_ADD(s, i, len);
1107 }
1108 FIX_PREALLOC_SIZE(list);
1109 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001110 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001111 Py_DECREF(list);
1112 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001113}
1114
Guido van Rossum8f950672007-09-10 16:53:45 +00001115Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001116split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001117{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001118 const char *s = PyBytes_AS_STRING(self);
1119 register Py_ssize_t i, j, count=0;
1120 PyObject *str;
1121 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001122
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001123 if (list == NULL)
1124 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001125
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001126 i = j = 0;
1127 while ((j < len) && (maxcount-- > 0)) {
1128 for(; j<len; j++) {
1129 /* I found that using memchr makes no difference */
1130 if (s[j] == ch) {
1131 SPLIT_ADD(s, i, j);
1132 i = j = j + 1;
1133 break;
1134 }
1135 }
1136 }
1137 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1138 /* ch not in self, so just use self as list[0] */
1139 Py_INCREF(self);
1140 PyList_SET_ITEM(list, 0, (PyObject *)self);
1141 count++;
1142 }
1143 else if (i <= len) {
1144 SPLIT_ADD(s, i, len);
1145 }
1146 FIX_PREALLOC_SIZE(list);
1147 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001148
1149 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001150 Py_DECREF(list);
1151 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001152}
1153
Neal Norwitz6968b052007-02-27 19:02:19 +00001154PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001155"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001156\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001157Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001158If sep is not specified or is None, B is split on ASCII whitespace\n\
1159characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001160If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001161
1162static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001163bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001164{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001165 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1166 Py_ssize_t maxsplit = -1, count=0;
1167 const char *s = PyBytes_AS_STRING(self), *sub;
1168 Py_buffer vsub;
1169 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001170#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001171 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001172#endif
1173
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001174 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1175 return NULL;
1176 if (maxsplit < 0)
1177 maxsplit = PY_SSIZE_T_MAX;
1178 if (subobj == Py_None)
1179 return split_whitespace(self, len, maxsplit);
1180 if (_getbuffer(subobj, &vsub) < 0)
1181 return NULL;
1182 sub = vsub.buf;
1183 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001184
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001185 if (n == 0) {
1186 PyErr_SetString(PyExc_ValueError, "empty separator");
1187 PyBuffer_Release(&vsub);
1188 return NULL;
1189 }
1190 else if (n == 1) {
1191 list = split_char(self, len, sub[0], maxsplit);
1192 PyBuffer_Release(&vsub);
1193 return list;
1194 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001195
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001196 list = PyList_New(PREALLOC_SIZE(maxsplit));
1197 if (list == NULL) {
1198 PyBuffer_Release(&vsub);
1199 return NULL;
1200 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001201
1202#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001203 i = j = 0;
1204 while (maxsplit-- > 0) {
1205 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1206 if (pos < 0)
1207 break;
1208 j = i+pos;
1209 SPLIT_ADD(s, i, j);
1210 i = j + n;
1211 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001212#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001213 i = j = 0;
1214 while ((j+n <= len) && (maxsplit-- > 0)) {
1215 for (; j+n <= len; j++) {
1216 if (Py_STRING_MATCH(s, j, sub, n)) {
1217 SPLIT_ADD(s, i, j);
1218 i = j = j + n;
1219 break;
1220 }
1221 }
1222 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001223#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001224 SPLIT_ADD(s, i, len);
1225 FIX_PREALLOC_SIZE(list);
1226 PyBuffer_Release(&vsub);
1227 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001228
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001230 Py_DECREF(list);
1231 PyBuffer_Release(&vsub);
1232 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001233}
1234
Neal Norwitz6968b052007-02-27 19:02:19 +00001235PyDoc_STRVAR(partition__doc__,
1236"B.partition(sep) -> (head, sep, tail)\n\
1237\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001238Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001239the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001241
1242static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001243bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001244{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001245 const char *sep;
1246 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001247
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001248 if (PyBytes_Check(sep_obj)) {
1249 sep = PyBytes_AS_STRING(sep_obj);
1250 sep_len = PyBytes_GET_SIZE(sep_obj);
1251 }
1252 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1253 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001254
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001255 return stringlib_partition(
1256 (PyObject*) self,
1257 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1258 sep_obj, sep, sep_len
1259 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001260}
1261
1262PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti4c81fbb2010-01-25 12:02:24 +00001263"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001264\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001265Search for the separator sep in B, starting at the end of B,\n\
1266and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001267part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001269
1270static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001271bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001272{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001273 const char *sep;
1274 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001276 if (PyBytes_Check(sep_obj)) {
1277 sep = PyBytes_AS_STRING(sep_obj);
1278 sep_len = PyBytes_GET_SIZE(sep_obj);
1279 }
1280 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1281 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001282
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001283 return stringlib_rpartition(
1284 (PyObject*) self,
1285 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1286 sep_obj, sep, sep_len
1287 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001288}
1289
1290Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001292{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001293 const char *s = PyBytes_AS_STRING(self);
1294 Py_ssize_t i, j, count=0;
1295 PyObject *str;
1296 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001297
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001298 if (list == NULL)
1299 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001300
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001301 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001302
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001303 while (maxsplit-- > 0) {
1304 RSKIP_SPACE(s, i);
1305 if (i<0) break;
1306 j = i; i--;
1307 RSKIP_NONSPACE(s, i);
1308 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1309 /* No whitespace in self, so just use it as list[0] */
1310 Py_INCREF(self);
1311 PyList_SET_ITEM(list, 0, (PyObject *)self);
1312 count++;
1313 break;
1314 }
1315 SPLIT_ADD(s, i + 1, j + 1);
1316 }
1317 if (i >= 0) {
1318 /* Only occurs when maxsplit was reached. Skip any remaining
1319 whitespace and copy to beginning of string. */
1320 RSKIP_SPACE(s, i);
1321 if (i >= 0)
1322 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001323
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001324 }
1325 FIX_PREALLOC_SIZE(list);
1326 if (PyList_Reverse(list) < 0)
1327 goto onError;
1328 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001329 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001330 Py_DECREF(list);
1331 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001332}
1333
Guido van Rossum8f950672007-09-10 16:53:45 +00001334Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001336{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001337 const char *s = PyBytes_AS_STRING(self);
1338 register Py_ssize_t i, j, count=0;
1339 PyObject *str;
1340 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001341
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001342 if (list == NULL)
1343 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001344
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001345 i = j = len - 1;
1346 while ((i >= 0) && (maxcount-- > 0)) {
1347 for (; i >= 0; i--) {
1348 if (s[i] == ch) {
1349 SPLIT_ADD(s, i + 1, j + 1);
1350 j = i = i - 1;
1351 break;
1352 }
1353 }
1354 }
1355 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1356 /* ch not in self, so just use self as list[0] */
1357 Py_INCREF(self);
1358 PyList_SET_ITEM(list, 0, (PyObject *)self);
1359 count++;
1360 }
1361 else if (j >= -1) {
1362 SPLIT_ADD(s, 0, j + 1);
1363 }
1364 FIX_PREALLOC_SIZE(list);
1365 if (PyList_Reverse(list) < 0)
1366 goto onError;
1367 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001368
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001370 Py_DECREF(list);
1371 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001372}
1373
Neal Norwitz6968b052007-02-27 19:02:19 +00001374PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001375"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001376\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001377Return a list of the sections in B, using sep as the delimiter,\n\
1378starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001379If sep is not given, B is split on ASCII whitespace characters\n\
1380(space, tab, return, newline, formfeed, vertical tab).\n\
1381If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001382
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383
Neal Norwitz6968b052007-02-27 19:02:19 +00001384static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001385bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001386{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001387 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1388 Py_ssize_t maxsplit = -1, count=0;
1389 const char *s, *sub;
1390 Py_buffer vsub;
1391 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001392
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001393 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1394 return NULL;
1395 if (maxsplit < 0)
1396 maxsplit = PY_SSIZE_T_MAX;
1397 if (subobj == Py_None)
1398 return rsplit_whitespace(self, len, maxsplit);
1399 if (_getbuffer(subobj, &vsub) < 0)
1400 return NULL;
1401 sub = vsub.buf;
1402 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001403
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001404 if (n == 0) {
1405 PyErr_SetString(PyExc_ValueError, "empty separator");
1406 PyBuffer_Release(&vsub);
1407 return NULL;
1408 }
1409 else if (n == 1) {
1410 list = rsplit_char(self, len, sub[0], maxsplit);
1411 PyBuffer_Release(&vsub);
1412 return list;
1413 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001414
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001415 list = PyList_New(PREALLOC_SIZE(maxsplit));
1416 if (list == NULL) {
1417 PyBuffer_Release(&vsub);
1418 return NULL;
1419 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001420
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001421 j = len;
1422 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001423
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001424 s = PyBytes_AS_STRING(self);
1425 while ( (i >= 0) && (maxsplit-- > 0) ) {
1426 for (; i>=0; i--) {
1427 if (Py_STRING_MATCH(s, i, sub, n)) {
1428 SPLIT_ADD(s, i + n, j);
1429 j = i;
1430 i -= n;
1431 break;
1432 }
1433 }
1434 }
1435 SPLIT_ADD(s, 0, j);
1436 FIX_PREALLOC_SIZE(list);
1437 if (PyList_Reverse(list) < 0)
1438 goto onError;
1439 PyBuffer_Release(&vsub);
1440 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001441
1442onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001443 Py_DECREF(list);
1444 PyBuffer_Release(&vsub);
1445 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001446}
1447
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001448#undef SPLIT_ADD
1449#undef MAX_PREALLOC
1450#undef PREALLOC_SIZE
1451
1452
1453PyDoc_STRVAR(join__doc__,
1454"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001455\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001456Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001457Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1458
Neal Norwitz6968b052007-02-27 19:02:19 +00001459static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001460bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001461{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001462 char *sep = PyBytes_AS_STRING(self);
1463 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1464 PyObject *res = NULL;
1465 char *p;
1466 Py_ssize_t seqlen = 0;
1467 size_t sz = 0;
1468 Py_ssize_t i;
1469 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001470
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001471 seq = PySequence_Fast(orig, "");
1472 if (seq == NULL) {
1473 return NULL;
1474 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001475
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001476 seqlen = PySequence_Size(seq);
1477 if (seqlen == 0) {
1478 Py_DECREF(seq);
1479 return PyBytes_FromString("");
1480 }
1481 if (seqlen == 1) {
1482 item = PySequence_Fast_GET_ITEM(seq, 0);
1483 if (PyBytes_CheckExact(item)) {
1484 Py_INCREF(item);
1485 Py_DECREF(seq);
1486 return item;
1487 }
1488 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001489
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001490 /* There are at least two things to join, or else we have a subclass
1491 * of the builtin types in the sequence.
1492 * Do a pre-pass to figure out the total amount of space we'll
1493 * need (sz), and see whether all argument are bytes.
1494 */
1495 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1496 for (i = 0; i < seqlen; i++) {
1497 const size_t old_sz = sz;
1498 item = PySequence_Fast_GET_ITEM(seq, i);
1499 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1500 PyErr_Format(PyExc_TypeError,
1501 "sequence item %zd: expected bytes,"
1502 " %.80s found",
1503 i, Py_TYPE(item)->tp_name);
1504 Py_DECREF(seq);
1505 return NULL;
1506 }
1507 sz += Py_SIZE(item);
1508 if (i != 0)
1509 sz += seplen;
1510 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1511 PyErr_SetString(PyExc_OverflowError,
1512 "join() result is too long for bytes");
1513 Py_DECREF(seq);
1514 return NULL;
1515 }
1516 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001517
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001518 /* Allocate result space. */
1519 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1520 if (res == NULL) {
1521 Py_DECREF(seq);
1522 return NULL;
1523 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001525 /* Catenate everything. */
1526 /* I'm not worried about a PyByteArray item growing because there's
1527 nowhere in this function where we release the GIL. */
1528 p = PyBytes_AS_STRING(res);
1529 for (i = 0; i < seqlen; ++i) {
1530 size_t n;
1531 char *q;
1532 if (i) {
1533 Py_MEMCPY(p, sep, seplen);
1534 p += seplen;
1535 }
1536 item = PySequence_Fast_GET_ITEM(seq, i);
1537 n = Py_SIZE(item);
1538 if (PyBytes_Check(item))
1539 q = PyBytes_AS_STRING(item);
1540 else
1541 q = PyByteArray_AS_STRING(item);
1542 Py_MEMCPY(p, q, n);
1543 p += n;
1544 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001545
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001546 Py_DECREF(seq);
1547 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001548}
1549
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001550PyObject *
1551_PyBytes_Join(PyObject *sep, PyObject *x)
1552{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001553 assert(sep != NULL && PyBytes_Check(sep));
1554 assert(x != NULL);
1555 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001556}
1557
1558Py_LOCAL_INLINE(void)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001559bytes_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001560{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001561 if (*end > len)
1562 *end = len;
1563 else if (*end < 0)
1564 *end += len;
1565 if (*end < 0)
1566 *end = 0;
1567 if (*start < 0)
1568 *start += len;
1569 if (*start < 0)
1570 *start = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571}
1572
1573Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001574bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001575{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001576 PyObject *subobj;
1577 const char *sub;
1578 Py_ssize_t sub_len;
1579 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001580
Jesus Ceaac451502011-04-20 17:09:23 +02001581 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1582 args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001583 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001584
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001585 if (PyBytes_Check(subobj)) {
1586 sub = PyBytes_AS_STRING(subobj);
1587 sub_len = PyBytes_GET_SIZE(subobj);
1588 }
1589 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1590 /* XXX - the "expected a character buffer object" is pretty
1591 confusing for a non-expert. remap to something else ? */
1592 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001594 if (dir > 0)
1595 return stringlib_find_slice(
1596 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1597 sub, sub_len, start, end);
1598 else
1599 return stringlib_rfind_slice(
1600 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1601 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001602}
1603
1604
1605PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001606"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001607\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608Return the lowest index in S where substring sub is found,\n\
1609such that sub is contained within s[start:end]. Optional\n\
1610arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001611\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001612Return -1 on failure.");
1613
Neal Norwitz6968b052007-02-27 19:02:19 +00001614static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001615bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001616{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001617 Py_ssize_t result = bytes_find_internal(self, args, +1);
1618 if (result == -2)
1619 return NULL;
1620 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001621}
1622
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623
1624PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001625"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001626\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627Like B.find() but raise ValueError when the substring is not found.");
1628
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001629static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001630bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001631{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001632 Py_ssize_t result = bytes_find_internal(self, args, +1);
1633 if (result == -2)
1634 return NULL;
1635 if (result == -1) {
1636 PyErr_SetString(PyExc_ValueError,
1637 "substring not found");
1638 return NULL;
1639 }
1640 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001641}
1642
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643
1644PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001645"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001646\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647Return the highest index in B where substring sub is found,\n\
1648such that sub is contained within s[start:end]. Optional\n\
1649arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001650\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651Return -1 on failure.");
1652
Neal Norwitz6968b052007-02-27 19:02:19 +00001653static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001654bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001655{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001656 Py_ssize_t result = bytes_find_internal(self, args, -1);
1657 if (result == -2)
1658 return NULL;
1659 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001660}
1661
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001662
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001663PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001664"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665\n\
1666Like B.rfind() but raise ValueError when the substring is not found.");
1667
1668static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001669bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001670{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001671 Py_ssize_t result = bytes_find_internal(self, args, -1);
1672 if (result == -2)
1673 return NULL;
1674 if (result == -1) {
1675 PyErr_SetString(PyExc_ValueError,
1676 "substring not found");
1677 return NULL;
1678 }
1679 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001680}
1681
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
1683Py_LOCAL_INLINE(PyObject *)
1684do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001685{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001686 Py_buffer vsep;
1687 char *s = PyBytes_AS_STRING(self);
1688 Py_ssize_t len = PyBytes_GET_SIZE(self);
1689 char *sep;
1690 Py_ssize_t seplen;
1691 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001693 if (_getbuffer(sepobj, &vsep) < 0)
1694 return NULL;
1695 sep = vsep.buf;
1696 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001698 i = 0;
1699 if (striptype != RIGHTSTRIP) {
1700 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1701 i++;
1702 }
1703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001705 j = len;
1706 if (striptype != LEFTSTRIP) {
1707 do {
1708 j--;
1709 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1710 j++;
1711 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001713 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001715 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1716 Py_INCREF(self);
1717 return (PyObject*)self;
1718 }
1719 else
1720 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001721}
1722
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
1724Py_LOCAL_INLINE(PyObject *)
1725do_strip(PyBytesObject *self, int striptype)
1726{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001727 char *s = PyBytes_AS_STRING(self);
1728 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001730 i = 0;
1731 if (striptype != RIGHTSTRIP) {
1732 while (i < len && ISSPACE(s[i])) {
1733 i++;
1734 }
1735 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001737 j = len;
1738 if (striptype != LEFTSTRIP) {
1739 do {
1740 j--;
1741 } while (j >= i && ISSPACE(s[j]));
1742 j++;
1743 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001745 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1746 Py_INCREF(self);
1747 return (PyObject*)self;
1748 }
1749 else
1750 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751}
1752
1753
1754Py_LOCAL_INLINE(PyObject *)
1755do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1756{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001757 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001759 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1760 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001762 if (sep != NULL && sep != Py_None) {
1763 return do_xstrip(self, striptype, sep);
1764 }
1765 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001766}
1767
1768
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001769PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001771\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001772Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001774static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001775bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001776{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001777 if (PyTuple_GET_SIZE(args) == 0)
1778 return do_strip(self, BOTHSTRIP); /* Common case */
1779 else
1780 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001781}
1782
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001784PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001786\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001787Strip leading bytes contained in the argument.\n\
1788If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001789static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001790bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001791{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001792 if (PyTuple_GET_SIZE(args) == 0)
1793 return do_strip(self, LEFTSTRIP); /* Common case */
1794 else
1795 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001796}
1797
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001799PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001801\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001802Strip trailing bytes contained in the argument.\n\
1803If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001804static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001805bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001806{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001807 if (PyTuple_GET_SIZE(args) == 0)
1808 return do_strip(self, RIGHTSTRIP); /* Common case */
1809 else
1810 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001811}
Neal Norwitz6968b052007-02-27 19:02:19 +00001812
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813
1814PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001815"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001816\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817Return the number of non-overlapping occurrences of substring sub in\n\
1818string S[start:end]. Optional arguments start and end are interpreted\n\
1819as in slice notation.");
1820
1821static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001822bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001823{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001824 PyObject *sub_obj;
1825 const char *str = PyBytes_AS_STRING(self), *sub;
1826 Py_ssize_t sub_len;
1827 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828
Jesus Ceaac451502011-04-20 17:09:23 +02001829 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001830 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001832 if (PyBytes_Check(sub_obj)) {
1833 sub = PyBytes_AS_STRING(sub_obj);
1834 sub_len = PyBytes_GET_SIZE(sub_obj);
1835 }
1836 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1837 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001839 bytes_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001841 return PyLong_FromSsize_t(
1842 stringlib_count(str + start, end - start, sub, sub_len)
1843 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844}
1845
1846
1847PyDoc_STRVAR(translate__doc__,
1848"B.translate(table[, deletechars]) -> bytes\n\
1849\n\
1850Return a copy of B, where all characters occurring in the\n\
1851optional argument deletechars are removed, and the remaining\n\
1852characters have been mapped through the given translation\n\
1853table, which must be a bytes object of length 256.");
1854
1855static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001856bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001858 register char *input, *output;
1859 const char *table;
1860 register Py_ssize_t i, c, changed = 0;
1861 PyObject *input_obj = (PyObject*)self;
1862 const char *output_start, *del_table=NULL;
1863 Py_ssize_t inlen, tablen, dellen = 0;
1864 PyObject *result;
1865 int trans_table[256];
1866 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001868 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1869 &tableobj, &delobj))
1870 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001871
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001872 if (PyBytes_Check(tableobj)) {
1873 table = PyBytes_AS_STRING(tableobj);
1874 tablen = PyBytes_GET_SIZE(tableobj);
1875 }
1876 else if (tableobj == Py_None) {
1877 table = NULL;
1878 tablen = 256;
1879 }
1880 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1881 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001883 if (tablen != 256) {
1884 PyErr_SetString(PyExc_ValueError,
1885 "translation table must be 256 characters long");
1886 return NULL;
1887 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001889 if (delobj != NULL) {
1890 if (PyBytes_Check(delobj)) {
1891 del_table = PyBytes_AS_STRING(delobj);
1892 dellen = PyBytes_GET_SIZE(delobj);
1893 }
1894 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1895 return NULL;
1896 }
1897 else {
1898 del_table = NULL;
1899 dellen = 0;
1900 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001902 inlen = PyBytes_GET_SIZE(input_obj);
1903 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1904 if (result == NULL)
1905 return NULL;
1906 output_start = output = PyBytes_AsString(result);
1907 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001909 if (dellen == 0 && table != NULL) {
1910 /* If no deletions are required, use faster code */
1911 for (i = inlen; --i >= 0; ) {
1912 c = Py_CHARMASK(*input++);
1913 if (Py_CHARMASK((*output++ = table[c])) != c)
1914 changed = 1;
1915 }
1916 if (changed || !PyBytes_CheckExact(input_obj))
1917 return result;
1918 Py_DECREF(result);
1919 Py_INCREF(input_obj);
1920 return input_obj;
1921 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001923 if (table == NULL) {
1924 for (i = 0; i < 256; i++)
1925 trans_table[i] = Py_CHARMASK(i);
1926 } else {
1927 for (i = 0; i < 256; i++)
1928 trans_table[i] = Py_CHARMASK(table[i]);
1929 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001931 for (i = 0; i < dellen; i++)
1932 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001934 for (i = inlen; --i >= 0; ) {
1935 c = Py_CHARMASK(*input++);
1936 if (trans_table[c] != -1)
1937 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1938 continue;
1939 changed = 1;
1940 }
1941 if (!changed && PyBytes_CheckExact(input_obj)) {
1942 Py_DECREF(result);
1943 Py_INCREF(input_obj);
1944 return input_obj;
1945 }
1946 /* Fix the size of the resulting string */
1947 if (inlen > 0)
1948 _PyBytes_Resize(&result, output - output_start);
1949 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001950}
1951
1952
Georg Brandlabc38772009-04-12 15:51:51 +00001953static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001954bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001955{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001956 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001957}
1958
/* Search directions passed as the 'direction' argument of the
   findstring()/countstring() helpers. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL when the
   byte is absent (memchr semantics).  Arguments are parenthesized so
   that arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1966
1967/* String ops must return a string. */
1968/* If the object is subclass of string, create a copy */
1969Py_LOCAL(PyBytesObject *)
1970return_self(PyBytesObject *self)
1971{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001972 if (PyBytes_CheckExact(self)) {
1973 Py_INCREF(self);
1974 return self;
1975 }
1976 return (PyBytesObject *)PyBytes_FromStringAndSize(
1977 PyBytes_AS_STRING(self),
1978 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979}
1980
1981Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitroubc760d92010-08-15 17:46:50 +00001982countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001984 Py_ssize_t count=0;
1985 const char *start=target;
1986 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001987
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001988 while ( (start=findchar(start, end-start, c)) != NULL ) {
1989 count++;
1990 if (count >= maxcount)
1991 break;
1992 start += 1;
1993 }
1994 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995}
1996
1997Py_LOCAL(Py_ssize_t)
1998findstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001999 const char *pattern, Py_ssize_t pattern_len,
2000 Py_ssize_t start,
2001 Py_ssize_t end,
2002 int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002004 if (start < 0) {
2005 start += target_len;
2006 if (start < 0)
2007 start = 0;
2008 }
2009 if (end > target_len) {
2010 end = target_len;
2011 } else if (end < 0) {
2012 end += target_len;
2013 if (end < 0)
2014 end = 0;
2015 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002017 /* zero-length substrings always match at the first attempt */
2018 if (pattern_len == 0)
2019 return (direction > 0) ? start : end;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002020
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002021 end -= pattern_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002022
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002023 if (direction < 0) {
2024 for (; end >= start; end--)
2025 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2026 return end;
2027 } else {
2028 for (; start <= end; start++)
2029 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2030 return start;
2031 }
2032 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002033}
2034
2035Py_LOCAL_INLINE(Py_ssize_t)
2036countstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002037 const char *pattern, Py_ssize_t pattern_len,
2038 Py_ssize_t start,
2039 Py_ssize_t end,
2040 int direction, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002041{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002042 Py_ssize_t count=0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002043
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002044 if (start < 0) {
2045 start += target_len;
2046 if (start < 0)
2047 start = 0;
2048 }
2049 if (end > target_len) {
2050 end = target_len;
2051 } else if (end < 0) {
2052 end += target_len;
2053 if (end < 0)
2054 end = 0;
2055 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002057 /* zero-length substrings match everywhere */
2058 if (pattern_len == 0 || maxcount == 0) {
2059 if (target_len+1 < maxcount)
2060 return target_len+1;
2061 return maxcount;
2062 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002064 end -= pattern_len;
2065 if (direction < 0) {
2066 for (; (end >= start); end--)
2067 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2068 count++;
2069 if (--maxcount <= 0) break;
2070 end -= pattern_len-1;
2071 }
2072 } else {
2073 for (; (start <= end); start++)
2074 if (Py_STRING_MATCH(target, start,
2075 pattern, pattern_len)) {
2076 count++;
2077 if (--maxcount <= 0)
2078 break;
2079 start += pattern_len-1;
2080 }
2081 }
2082 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083}
2084
2085
2086/* Algorithms for different cases of string replacement */
2087
2088/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2089Py_LOCAL(PyBytesObject *)
2090replace_interleave(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002091 const char *to_s, Py_ssize_t to_len,
2092 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002094 char *self_s, *result_s;
2095 Py_ssize_t self_len, result_len;
2096 Py_ssize_t count, i, product;
2097 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002099 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002100
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002101 /* 1 at the end plus 1 after every character */
2102 count = self_len+1;
2103 if (maxcount < count)
2104 count = maxcount;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002106 /* Check for overflow */
2107 /* result_len = count * to_len + self_len; */
2108 product = count * to_len;
2109 if (product / to_len != count) {
2110 PyErr_SetString(PyExc_OverflowError,
2111 "replacement bytes are too long");
2112 return NULL;
2113 }
2114 result_len = product + self_len;
2115 if (result_len < 0) {
2116 PyErr_SetString(PyExc_OverflowError,
2117 "replacement bytes are too long");
2118 return NULL;
2119 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002121 if (! (result = (PyBytesObject *)
2122 PyBytes_FromStringAndSize(NULL, result_len)) )
2123 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002125 self_s = PyBytes_AS_STRING(self);
2126 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002128 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002130 /* Lay the first one down (guaranteed this will occur) */
2131 Py_MEMCPY(result_s, to_s, to_len);
2132 result_s += to_len;
2133 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002134
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002135 for (i=0; i<count; i++) {
2136 *result_s++ = *self_s++;
2137 Py_MEMCPY(result_s, to_s, to_len);
2138 result_s += to_len;
2139 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002141 /* Copy the rest of the original string */
2142 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002144 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145}
2146
2147/* Special case for deleting a single character */
2148/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2149Py_LOCAL(PyBytesObject *)
2150replace_delete_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002151 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002153 char *self_s, *result_s;
2154 char *start, *next, *end;
2155 Py_ssize_t self_len, result_len;
2156 Py_ssize_t count;
2157 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002159 self_len = PyBytes_GET_SIZE(self);
2160 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002161
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002162 count = countchar(self_s, self_len, from_c, maxcount);
2163 if (count == 0) {
2164 return return_self(self);
2165 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002167 result_len = self_len - count; /* from_len == 1 */
2168 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002170 if ( (result = (PyBytesObject *)
2171 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2172 return NULL;
2173 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002174
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002175 start = self_s;
2176 end = self_s + self_len;
2177 while (count-- > 0) {
2178 next = findchar(start, end-start, from_c);
2179 if (next == NULL)
2180 break;
2181 Py_MEMCPY(result_s, start, next-start);
2182 result_s += (next-start);
2183 start = next+1;
2184 }
2185 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002187 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188}
2189
2190/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2191
2192Py_LOCAL(PyBytesObject *)
2193replace_delete_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002194 const char *from_s, Py_ssize_t from_len,
2195 Py_ssize_t maxcount) {
2196 char *self_s, *result_s;
2197 char *start, *next, *end;
2198 Py_ssize_t self_len, result_len;
2199 Py_ssize_t count, offset;
2200 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002202 self_len = PyBytes_GET_SIZE(self);
2203 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002205 count = countstring(self_s, self_len,
2206 from_s, from_len,
2207 0, self_len, 1,
2208 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002210 if (count == 0) {
2211 /* no matches */
2212 return return_self(self);
2213 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002215 result_len = self_len - (count * from_len);
2216 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002217
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002218 if ( (result = (PyBytesObject *)
2219 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2220 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002221
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002222 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002224 start = self_s;
2225 end = self_s + self_len;
2226 while (count-- > 0) {
2227 offset = findstring(start, end-start,
2228 from_s, from_len,
2229 0, end-start, FORWARD);
2230 if (offset == -1)
2231 break;
2232 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002233
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002234 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002235
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002236 result_s += (next-start);
2237 start = next+from_len;
2238 }
2239 Py_MEMCPY(result_s, start, end-start);
2240 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002241}
2242
2243/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2244Py_LOCAL(PyBytesObject *)
2245replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002246 char from_c, char to_c,
2247 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002249 char *self_s, *result_s, *start, *end, *next;
2250 Py_ssize_t self_len;
2251 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002252
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002253 /* The result string will be the same size */
2254 self_s = PyBytes_AS_STRING(self);
2255 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002256
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002257 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002258
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002259 if (next == NULL) {
2260 /* No matches; return the original string */
2261 return return_self(self);
2262 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002263
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002264 /* Need to make a new string */
2265 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2266 if (result == NULL)
2267 return NULL;
2268 result_s = PyBytes_AS_STRING(result);
2269 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002271 /* change everything in-place, starting with this one */
2272 start = result_s + (next-self_s);
2273 *start = to_c;
2274 start++;
2275 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002276
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002277 while (--maxcount > 0) {
2278 next = findchar(start, end-start, from_c);
2279 if (next == NULL)
2280 break;
2281 *next = to_c;
2282 start = next+1;
2283 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002285 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002286}
2287
2288/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2289Py_LOCAL(PyBytesObject *)
2290replace_substring_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002291 const char *from_s, Py_ssize_t from_len,
2292 const char *to_s, Py_ssize_t to_len,
2293 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002294{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002295 char *result_s, *start, *end;
2296 char *self_s;
2297 Py_ssize_t self_len, offset;
2298 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002299
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002300 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002302 self_s = PyBytes_AS_STRING(self);
2303 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002304
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002305 offset = findstring(self_s, self_len,
2306 from_s, from_len,
2307 0, self_len, FORWARD);
2308 if (offset == -1) {
2309 /* No matches; return the original string */
2310 return return_self(self);
2311 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002312
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002313 /* Need to make a new string */
2314 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2315 if (result == NULL)
2316 return NULL;
2317 result_s = PyBytes_AS_STRING(result);
2318 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002319
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002320 /* change everything in-place, starting with this one */
2321 start = result_s + offset;
2322 Py_MEMCPY(start, to_s, from_len);
2323 start += from_len;
2324 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002326 while ( --maxcount > 0) {
2327 offset = findstring(start, end-start,
2328 from_s, from_len,
2329 0, end-start, FORWARD);
2330 if (offset==-1)
2331 break;
2332 Py_MEMCPY(start+offset, to_s, from_len);
2333 start += offset+from_len;
2334 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002335
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002336 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002337}
2338
2339/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2340Py_LOCAL(PyBytesObject *)
2341replace_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002342 char from_c,
2343 const char *to_s, Py_ssize_t to_len,
2344 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002345{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002346 char *self_s, *result_s;
2347 char *start, *next, *end;
2348 Py_ssize_t self_len, result_len;
2349 Py_ssize_t count, product;
2350 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002351
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002352 self_s = PyBytes_AS_STRING(self);
2353 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002355 count = countchar(self_s, self_len, from_c, maxcount);
2356 if (count == 0) {
2357 /* no matches, return unchanged */
2358 return return_self(self);
2359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002361 /* use the difference between current and new, hence the "-1" */
2362 /* result_len = self_len + count * (to_len-1) */
2363 product = count * (to_len-1);
2364 if (product / (to_len-1) != count) {
2365 PyErr_SetString(PyExc_OverflowError,
2366 "replacement bytes are too long");
2367 return NULL;
2368 }
2369 result_len = self_len + product;
2370 if (result_len < 0) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replacment bytes are too long");
2373 return NULL;
2374 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002375
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002376 if ( (result = (PyBytesObject *)
2377 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2378 return NULL;
2379 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002380
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002381 start = self_s;
2382 end = self_s + self_len;
2383 while (count-- > 0) {
2384 next = findchar(start, end-start, from_c);
2385 if (next == NULL)
2386 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002387
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002388 if (next == start) {
2389 /* replace with the 'to' */
2390 Py_MEMCPY(result_s, to_s, to_len);
2391 result_s += to_len;
2392 start += 1;
2393 } else {
2394 /* copy the unchanged old then the 'to' */
2395 Py_MEMCPY(result_s, start, next-start);
2396 result_s += (next-start);
2397 Py_MEMCPY(result_s, to_s, to_len);
2398 result_s += to_len;
2399 start = next+1;
2400 }
2401 }
2402 /* Copy the remainder of the remaining string */
2403 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002404
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002405 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002406}
2407
2408/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2409Py_LOCAL(PyBytesObject *)
2410replace_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002411 const char *from_s, Py_ssize_t from_len,
2412 const char *to_s, Py_ssize_t to_len,
2413 Py_ssize_t maxcount) {
2414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count, offset, product;
2418 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002419
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002420 self_s = PyBytes_AS_STRING(self);
2421 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002422
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002423 count = countstring(self_s, self_len,
2424 from_s, from_len,
2425 0, self_len, FORWARD, maxcount);
2426 if (count == 0) {
2427 /* no matches, return unchanged */
2428 return return_self(self);
2429 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002430
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002431 /* Check for overflow */
2432 /* result_len = self_len + count * (to_len-from_len) */
2433 product = count * (to_len-from_len);
2434 if (product / (to_len-from_len) != count) {
2435 PyErr_SetString(PyExc_OverflowError,
2436 "replacement bytes are too long");
2437 return NULL;
2438 }
2439 result_len = self_len + product;
2440 if (result_len < 0) {
2441 PyErr_SetString(PyExc_OverflowError,
2442 "replacement bytes are too long");
2443 return NULL;
2444 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002445
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002446 if ( (result = (PyBytesObject *)
2447 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2448 return NULL;
2449 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002451 start = self_s;
2452 end = self_s + self_len;
2453 while (count-- > 0) {
2454 offset = findstring(start, end-start,
2455 from_s, from_len,
2456 0, end-start, FORWARD);
2457 if (offset == -1)
2458 break;
2459 next = start+offset;
2460 if (next == start) {
2461 /* replace with the 'to' */
2462 Py_MEMCPY(result_s, to_s, to_len);
2463 result_s += to_len;
2464 start += from_len;
2465 } else {
2466 /* copy the unchanged old then the 'to' */
2467 Py_MEMCPY(result_s, start, next-start);
2468 result_s += (next-start);
2469 Py_MEMCPY(result_s, to_s, to_len);
2470 result_s += to_len;
2471 start = next+from_len;
2472 }
2473 }
2474 /* Copy the remainder of the remaining string */
2475 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002477 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478}
2479
2480
2481Py_LOCAL(PyBytesObject *)
2482replace(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002483 const char *from_s, Py_ssize_t from_len,
2484 const char *to_s, Py_ssize_t to_len,
2485 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002486{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002487 if (maxcount < 0) {
2488 maxcount = PY_SSIZE_T_MAX;
2489 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2490 /* nothing to do; return the original string */
2491 return return_self(self);
2492 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002493
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002494 if (maxcount == 0 ||
2495 (from_len == 0 && to_len == 0)) {
2496 /* nothing to do; return the original string */
2497 return return_self(self);
2498 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002499
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002500 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002502 if (from_len == 0) {
2503 /* insert the 'to' string everywhere. */
2504 /* >>> "Python".replace("", ".") */
2505 /* '.P.y.t.h.o.n.' */
2506 return replace_interleave(self, to_s, to_len, maxcount);
2507 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002508
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002509 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2510 /* point for an empty self string to generate a non-empty string */
2511 /* Special case so the remaining code always gets a non-empty string */
2512 if (PyBytes_GET_SIZE(self) == 0) {
2513 return return_self(self);
2514 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002515
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002516 if (to_len == 0) {
2517 /* delete all occurrences of 'from' string */
2518 if (from_len == 1) {
2519 return replace_delete_single_character(
2520 self, from_s[0], maxcount);
2521 } else {
2522 return replace_delete_substring(self, from_s,
2523 from_len, maxcount);
2524 }
2525 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002527 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002528
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002529 if (from_len == to_len) {
2530 if (from_len == 1) {
2531 return replace_single_character_in_place(
2532 self,
2533 from_s[0],
2534 to_s[0],
2535 maxcount);
2536 } else {
2537 return replace_substring_in_place(
2538 self, from_s, from_len, to_s, to_len,
2539 maxcount);
2540 }
2541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002543 /* Otherwise use the more generic algorithms */
2544 if (from_len == 1) {
2545 return replace_single_character(self, from_s[0],
2546 to_s, to_len, maxcount);
2547 } else {
2548 /* len('from')>=2, len('to')>=1 */
2549 return replace_substring(self, from_s, from_len, to_s, to_len,
2550 maxcount);
2551 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002552}
2553
2554PyDoc_STRVAR(replace__doc__,
2555"B.replace(old, new[, count]) -> bytes\n\
2556\n\
2557Return a copy of B with all occurrences of subsection\n\
2558old replaced by new. If the optional argument count is\n\
Senthil Kumaranf7734202010-09-08 13:00:07 +00002559given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
2561static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002562bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002563{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002564 Py_ssize_t count = -1;
2565 PyObject *from, *to;
2566 const char *from_s, *to_s;
2567 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002569 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2570 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002572 if (PyBytes_Check(from)) {
2573 from_s = PyBytes_AS_STRING(from);
2574 from_len = PyBytes_GET_SIZE(from);
2575 }
2576 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2577 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002578
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002579 if (PyBytes_Check(to)) {
2580 to_s = PyBytes_AS_STRING(to);
2581 to_len = PyBytes_GET_SIZE(to);
2582 }
2583 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2584 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002586 return (PyObject *)replace((PyBytesObject *) self,
2587 from_s, from_len,
2588 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589}
2590
2591/** End DALKE **/
2592
2593/* Matches the end (direction >= 0) or start (direction < 0) of self
2594 * against substr, using the start and end arguments. Returns
2595 * -1 on error, 0 if not found and 1 if found.
2596 */
2597Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002598_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002599 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002601 Py_ssize_t len = PyBytes_GET_SIZE(self);
2602 Py_ssize_t slen;
2603 const char* sub;
2604 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002605
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002606 if (PyBytes_Check(substr)) {
2607 sub = PyBytes_AS_STRING(substr);
2608 slen = PyBytes_GET_SIZE(substr);
2609 }
2610 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2611 return -1;
2612 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002613
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002614 bytes_adjust_indices(&start, &end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002616 if (direction < 0) {
2617 /* startswith */
2618 if (start+slen > len)
2619 return 0;
2620 } else {
2621 /* endswith */
2622 if (end-start < slen || start > len)
2623 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002625 if (end-slen > start)
2626 start = end - slen;
2627 }
2628 if (end-start >= slen)
2629 return ! memcmp(str+start, sub, slen);
2630 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631}
2632
2633
2634PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002635"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002636\n\
2637Return True if B starts with the specified prefix, False otherwise.\n\
2638With optional start, test B beginning at that position.\n\
2639With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002640prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641
2642static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002643bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002645 Py_ssize_t start = 0;
2646 Py_ssize_t end = PY_SSIZE_T_MAX;
2647 PyObject *subobj;
2648 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002649
Jesus Ceaac451502011-04-20 17:09:23 +02002650 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002651 return NULL;
2652 if (PyTuple_Check(subobj)) {
2653 Py_ssize_t i;
2654 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2655 result = _bytes_tailmatch(self,
2656 PyTuple_GET_ITEM(subobj, i),
2657 start, end, -1);
2658 if (result == -1)
2659 return NULL;
2660 else if (result) {
2661 Py_RETURN_TRUE;
2662 }
2663 }
2664 Py_RETURN_FALSE;
2665 }
2666 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002667 if (result == -1) {
2668 if (PyErr_ExceptionMatches(PyExc_TypeError))
2669 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2670 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002671 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002672 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002673 else
2674 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675}
2676
2677
2678PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002679"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002680\n\
2681Return True if B ends with the specified suffix, False otherwise.\n\
2682With optional start, test B beginning at that position.\n\
2683With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002684suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
2686static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002687bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002688{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002689 Py_ssize_t start = 0;
2690 Py_ssize_t end = PY_SSIZE_T_MAX;
2691 PyObject *subobj;
2692 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Jesus Ceaac451502011-04-20 17:09:23 +02002694 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002695 return NULL;
2696 if (PyTuple_Check(subobj)) {
2697 Py_ssize_t i;
2698 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2699 result = _bytes_tailmatch(self,
2700 PyTuple_GET_ITEM(subobj, i),
2701 start, end, +1);
2702 if (result == -1)
2703 return NULL;
2704 else if (result) {
2705 Py_RETURN_TRUE;
2706 }
2707 }
2708 Py_RETURN_FALSE;
2709 }
2710 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002711 if (result == -1) {
2712 if (PyErr_ExceptionMatches(PyExc_TypeError))
2713 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2714 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002715 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002716 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002717 else
2718 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719}
2720
2721
2722PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002723"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002725Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002726to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002727handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2728a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002729as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002730able to handle UnicodeDecodeErrors.");
2731
2732static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002733bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002734{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002735 const char *encoding = NULL;
2736 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002737
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002738 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2739 return NULL;
2740 if (encoding == NULL)
2741 encoding = PyUnicode_GetDefaultEncoding();
2742 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002743}
2744
Guido van Rossum20188312006-05-05 15:15:40 +00002745
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002746PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002748\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002750Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002752
2753static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002754hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002755{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002756 if (c >= 128)
2757 return -1;
2758 if (ISDIGIT(c))
2759 return c - '0';
2760 else {
2761 if (ISUPPER(c))
2762 c = TOLOWER(c);
2763 if (c >= 'a' && c <= 'f')
2764 return c - 'a' + 10;
2765 }
2766 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002767}
2768
2769static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002770bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002771{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002772 PyObject *newstring, *hexobj;
2773 char *buf;
2774 Py_UNICODE *hex;
2775 Py_ssize_t hexlen, byteslen, i, j;
2776 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002777
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002778 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2779 return NULL;
2780 assert(PyUnicode_Check(hexobj));
2781 hexlen = PyUnicode_GET_SIZE(hexobj);
2782 hex = PyUnicode_AS_UNICODE(hexobj);
2783 byteslen = hexlen/2; /* This overestimates if there are spaces */
2784 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2785 if (!newstring)
2786 return NULL;
2787 buf = PyBytes_AS_STRING(newstring);
2788 for (i = j = 0; i < hexlen; i += 2) {
2789 /* skip over spaces in the input */
2790 while (hex[i] == ' ')
2791 i++;
2792 if (i >= hexlen)
2793 break;
2794 top = hex_digit_to_int(hex[i]);
2795 bot = hex_digit_to_int(hex[i+1]);
2796 if (top == -1 || bot == -1) {
2797 PyErr_Format(PyExc_ValueError,
2798 "non-hexadecimal number found in "
2799 "fromhex() arg at position %zd", i);
2800 goto error;
2801 }
2802 buf[j++] = (top << 4) + bot;
2803 }
2804 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2805 goto error;
2806 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002807
2808 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002809 Py_XDECREF(newstring);
2810 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002811}
2812
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002813PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002814"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002815
2816static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002817bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002818{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002819 Py_ssize_t res;
2820 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2821 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002822}
2823
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002824
2825static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002826bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002827{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002828 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002829}
2830
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002831
2832static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002833bytes_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002834 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2835 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2836 _Py_capitalize__doc__},
2837 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2838 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2839 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode__doc__},
2840 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2841 endswith__doc__},
2842 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2843 expandtabs__doc__},
2844 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2845 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2846 fromhex_doc},
2847 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2848 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2849 _Py_isalnum__doc__},
2850 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2851 _Py_isalpha__doc__},
2852 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2853 _Py_isdigit__doc__},
2854 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2855 _Py_islower__doc__},
2856 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2857 _Py_isspace__doc__},
2858 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2859 _Py_istitle__doc__},
2860 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2861 _Py_isupper__doc__},
2862 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2863 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2864 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2865 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2866 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2867 _Py_maketrans__doc__},
2868 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2869 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2870 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2871 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2872 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2873 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2874 rpartition__doc__},
2875 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2876 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2877 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2878 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2879 splitlines__doc__},
2880 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2881 startswith__doc__},
2882 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2883 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2884 _Py_swapcase__doc__},
2885 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2886 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2887 translate__doc__},
2888 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2889 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2890 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2891 sizeof__doc__},
2892 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002893};
2894
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895static PyObject *
2896str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2897
2898static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002899bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002901 PyObject *x = NULL;
2902 const char *encoding = NULL;
2903 const char *errors = NULL;
2904 PyObject *new = NULL;
2905 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002906
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002907 if (type != &PyBytes_Type)
2908 return str_subtype_new(type, args, kwds);
2909 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2910 &encoding, &errors))
2911 return NULL;
2912 if (x == NULL) {
2913 if (encoding != NULL || errors != NULL) {
2914 PyErr_SetString(PyExc_TypeError,
2915 "encoding or errors without sequence "
2916 "argument");
2917 return NULL;
2918 }
2919 return PyBytes_FromString("");
2920 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002921
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002922 if (PyUnicode_Check(x)) {
2923 /* Encode via the codec registry */
2924 if (encoding == NULL) {
2925 PyErr_SetString(PyExc_TypeError,
2926 "string argument without an encoding");
2927 return NULL;
2928 }
2929 new = PyUnicode_AsEncodedString(x, encoding, errors);
2930 if (new == NULL)
2931 return NULL;
2932 assert(PyBytes_Check(new));
2933 return new;
2934 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002936 /* If it's not unicode, there can't be encoding or errors */
2937 if (encoding != NULL || errors != NULL) {
2938 PyErr_SetString(PyExc_TypeError,
2939 "encoding or errors without a string argument");
2940 return NULL;
2941 }
2942 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002943}
2944
2945PyObject *
2946PyBytes_FromObject(PyObject *x)
2947{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002948 PyObject *new, *it;
2949 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002950
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002951 if (x == NULL) {
2952 PyErr_BadInternalCall();
2953 return NULL;
2954 }
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002955
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002956 /* Is it an int? */
2957 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2958 if (size == -1 && PyErr_Occurred()) {
2959 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2960 return NULL;
2961 PyErr_Clear();
2962 }
2963 else if (size < 0) {
2964 PyErr_SetString(PyExc_ValueError, "negative count");
2965 return NULL;
2966 }
2967 else {
2968 new = PyBytes_FromStringAndSize(NULL, size);
2969 if (new == NULL) {
2970 return NULL;
2971 }
2972 if (size > 0) {
2973 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2974 }
2975 return new;
2976 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002977
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002978 /* Use the modern buffer interface */
2979 if (PyObject_CheckBuffer(x)) {
2980 Py_buffer view;
2981 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2982 return NULL;
2983 new = PyBytes_FromStringAndSize(NULL, view.len);
2984 if (!new)
2985 goto fail;
2986 /* XXX(brett.cannon): Better way to get to internal buffer? */
2987 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2988 &view, view.len, 'C') < 0)
2989 goto fail;
2990 PyBuffer_Release(&view);
2991 return new;
2992 fail:
2993 Py_XDECREF(new);
2994 PyBuffer_Release(&view);
2995 return NULL;
2996 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002997
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002998 /* For iterator version, create a string object and resize as needed */
2999 /* XXX(gb): is 64 a good value? also, optimize if length is known */
3000 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
3001 input being a truly long iterator. */
3002 size = 64;
3003 new = PyBytes_FromStringAndSize(NULL, size);
3004 if (new == NULL)
3005 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003006
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003007 /* XXX Optimize this if the arguments is a list, tuple */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003009 /* Get the iterator */
3010 it = PyObject_GetIter(x);
3011 if (it == NULL)
3012 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003013
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003014 /* Run the iterator to exhaustion */
3015 for (i = 0; ; i++) {
3016 PyObject *item;
3017 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003018
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003019 /* Get the next item */
3020 item = PyIter_Next(it);
3021 if (item == NULL) {
3022 if (PyErr_Occurred())
3023 goto error;
3024 break;
3025 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003026
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003027 /* Interpret it as an int (__index__) */
3028 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3029 Py_DECREF(item);
3030 if (value == -1 && PyErr_Occurred())
3031 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003033 /* Range check */
3034 if (value < 0 || value >= 256) {
3035 PyErr_SetString(PyExc_ValueError,
3036 "bytes must be in range(0, 256)");
3037 goto error;
3038 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003040 /* Append the byte */
3041 if (i >= size) {
3042 size *= 2;
3043 if (_PyBytes_Resize(&new, size) < 0)
3044 goto error;
3045 }
Antoine Pitroubc760d92010-08-15 17:46:50 +00003046 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003047 }
3048 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003049
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003050 /* Clean up and return success */
3051 Py_DECREF(it);
3052 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003053
3054 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003055 /* Error handling when new != NULL */
3056 Py_XDECREF(it);
3057 Py_DECREF(new);
3058 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003059}
3060
3061static PyObject *
3062str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3063{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003064 PyObject *tmp, *pnew;
3065 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003066
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003067 assert(PyType_IsSubtype(type, &PyBytes_Type));
3068 tmp = bytes_new(&PyBytes_Type, args, kwds);
3069 if (tmp == NULL)
3070 return NULL;
3071 assert(PyBytes_CheckExact(tmp));
3072 n = PyBytes_GET_SIZE(tmp);
3073 pnew = type->tp_alloc(type, n);
3074 if (pnew != NULL) {
3075 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3076 PyBytes_AS_STRING(tmp), n+1);
3077 ((PyBytesObject *)pnew)->ob_shash =
3078 ((PyBytesObject *)tmp)->ob_shash;
3079 }
3080 Py_DECREF(tmp);
3081 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003082}
3083
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003084PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003085"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003086bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003087bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3088bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003089\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003090Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003091 - an iterable yielding integers in range(256)\n\
3092 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003093 - a bytes or a buffer object\n\
3094 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003095
/* Forward declaration: bytes_iter is defined further down, after the
   iterator type, but its address is needed here for the tp_iter slot. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  Slots are positional; each entry is labelled
   with the slot it fills. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003140
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003141void
3142PyBytes_Concat(register PyObject **pv, register PyObject *w)
3143{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003144 register PyObject *v;
3145 assert(pv != NULL);
3146 if (*pv == NULL)
3147 return;
3148 if (w == NULL) {
3149 Py_DECREF(*pv);
3150 *pv = NULL;
3151 return;
3152 }
3153 v = bytes_concat(*pv, w);
3154 Py_DECREF(*pv);
3155 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003156}
3157
3158void
3159PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3160{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003161 PyBytes_Concat(pv, w);
3162 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003163}
3164
3165
3166/* The following function breaks the notion that strings are immutable:
3167 it changes the size of a string. We get away with this only if there
3168 is only one module referencing the object. You can also think of it
3169 as creating a new string object and destroying the old one, only
3170 more efficiently. In any case, don't use this if the string may
3171 already be known to some other part of the code...
3172 Note that if there's not enough memory to resize the string, the original
3173 string object at *pv is deallocated, *pv is set to NULL, an "out of
3174 memory" exception is set, and -1 is returned. Else (on success) 0 is
3175 returned, and the value in *pv may or may not be the same as on input.
3176 As always, an extra byte is allocated for a trailing \0 byte (newsize
3177 does *not* include that), and a trailing \0 byte is stored.
3178*/
3179
3180int
3181_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3182{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003183 register PyObject *v;
3184 register PyBytesObject *sv;
3185 v = *pv;
3186 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3187 *pv = 0;
3188 Py_DECREF(v);
3189 PyErr_BadInternalCall();
3190 return -1;
3191 }
3192 /* XXX UNREF/NEWREF interface should be more symmetrical */
3193 _Py_DEC_REFTOTAL;
3194 _Py_ForgetReference(v);
3195 *pv = (PyObject *)
3196 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
3197 if (*pv == NULL) {
3198 PyObject_Del(v);
3199 PyErr_NoMemory();
3200 return -1;
3201 }
3202 _Py_NewReference(*pv);
3203 sv = (PyBytesObject *) *pv;
3204 Py_SIZE(sv) = newsize;
3205 sv->ob_sval[newsize] = '\0';
3206 sv->ob_shash = -1; /* invalidate cached hash value */
3207 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003208}
3209
3210/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3211 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3212 * Python's regular ints.
3213 * Return value: a new PyString*, or NULL if error.
3214 * . *pbuf is set to point into it,
3215 * *plen set to the # of chars following that.
3216 * Caller must decref it when done using pbuf.
3217 * The string starting at *pbuf is of the form
3218 * "-"? ("0x" | "0X")? digit+
3219 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3220 * set in flags. The case of hex digits will be correct,
3221 * There will be at least prec digits, zero-filled on the left if
3222 * necessary to get that many.
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003223 * val object to be converted
3224 * flags bitmask of format flags; only F_ALT is looked at
3225 * prec minimum number of digits; 0-fill on left if needed
3226 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003227 *
3228 * CAUTION: o, x and X conversions on regular ints can never
3229 * produce a '-' sign, but can for Python's unbounded ints.
3230 */
3231PyObject*
3232_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003233 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003234{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003235 PyObject *result = NULL;
3236 char *buf;
3237 Py_ssize_t i;
3238 int sign; /* 1 if '-', else 0 */
3239 int len; /* number of characters */
3240 Py_ssize_t llen;
3241 int numdigits; /* len == numnondigits + numdigits */
3242 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003243
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003244 /* Avoid exceeding SSIZE_T_MAX */
3245 if (prec > INT_MAX-3) {
3246 PyErr_SetString(PyExc_OverflowError,
3247 "precision too large");
3248 return NULL;
3249 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003250
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003251 switch (type) {
3252 case 'd':
3253 case 'u':
3254 /* Special-case boolean: we want 0/1 */
3255 if (PyBool_Check(val))
3256 result = PyNumber_ToBase(val, 10);
3257 else
3258 result = Py_TYPE(val)->tp_str(val);
3259 break;
3260 case 'o':
3261 numnondigits = 2;
3262 result = PyNumber_ToBase(val, 8);
3263 break;
3264 case 'x':
3265 case 'X':
3266 numnondigits = 2;
3267 result = PyNumber_ToBase(val, 16);
3268 break;
3269 default:
3270 assert(!"'type' not in [duoxX]");
3271 }
3272 if (!result)
3273 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003274
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003275 buf = _PyUnicode_AsString(result);
3276 if (!buf) {
3277 Py_DECREF(result);
3278 return NULL;
3279 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003280
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003281 /* To modify the string in-place, there can only be one reference. */
3282 if (Py_REFCNT(result) != 1) {
3283 PyErr_BadInternalCall();
3284 return NULL;
3285 }
3286 llen = PyUnicode_GetSize(result);
3287 if (llen > INT_MAX) {
3288 PyErr_SetString(PyExc_ValueError,
3289 "string too large in _PyBytes_FormatLong");
3290 return NULL;
3291 }
3292 len = (int)llen;
3293 if (buf[len-1] == 'L') {
3294 --len;
3295 buf[len] = '\0';
3296 }
3297 sign = buf[0] == '-';
3298 numnondigits += sign;
3299 numdigits = len - numnondigits;
3300 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003301
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003302 /* Get rid of base marker unless F_ALT */
3303 if (((flags & F_ALT) == 0 &&
3304 (type == 'o' || type == 'x' || type == 'X'))) {
3305 assert(buf[sign] == '0');
3306 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3307 buf[sign+1] == 'o');
3308 numnondigits -= 2;
3309 buf += 2;
3310 len -= 2;
3311 if (sign)
3312 buf[0] = '-';
3313 assert(len == numnondigits + numdigits);
3314 assert(numdigits > 0);
3315 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003316
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003317 /* Fill with leading zeroes to meet minimum width. */
3318 if (prec > numdigits) {
3319 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3320 numnondigits + prec);
3321 char *b1;
3322 if (!r1) {
3323 Py_DECREF(result);
3324 return NULL;
3325 }
3326 b1 = PyBytes_AS_STRING(r1);
3327 for (i = 0; i < numnondigits; ++i)
3328 *b1++ = *buf++;
3329 for (i = 0; i < prec - numdigits; i++)
3330 *b1++ = '0';
3331 for (i = 0; i < numdigits; i++)
3332 *b1++ = *buf++;
3333 *b1 = '\0';
3334 Py_DECREF(result);
3335 result = r1;
3336 buf = PyBytes_AS_STRING(result);
3337 len = numnondigits + prec;
3338 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003339
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003340 /* Fix up case for hex conversions. */
3341 if (type == 'X') {
3342 /* Need to convert all lower case letters to upper case.
3343 and need to convert 0x to 0X (and -0x to -0X). */
3344 for (i = 0; i < len; i++)
3345 if (buf[i] >= 'a' && buf[i] <= 'x')
3346 buf[i] -= 'a'-'A';
3347 }
3348 *pbuf = buf;
3349 *plen = len;
3350 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003351}
3352
3353void
3354PyBytes_Fini(void)
3355{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003356 int i;
3357 for (i = 0; i < UCHAR_MAX + 1; i++) {
3358 Py_XDECREF(characters[i]);
3359 characters[i] = NULL;
3360 }
3361 Py_XDECREF(nullstring);
3362 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003363}
3364
Benjamin Peterson4116f362008-05-27 00:36:20 +00003365/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003366
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* offset of the next byte to hand out */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003372
/* tp_dealloc for bytes iterators: stop GC tracking, drop the reference
   to the underlying bytes object (if one is still held), then free the
   iterator itself. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    /* it_seq is NULL once the iterator has been exhausted. */
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3380
/* tp_traverse for bytes iterators: the only object reference held is
   it_seq.  Always returns 0 unless Py_VISIT returns early. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    /* Py_VISIT relies on the parameters being named `visit` and `arg`. */
    Py_VISIT(it->it_seq);
    return 0;
}
3387
3388static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003389striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003390{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003391 PyBytesObject *seq;
3392 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003393
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003394 assert(it != NULL);
3395 seq = it->it_seq;
3396 if (seq == NULL)
3397 return NULL;
3398 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003399
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003400 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3401 item = PyLong_FromLong(
3402 (unsigned char)seq->ob_sval[it->it_index]);
3403 if (item != NULL)
3404 ++it->it_index;
3405 return item;
3406 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003407
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003408 Py_DECREF(seq);
3409 it->it_seq = NULL;
3410 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003411}
3412
3413static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003414striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003415{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003416 Py_ssize_t len = 0;
3417 if (it->it_seq)
3418 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3419 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003420}
3421
/* Docstring for the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
             "Private method returning an estimate of len(list(it)).");

/* Method table for bytes iterators (used as tp_methods of
   PyBytesIter_Type); __length_hint__ is the only entry. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {NULL,              NULL}           /* sentinel */
};
3430
/* Type object for iterators over bytes.  Instances are GC-aware
   (Py_TPFLAGS_HAVE_GC) and are their own iterator (PyObject_SelfIter). */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3463
3464static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003465bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003466{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003467 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003468
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003469 if (!PyBytes_Check(seq)) {
3470 PyErr_BadInternalCall();
3471 return NULL;
3472 }
3473 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3474 if (it == NULL)
3475 return NULL;
3476 it->it_index = 0;
3477 Py_INCREF(seq);
3478 it->it_seq = (PyBytesObject *)seq;
3479 _PyObject_GC_TRACK(it);
3480 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003481}