blob: db93f37bacc4522856c263927fdd6b036c15eb31 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroua57aae72010-06-09 16:58:35 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is either NULL or
   else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178#else
179#ifdef __va_copy
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000180 __va_copy(count, vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000182 count = vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183#endif
184#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000239 expand:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000345
346 end:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000354 PyObject* ret;
355 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000358 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000360 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000368bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000407
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200498 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505 failed:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000506 Py_DECREF(v);
507 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000537 register char **s,
538 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000544
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
Eric Smith0923d1d2009-04-16 20:16:10 +0000565#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000567
Neal Norwitz6968b052007-02-27 19:02:19 +0000568#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000569#define STRINGLIB_LEN PyBytes_GET_SIZE
570#define STRINGLIB_NEW PyBytes_FromStringAndSize
571#define STRINGLIB_STR PyBytes_AS_STRING
572/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
573
574#define STRINGLIB_EMPTY nullstring
575#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
576#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000577
578#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579
Neal Norwitz6968b052007-02-27 19:02:19 +0000580#include "stringlib/count.h"
581#include "stringlib/find.h"
582#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000583#include "stringlib/ctype.h"
584#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000585
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000586#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
Eric Smitha3b1ac82009-04-03 14:45:06 +0000587#define _Py_InsertThousandsGroupingLocale _PyBytes_InsertThousandsGroupingLocale
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000588#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000589
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000590PyObject *
591PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000592{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000593 static const char *hexdigits = "0123456789abcdef";
594 register PyBytesObject* op = (PyBytesObject*) obj;
595 Py_ssize_t length = Py_SIZE(op);
596 size_t newsize = 3 + 4 * length;
597 PyObject *v;
598 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
599 PyErr_SetString(PyExc_OverflowError,
600 "bytes object is too large to make repr");
601 return NULL;
602 }
603 v = PyUnicode_FromUnicode(NULL, newsize);
604 if (v == NULL) {
605 return NULL;
606 }
607 else {
608 register Py_ssize_t i;
609 register Py_UNICODE c;
610 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
611 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000612
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000613 /* Figure out which quote to use; single is preferred */
614 quote = '\'';
615 if (smartquotes) {
616 char *test, *start;
617 start = PyBytes_AS_STRING(op);
618 for (test = start; test < start+length; ++test) {
619 if (*test == '"') {
620 quote = '\''; /* back to single */
621 goto decided;
622 }
623 else if (*test == '\'')
624 quote = '"';
625 }
626 decided:
627 ;
628 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000629
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000630 *p++ = 'b', *p++ = quote;
631 for (i = 0; i < length; i++) {
632 /* There's at least enough room for a hex escape
633 and a closing quote. */
634 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
635 c = op->ob_sval[i];
636 if (c == quote || c == '\\')
637 *p++ = '\\', *p++ = c;
638 else if (c == '\t')
639 *p++ = '\\', *p++ = 't';
640 else if (c == '\n')
641 *p++ = '\\', *p++ = 'n';
642 else if (c == '\r')
643 *p++ = '\\', *p++ = 'r';
644 else if (c < ' ' || c >= 0x7f) {
645 *p++ = '\\';
646 *p++ = 'x';
647 *p++ = hexdigits[(c & 0xf0) >> 4];
648 *p++ = hexdigits[c & 0xf];
649 }
650 else
651 *p++ = c;
652 }
653 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
654 *p++ = quote;
655 *p = '\0';
656 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
657 Py_DECREF(v);
658 return NULL;
659 }
660 return v;
661 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Neal Norwitz6968b052007-02-27 19:02:19 +0000664static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000665bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000666{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000667 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000668}
669
Neal Norwitz6968b052007-02-27 19:02:19 +0000670static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000671bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000672{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000673 if (Py_BytesWarningFlag) {
674 if (PyErr_WarnEx(PyExc_BytesWarning,
675 "str() on a bytes instance", 1))
676 return NULL;
677 }
678 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000679}
680
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000681static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000682bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000684 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000685}
Neal Norwitz6968b052007-02-27 19:02:19 +0000686
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687/* This is also used by PyBytes_Concat() */
688static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000689bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000691 Py_ssize_t size;
692 Py_buffer va, vb;
693 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000695 va.len = -1;
696 vb.len = -1;
697 if (_getbuffer(a, &va) < 0 ||
698 _getbuffer(b, &vb) < 0) {
699 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
700 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
701 goto done;
702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000704 /* Optimize end cases */
705 if (va.len == 0 && PyBytes_CheckExact(b)) {
706 result = b;
707 Py_INCREF(result);
708 goto done;
709 }
710 if (vb.len == 0 && PyBytes_CheckExact(a)) {
711 result = a;
712 Py_INCREF(result);
713 goto done;
714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000715
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000716 size = va.len + vb.len;
717 if (size < 0) {
718 PyErr_NoMemory();
719 goto done;
720 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000721
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000722 result = PyBytes_FromStringAndSize(NULL, size);
723 if (result != NULL) {
724 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
725 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000727
728 done:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000729 if (va.len != -1)
730 PyBuffer_Release(&va);
731 if (vb.len != -1)
732 PyBuffer_Release(&vb);
733 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000734}
Neal Norwitz6968b052007-02-27 19:02:19 +0000735
736static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000737bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000738{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000739 register Py_ssize_t i;
740 register Py_ssize_t j;
741 register Py_ssize_t size;
742 register PyBytesObject *op;
743 size_t nbytes;
744 if (n < 0)
745 n = 0;
746 /* watch out for overflows: the size can overflow int,
747 * and the # of bytes needed can overflow size_t
748 */
749 size = Py_SIZE(a) * n;
750 if (n && size / n != Py_SIZE(a)) {
751 PyErr_SetString(PyExc_OverflowError,
752 "repeated bytes are too long");
753 return NULL;
754 }
755 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
756 Py_INCREF(a);
757 return (PyObject *)a;
758 }
759 nbytes = (size_t)size;
760 if (nbytes + PyBytesObject_SIZE <= nbytes) {
761 PyErr_SetString(PyExc_OverflowError,
762 "repeated bytes are too long");
763 return NULL;
764 }
765 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
766 if (op == NULL)
767 return PyErr_NoMemory();
768 PyObject_INIT_VAR(op, &PyBytes_Type, size);
769 op->ob_shash = -1;
770 op->ob_sval[size] = '\0';
771 if (Py_SIZE(a) == 1 && n > 0) {
772 memset(op->ob_sval, a->ob_sval[0] , n);
773 return (PyObject *) op;
774 }
775 i = 0;
776 if (i < size) {
777 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
778 i = Py_SIZE(a);
779 }
780 while (i < size) {
781 j = (i <= size-i) ? i : size-i;
782 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
783 i += j;
784 }
785 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000786}
787
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000789bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790{
791 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
792 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000793 Py_buffer varg;
Antoine Pitroubc760d92010-08-15 17:46:50 +0000794 Py_ssize_t pos;
Antoine Pitroua57aae72010-06-09 16:58:35 +0000795 PyErr_Clear();
796 if (_getbuffer(arg, &varg) < 0)
797 return -1;
798 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
799 varg.buf, varg.len, 0);
800 PyBuffer_Release(&varg);
801 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000802 }
803 if (ival < 0 || ival >= 256) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000804 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
805 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000806 }
807
Antoine Pitroubc760d92010-08-15 17:46:50 +0000808 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000809}
810
Neal Norwitz6968b052007-02-27 19:02:19 +0000811static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000812bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000813{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000814 if (i < 0 || i >= Py_SIZE(a)) {
815 PyErr_SetString(PyExc_IndexError, "index out of range");
816 return NULL;
817 }
818 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000819}
820
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000821static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000822bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000823{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000824 int c;
825 Py_ssize_t len_a, len_b;
826 Py_ssize_t min_len;
827 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000828
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000829 /* Make sure both arguments are strings. */
830 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
831 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
832 (PyObject_IsInstance((PyObject*)a,
833 (PyObject*)&PyUnicode_Type) ||
834 PyObject_IsInstance((PyObject*)b,
835 (PyObject*)&PyUnicode_Type))) {
836 if (PyErr_WarnEx(PyExc_BytesWarning,
837 "Comparison between bytes and string", 1))
838 return NULL;
839 }
840 result = Py_NotImplemented;
841 goto out;
842 }
843 if (a == b) {
844 switch (op) {
845 case Py_EQ:case Py_LE:case Py_GE:
846 result = Py_True;
847 goto out;
848 case Py_NE:case Py_LT:case Py_GT:
849 result = Py_False;
850 goto out;
851 }
852 }
853 if (op == Py_EQ) {
854 /* Supporting Py_NE here as well does not save
855 much time, since Py_NE is rarely used. */
856 if (Py_SIZE(a) == Py_SIZE(b)
857 && (a->ob_sval[0] == b->ob_sval[0]
858 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
859 result = Py_True;
860 } else {
861 result = Py_False;
862 }
863 goto out;
864 }
865 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
866 min_len = (len_a < len_b) ? len_a : len_b;
867 if (min_len > 0) {
868 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
869 if (c==0)
870 c = memcmp(a->ob_sval, b->ob_sval, min_len);
871 } else
872 c = 0;
873 if (c == 0)
874 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
875 switch (op) {
876 case Py_LT: c = c < 0; break;
877 case Py_LE: c = c <= 0; break;
878 case Py_EQ: assert(0); break; /* unreachable */
879 case Py_NE: c = c != 0; break;
880 case Py_GT: c = c > 0; break;
881 case Py_GE: c = c >= 0; break;
882 default:
883 result = Py_NotImplemented;
884 goto out;
885 }
886 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000887 out:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000888 Py_INCREF(result);
889 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000890}
891
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000892static long
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000893bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000894{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000895 register Py_ssize_t len;
896 register unsigned char *p;
897 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000898
Benjamin Peterson69e97272012-02-21 11:08:50 -0500899 assert(_Py_HashSecret_Initialized);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000900 if (a->ob_shash != -1)
901 return a->ob_shash;
902 len = Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100903 /*
904 We make the hash of the empty string be 0, rather than using
905 (prefix ^ suffix), since this slightly obfuscates the hash secret
906 */
907 if (len == 0) {
908 a->ob_shash = 0;
909 return 0;
910 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000911 p = (unsigned char *) a->ob_sval;
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100912 x = _Py_HashSecret.prefix;
913 x ^= *p << 7;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000914 while (--len >= 0)
915 x = (1000003*x) ^ *p++;
916 x ^= Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100917 x ^= _Py_HashSecret.suffix;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000918 if (x == -1)
919 x = -2;
920 a->ob_shash = x;
921 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000922}
923
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000924static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000925bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000926{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000927 if (PyIndex_Check(item)) {
928 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
929 if (i == -1 && PyErr_Occurred())
930 return NULL;
931 if (i < 0)
932 i += PyBytes_GET_SIZE(self);
933 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
934 PyErr_SetString(PyExc_IndexError,
935 "index out of range");
936 return NULL;
937 }
938 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
939 }
940 else if (PySlice_Check(item)) {
941 Py_ssize_t start, stop, step, slicelength, cur, i;
942 char* source_buf;
943 char* result_buf;
944 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000945
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000946 if (PySlice_GetIndicesEx((PySliceObject*)item,
947 PyBytes_GET_SIZE(self),
948 &start, &stop, &step, &slicelength) < 0) {
949 return NULL;
950 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000951
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000952 if (slicelength <= 0) {
953 return PyBytes_FromStringAndSize("", 0);
954 }
955 else if (start == 0 && step == 1 &&
956 slicelength == PyBytes_GET_SIZE(self) &&
957 PyBytes_CheckExact(self)) {
958 Py_INCREF(self);
959 return (PyObject *)self;
960 }
961 else if (step == 1) {
962 return PyBytes_FromStringAndSize(
963 PyBytes_AS_STRING(self) + start,
964 slicelength);
965 }
966 else {
967 source_buf = PyBytes_AS_STRING(self);
968 result = PyBytes_FromStringAndSize(NULL, slicelength);
969 if (result == NULL)
970 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000971
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000972 result_buf = PyBytes_AS_STRING(result);
973 for (cur = start, i = 0; i < slicelength;
974 cur += step, i++) {
975 result_buf[i] = source_buf[cur];
976 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000977
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000978 return result;
979 }
980 }
981 else {
982 PyErr_Format(PyExc_TypeError,
983 "byte indices must be integers, not %.200s",
984 Py_TYPE(item)->tp_name);
985 return NULL;
986 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000987}
988
989static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000990bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000991{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000992 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
993 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000994}
995
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000996static PySequenceMethods bytes_as_sequence = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000997 (lenfunc)bytes_length, /*sq_length*/
998 (binaryfunc)bytes_concat, /*sq_concat*/
999 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1000 (ssizeargfunc)bytes_item, /*sq_item*/
1001 0, /*sq_slice*/
1002 0, /*sq_ass_item*/
1003 0, /*sq_ass_slice*/
1004 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001005};
1006
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001007static PyMappingMethods bytes_as_mapping = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001008 (lenfunc)bytes_length,
1009 (binaryfunc)bytes_subscript,
1010 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001011};
1012
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001013static PyBufferProcs bytes_as_buffer = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001014 (getbufferproc)bytes_buffer_getbuffer,
1015 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001016};
1017
1018
/* Selectors for the three strip variants. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip selector: the format string past its "|O:" */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* True when `pattern` (of `length` bytes) occurs in `target` at `offset`.
   The first and last bytes are compared directly, memcmp() covers the
   bytes in between.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the bytes data[left:right] to the result list.

   This macro relies on names in the expanding function's scope:
   `str` (scratch PyObject *), `list` (the result list), `count` (number
   of items stored so far) and an `onError` label that is jumped to on
   failure.  The first MAX_PREALLOC items are stored straight into the
   preallocated slots (the list takes over the reference); later ones go
   through PyList_Append(), after which our own reference is dropped. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Advance (or, for the R variants, retreat) index `i` over a run of
   whitespace / non-whitespace bytes in `s`.  `i` must be an lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[i])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
1074Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001075split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001076{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001077 const char *s = PyBytes_AS_STRING(self);
1078 Py_ssize_t i, j, count=0;
1079 PyObject *str;
1080 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001081
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001082 if (list == NULL)
1083 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001084
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001085 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001087 while (maxsplit-- > 0) {
1088 SKIP_SPACE(s, i, len);
1089 if (i==len) break;
1090 j = i; i++;
1091 SKIP_NONSPACE(s, i, len);
1092 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1093 /* No whitespace in self, so just use it as list[0] */
1094 Py_INCREF(self);
1095 PyList_SET_ITEM(list, 0, (PyObject *)self);
1096 count++;
1097 break;
1098 }
1099 SPLIT_ADD(s, j, i);
1100 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001101
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001102 if (i < len) {
1103 /* Only occurs when maxsplit was reached */
1104 /* Skip any remaining whitespace and copy to end of string */
1105 SKIP_SPACE(s, i, len);
1106 if (i != len)
1107 SPLIT_ADD(s, i, len);
1108 }
1109 FIX_PREALLOC_SIZE(list);
1110 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001111 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001112 Py_DECREF(list);
1113 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001114}
1115
Guido van Rossum8f950672007-09-10 16:53:45 +00001116Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001117split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001118{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001119 const char *s = PyBytes_AS_STRING(self);
1120 register Py_ssize_t i, j, count=0;
1121 PyObject *str;
1122 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001123
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001124 if (list == NULL)
1125 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001126
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001127 i = j = 0;
1128 while ((j < len) && (maxcount-- > 0)) {
1129 for(; j<len; j++) {
1130 /* I found that using memchr makes no difference */
1131 if (s[j] == ch) {
1132 SPLIT_ADD(s, i, j);
1133 i = j = j + 1;
1134 break;
1135 }
1136 }
1137 }
1138 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1139 /* ch not in self, so just use self as list[0] */
1140 Py_INCREF(self);
1141 PyList_SET_ITEM(list, 0, (PyObject *)self);
1142 count++;
1143 }
1144 else if (i <= len) {
1145 SPLIT_ADD(s, i, len);
1146 }
1147 FIX_PREALLOC_SIZE(list);
1148 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001149
1150 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001151 Py_DECREF(list);
1152 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001153}
1154
Neal Norwitz6968b052007-02-27 19:02:19 +00001155PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001156"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001157\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001158Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001159If sep is not specified or is None, B is split on ASCII whitespace\n\
1160characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001161If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001162
1163static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001164bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001165{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001166 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1167 Py_ssize_t maxsplit = -1, count=0;
1168 const char *s = PyBytes_AS_STRING(self), *sub;
1169 Py_buffer vsub;
1170 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001171#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001172 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001173#endif
1174
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001175 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1176 return NULL;
1177 if (maxsplit < 0)
1178 maxsplit = PY_SSIZE_T_MAX;
1179 if (subobj == Py_None)
1180 return split_whitespace(self, len, maxsplit);
1181 if (_getbuffer(subobj, &vsub) < 0)
1182 return NULL;
1183 sub = vsub.buf;
1184 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001185
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001186 if (n == 0) {
1187 PyErr_SetString(PyExc_ValueError, "empty separator");
1188 PyBuffer_Release(&vsub);
1189 return NULL;
1190 }
1191 else if (n == 1) {
1192 list = split_char(self, len, sub[0], maxsplit);
1193 PyBuffer_Release(&vsub);
1194 return list;
1195 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001196
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001197 list = PyList_New(PREALLOC_SIZE(maxsplit));
1198 if (list == NULL) {
1199 PyBuffer_Release(&vsub);
1200 return NULL;
1201 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001202
1203#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001204 i = j = 0;
1205 while (maxsplit-- > 0) {
1206 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1207 if (pos < 0)
1208 break;
1209 j = i+pos;
1210 SPLIT_ADD(s, i, j);
1211 i = j + n;
1212 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001213#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001214 i = j = 0;
1215 while ((j+n <= len) && (maxsplit-- > 0)) {
1216 for (; j+n <= len; j++) {
1217 if (Py_STRING_MATCH(s, j, sub, n)) {
1218 SPLIT_ADD(s, i, j);
1219 i = j = j + n;
1220 break;
1221 }
1222 }
1223 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001224#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001225 SPLIT_ADD(s, i, len);
1226 FIX_PREALLOC_SIZE(list);
1227 PyBuffer_Release(&vsub);
1228 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001229
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001231 Py_DECREF(list);
1232 PyBuffer_Release(&vsub);
1233 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001234}
1235
Neal Norwitz6968b052007-02-27 19:02:19 +00001236PyDoc_STRVAR(partition__doc__,
1237"B.partition(sep) -> (head, sep, tail)\n\
1238\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001239Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001240the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001242
1243static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001244bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001245{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001246 const char *sep;
1247 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001248
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001249 if (PyBytes_Check(sep_obj)) {
1250 sep = PyBytes_AS_STRING(sep_obj);
1251 sep_len = PyBytes_GET_SIZE(sep_obj);
1252 }
1253 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1254 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001255
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001256 return stringlib_partition(
1257 (PyObject*) self,
1258 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1259 sep_obj, sep, sep_len
1260 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001261}
1262
1263PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti4c81fbb2010-01-25 12:02:24 +00001264"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001265\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001266Search for the separator sep in B, starting at the end of B,\n\
1267and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001268part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001270
1271static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001272bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001273{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001274 const char *sep;
1275 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001276
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001277 if (PyBytes_Check(sep_obj)) {
1278 sep = PyBytes_AS_STRING(sep_obj);
1279 sep_len = PyBytes_GET_SIZE(sep_obj);
1280 }
1281 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1282 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001283
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001284 return stringlib_rpartition(
1285 (PyObject*) self,
1286 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1287 sep_obj, sep, sep_len
1288 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001289}
1290
1291Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001293{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001294 const char *s = PyBytes_AS_STRING(self);
1295 Py_ssize_t i, j, count=0;
1296 PyObject *str;
1297 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001298
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001299 if (list == NULL)
1300 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001301
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001302 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001303
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001304 while (maxsplit-- > 0) {
1305 RSKIP_SPACE(s, i);
1306 if (i<0) break;
1307 j = i; i--;
1308 RSKIP_NONSPACE(s, i);
1309 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1310 /* No whitespace in self, so just use it as list[0] */
1311 Py_INCREF(self);
1312 PyList_SET_ITEM(list, 0, (PyObject *)self);
1313 count++;
1314 break;
1315 }
1316 SPLIT_ADD(s, i + 1, j + 1);
1317 }
1318 if (i >= 0) {
1319 /* Only occurs when maxsplit was reached. Skip any remaining
1320 whitespace and copy to beginning of string. */
1321 RSKIP_SPACE(s, i);
1322 if (i >= 0)
1323 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001324
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001325 }
1326 FIX_PREALLOC_SIZE(list);
1327 if (PyList_Reverse(list) < 0)
1328 goto onError;
1329 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001330 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001331 Py_DECREF(list);
1332 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001333}
1334
Guido van Rossum8f950672007-09-10 16:53:45 +00001335Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001336rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001337{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001338 const char *s = PyBytes_AS_STRING(self);
1339 register Py_ssize_t i, j, count=0;
1340 PyObject *str;
1341 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001342
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001343 if (list == NULL)
1344 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001345
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001346 i = j = len - 1;
1347 while ((i >= 0) && (maxcount-- > 0)) {
1348 for (; i >= 0; i--) {
1349 if (s[i] == ch) {
1350 SPLIT_ADD(s, i + 1, j + 1);
1351 j = i = i - 1;
1352 break;
1353 }
1354 }
1355 }
1356 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1357 /* ch not in self, so just use self as list[0] */
1358 Py_INCREF(self);
1359 PyList_SET_ITEM(list, 0, (PyObject *)self);
1360 count++;
1361 }
1362 else if (j >= -1) {
1363 SPLIT_ADD(s, 0, j + 1);
1364 }
1365 FIX_PREALLOC_SIZE(list);
1366 if (PyList_Reverse(list) < 0)
1367 goto onError;
1368 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001369
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001371 Py_DECREF(list);
1372 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001373}
1374
Neal Norwitz6968b052007-02-27 19:02:19 +00001375PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001376"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001377\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001378Return a list of the sections in B, using sep as the delimiter,\n\
1379starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001380If sep is not given, B is split on ASCII whitespace characters\n\
1381(space, tab, return, newline, formfeed, vertical tab).\n\
1382If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001383
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384
Neal Norwitz6968b052007-02-27 19:02:19 +00001385static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001386bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001387{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001388 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1389 Py_ssize_t maxsplit = -1, count=0;
1390 const char *s, *sub;
1391 Py_buffer vsub;
1392 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001393
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001394 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1395 return NULL;
1396 if (maxsplit < 0)
1397 maxsplit = PY_SSIZE_T_MAX;
1398 if (subobj == Py_None)
1399 return rsplit_whitespace(self, len, maxsplit);
1400 if (_getbuffer(subobj, &vsub) < 0)
1401 return NULL;
1402 sub = vsub.buf;
1403 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001404
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001405 if (n == 0) {
1406 PyErr_SetString(PyExc_ValueError, "empty separator");
1407 PyBuffer_Release(&vsub);
1408 return NULL;
1409 }
1410 else if (n == 1) {
1411 list = rsplit_char(self, len, sub[0], maxsplit);
1412 PyBuffer_Release(&vsub);
1413 return list;
1414 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001415
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001416 list = PyList_New(PREALLOC_SIZE(maxsplit));
1417 if (list == NULL) {
1418 PyBuffer_Release(&vsub);
1419 return NULL;
1420 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001421
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001422 j = len;
1423 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001424
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001425 s = PyBytes_AS_STRING(self);
1426 while ( (i >= 0) && (maxsplit-- > 0) ) {
1427 for (; i>=0; i--) {
1428 if (Py_STRING_MATCH(s, i, sub, n)) {
1429 SPLIT_ADD(s, i + n, j);
1430 j = i;
1431 i -= n;
1432 break;
1433 }
1434 }
1435 }
1436 SPLIT_ADD(s, 0, j);
1437 FIX_PREALLOC_SIZE(list);
1438 if (PyList_Reverse(list) < 0)
1439 goto onError;
1440 PyBuffer_Release(&vsub);
1441 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001442
1443onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001444 Py_DECREF(list);
1445 PyBuffer_Release(&vsub);
1446 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001447}
1448
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449#undef SPLIT_ADD
1450#undef MAX_PREALLOC
1451#undef PREALLOC_SIZE
1452
1453
1454PyDoc_STRVAR(join__doc__,
1455"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001456\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001457Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1459
Neal Norwitz6968b052007-02-27 19:02:19 +00001460static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001461bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001462{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001463 char *sep = PyBytes_AS_STRING(self);
1464 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1465 PyObject *res = NULL;
1466 char *p;
1467 Py_ssize_t seqlen = 0;
1468 size_t sz = 0;
1469 Py_ssize_t i;
1470 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001471
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001472 seq = PySequence_Fast(orig, "");
1473 if (seq == NULL) {
1474 return NULL;
1475 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001476
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001477 seqlen = PySequence_Size(seq);
1478 if (seqlen == 0) {
1479 Py_DECREF(seq);
1480 return PyBytes_FromString("");
1481 }
1482 if (seqlen == 1) {
1483 item = PySequence_Fast_GET_ITEM(seq, 0);
1484 if (PyBytes_CheckExact(item)) {
1485 Py_INCREF(item);
1486 Py_DECREF(seq);
1487 return item;
1488 }
1489 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001491 /* There are at least two things to join, or else we have a subclass
1492 * of the builtin types in the sequence.
1493 * Do a pre-pass to figure out the total amount of space we'll
1494 * need (sz), and see whether all argument are bytes.
1495 */
1496 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1497 for (i = 0; i < seqlen; i++) {
1498 const size_t old_sz = sz;
1499 item = PySequence_Fast_GET_ITEM(seq, i);
1500 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1501 PyErr_Format(PyExc_TypeError,
1502 "sequence item %zd: expected bytes,"
1503 " %.80s found",
1504 i, Py_TYPE(item)->tp_name);
1505 Py_DECREF(seq);
1506 return NULL;
1507 }
1508 sz += Py_SIZE(item);
1509 if (i != 0)
1510 sz += seplen;
1511 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1512 PyErr_SetString(PyExc_OverflowError,
1513 "join() result is too long for bytes");
1514 Py_DECREF(seq);
1515 return NULL;
1516 }
1517 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001519 /* Allocate result space. */
1520 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1521 if (res == NULL) {
1522 Py_DECREF(seq);
1523 return NULL;
1524 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001525
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001526 /* Catenate everything. */
1527 /* I'm not worried about a PyByteArray item growing because there's
1528 nowhere in this function where we release the GIL. */
1529 p = PyBytes_AS_STRING(res);
1530 for (i = 0; i < seqlen; ++i) {
1531 size_t n;
1532 char *q;
1533 if (i) {
1534 Py_MEMCPY(p, sep, seplen);
1535 p += seplen;
1536 }
1537 item = PySequence_Fast_GET_ITEM(seq, i);
1538 n = Py_SIZE(item);
1539 if (PyBytes_Check(item))
1540 q = PyBytes_AS_STRING(item);
1541 else
1542 q = PyByteArray_AS_STRING(item);
1543 Py_MEMCPY(p, q, n);
1544 p += n;
1545 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001547 Py_DECREF(seq);
1548 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001549}
1550
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551PyObject *
1552_PyBytes_Join(PyObject *sep, PyObject *x)
1553{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001554 assert(sep != NULL && PyBytes_Check(sep));
1555 assert(x != NULL);
1556 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001557}
1558
1559Py_LOCAL_INLINE(void)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001560bytes_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001561{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001562 if (*end > len)
1563 *end = len;
1564 else if (*end < 0)
1565 *end += len;
1566 if (*end < 0)
1567 *end = 0;
1568 if (*start < 0)
1569 *start += len;
1570 if (*start < 0)
1571 *start = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001572}
1573
1574Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001575bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001576{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001577 PyObject *subobj;
1578 const char *sub;
1579 Py_ssize_t sub_len;
1580 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581
Jesus Ceaac451502011-04-20 17:09:23 +02001582 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1583 args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001584 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001586 if (PyBytes_Check(subobj)) {
1587 sub = PyBytes_AS_STRING(subobj);
1588 sub_len = PyBytes_GET_SIZE(subobj);
1589 }
1590 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1591 /* XXX - the "expected a character buffer object" is pretty
1592 confusing for a non-expert. remap to something else ? */
1593 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001594
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001595 if (dir > 0)
1596 return stringlib_find_slice(
1597 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1598 sub, sub_len, start, end);
1599 else
1600 return stringlib_rfind_slice(
1601 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1602 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001603}
1604
1605
1606PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001607"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001608\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001609Return the lowest index in S where substring sub is found,\n\
1610such that sub is contained within s[start:end]. Optional\n\
1611arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001612\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613Return -1 on failure.");
1614
Neal Norwitz6968b052007-02-27 19:02:19 +00001615static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001616bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001617{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001618 Py_ssize_t result = bytes_find_internal(self, args, +1);
1619 if (result == -2)
1620 return NULL;
1621 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001622}
1623
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001624
1625PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001626"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001627\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001628Like B.find() but raise ValueError when the substring is not found.");
1629
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001630static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001631bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001632{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001633 Py_ssize_t result = bytes_find_internal(self, args, +1);
1634 if (result == -2)
1635 return NULL;
1636 if (result == -1) {
1637 PyErr_SetString(PyExc_ValueError,
1638 "substring not found");
1639 return NULL;
1640 }
1641 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001642}
1643
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001644
1645PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001646"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001647\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648Return the highest index in B where substring sub is found,\n\
1649such that sub is contained within s[start:end]. Optional\n\
1650arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001651\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652Return -1 on failure.");
1653
Neal Norwitz6968b052007-02-27 19:02:19 +00001654static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001655bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001656{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001657 Py_ssize_t result = bytes_find_internal(self, args, -1);
1658 if (result == -2)
1659 return NULL;
1660 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001661}
1662
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001663
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001665"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666\n\
1667Like B.rfind() but raise ValueError when the substring is not found.");
1668
1669static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001670bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001671{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001672 Py_ssize_t result = bytes_find_internal(self, args, -1);
1673 if (result == -2)
1674 return NULL;
1675 if (result == -1) {
1676 PyErr_SetString(PyExc_ValueError,
1677 "substring not found");
1678 return NULL;
1679 }
1680 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001681}
1682
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683
1684Py_LOCAL_INLINE(PyObject *)
1685do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001686{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001687 Py_buffer vsep;
1688 char *s = PyBytes_AS_STRING(self);
1689 Py_ssize_t len = PyBytes_GET_SIZE(self);
1690 char *sep;
1691 Py_ssize_t seplen;
1692 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001694 if (_getbuffer(sepobj, &vsep) < 0)
1695 return NULL;
1696 sep = vsep.buf;
1697 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001698
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001699 i = 0;
1700 if (striptype != RIGHTSTRIP) {
1701 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1702 i++;
1703 }
1704 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001706 j = len;
1707 if (striptype != LEFTSTRIP) {
1708 do {
1709 j--;
1710 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1711 j++;
1712 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001714 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001716 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1717 Py_INCREF(self);
1718 return (PyObject*)self;
1719 }
1720 else
1721 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001722}
1723
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724
1725Py_LOCAL_INLINE(PyObject *)
1726do_strip(PyBytesObject *self, int striptype)
1727{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001728 char *s = PyBytes_AS_STRING(self);
1729 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001731 i = 0;
1732 if (striptype != RIGHTSTRIP) {
1733 while (i < len && ISSPACE(s[i])) {
1734 i++;
1735 }
1736 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001737
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001738 j = len;
1739 if (striptype != LEFTSTRIP) {
1740 do {
1741 j--;
1742 } while (j >= i && ISSPACE(s[j]));
1743 j++;
1744 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001746 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1747 Py_INCREF(self);
1748 return (PyObject*)self;
1749 }
1750 else
1751 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752}
1753
1754
1755Py_LOCAL_INLINE(PyObject *)
1756do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1757{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001758 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001760 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1761 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001763 if (sep != NULL && sep != Py_None) {
1764 return do_xstrip(self, striptype, sep);
1765 }
1766 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767}
1768
1769
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001770PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001772\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001773Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001775static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001776bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001777{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001778 if (PyTuple_GET_SIZE(args) == 0)
1779 return do_strip(self, BOTHSTRIP); /* Common case */
1780 else
1781 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001782}
1783
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001785PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001787\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001788Strip leading bytes contained in the argument.\n\
1789If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001790static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001791bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001792{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001793 if (PyTuple_GET_SIZE(args) == 0)
1794 return do_strip(self, LEFTSTRIP); /* Common case */
1795 else
1796 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001797}
1798
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001800PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001802\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001803Strip trailing bytes contained in the argument.\n\
1804If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001805static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001806bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001807{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001808 if (PyTuple_GET_SIZE(args) == 0)
1809 return do_strip(self, RIGHTSTRIP); /* Common case */
1810 else
1811 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001812}
Neal Norwitz6968b052007-02-27 19:02:19 +00001813
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
1815PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001816"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001817\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818Return the number of non-overlapping occurrences of substring sub in\n\
1819string S[start:end]. Optional arguments start and end are interpreted\n\
1820as in slice notation.");
1821
1822static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001823bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001825 PyObject *sub_obj;
1826 const char *str = PyBytes_AS_STRING(self), *sub;
1827 Py_ssize_t sub_len;
1828 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001829
Jesus Ceaac451502011-04-20 17:09:23 +02001830 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001831 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001833 if (PyBytes_Check(sub_obj)) {
1834 sub = PyBytes_AS_STRING(sub_obj);
1835 sub_len = PyBytes_GET_SIZE(sub_obj);
1836 }
1837 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1838 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001840 bytes_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001842 return PyLong_FromSsize_t(
1843 stringlib_count(str + start, end - start, sub, sub_len)
1844 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845}
1846
1847
1848PyDoc_STRVAR(translate__doc__,
1849"B.translate(table[, deletechars]) -> bytes\n\
1850\n\
1851Return a copy of B, where all characters occurring in the\n\
1852optional argument deletechars are removed, and the remaining\n\
1853characters have been mapped through the given translation\n\
1854table, which must be a bytes object of length 256.");
1855
1856static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001857bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001859 register char *input, *output;
1860 const char *table;
1861 register Py_ssize_t i, c, changed = 0;
1862 PyObject *input_obj = (PyObject*)self;
1863 const char *output_start, *del_table=NULL;
1864 Py_ssize_t inlen, tablen, dellen = 0;
1865 PyObject *result;
1866 int trans_table[256];
1867 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001869 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1870 &tableobj, &delobj))
1871 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001873 if (PyBytes_Check(tableobj)) {
1874 table = PyBytes_AS_STRING(tableobj);
1875 tablen = PyBytes_GET_SIZE(tableobj);
1876 }
1877 else if (tableobj == Py_None) {
1878 table = NULL;
1879 tablen = 256;
1880 }
1881 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1882 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001884 if (tablen != 256) {
1885 PyErr_SetString(PyExc_ValueError,
1886 "translation table must be 256 characters long");
1887 return NULL;
1888 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001890 if (delobj != NULL) {
1891 if (PyBytes_Check(delobj)) {
1892 del_table = PyBytes_AS_STRING(delobj);
1893 dellen = PyBytes_GET_SIZE(delobj);
1894 }
1895 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1896 return NULL;
1897 }
1898 else {
1899 del_table = NULL;
1900 dellen = 0;
1901 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001903 inlen = PyBytes_GET_SIZE(input_obj);
1904 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1905 if (result == NULL)
1906 return NULL;
1907 output_start = output = PyBytes_AsString(result);
1908 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001910 if (dellen == 0 && table != NULL) {
1911 /* If no deletions are required, use faster code */
1912 for (i = inlen; --i >= 0; ) {
1913 c = Py_CHARMASK(*input++);
1914 if (Py_CHARMASK((*output++ = table[c])) != c)
1915 changed = 1;
1916 }
1917 if (changed || !PyBytes_CheckExact(input_obj))
1918 return result;
1919 Py_DECREF(result);
1920 Py_INCREF(input_obj);
1921 return input_obj;
1922 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001924 if (table == NULL) {
1925 for (i = 0; i < 256; i++)
1926 trans_table[i] = Py_CHARMASK(i);
1927 } else {
1928 for (i = 0; i < 256; i++)
1929 trans_table[i] = Py_CHARMASK(table[i]);
1930 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001932 for (i = 0; i < dellen; i++)
1933 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001935 for (i = inlen; --i >= 0; ) {
1936 c = Py_CHARMASK(*input++);
1937 if (trans_table[c] != -1)
1938 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1939 continue;
1940 changed = 1;
1941 }
1942 if (!changed && PyBytes_CheckExact(input_obj)) {
1943 Py_DECREF(result);
1944 Py_INCREF(input_obj);
1945 return input_obj;
1946 }
1947 /* Fix the size of the resulting string */
1948 if (inlen > 0)
1949 _PyBytes_Resize(&result, output - output_start);
1950 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951}
1952
1953
Georg Brandlabc38772009-04-12 15:51:51 +00001954static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001955bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001956{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001957 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001958}
1959
/* Search directions accepted by findstring()/countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Evaluates to a char * pointing at the first byte equal to c within the
   first target_len bytes of target, or NULL if there is none.  Every
   argument is parenthesized so that expression arguments expand safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1967
1968/* String ops must return a string. */
1969/* If the object is subclass of string, create a copy */
1970Py_LOCAL(PyBytesObject *)
1971return_self(PyBytesObject *self)
1972{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001973 if (PyBytes_CheckExact(self)) {
1974 Py_INCREF(self);
1975 return self;
1976 }
1977 return (PyBytesObject *)PyBytes_FromStringAndSize(
1978 PyBytes_AS_STRING(self),
1979 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980}
1981
1982Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitroubc760d92010-08-15 17:46:50 +00001983countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001985 Py_ssize_t count=0;
1986 const char *start=target;
1987 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001989 while ( (start=findchar(start, end-start, c)) != NULL ) {
1990 count++;
1991 if (count >= maxcount)
1992 break;
1993 start += 1;
1994 }
1995 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996}
1997
1998Py_LOCAL(Py_ssize_t)
1999findstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002000 const char *pattern, Py_ssize_t pattern_len,
2001 Py_ssize_t start,
2002 Py_ssize_t end,
2003 int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002005 if (start < 0) {
2006 start += target_len;
2007 if (start < 0)
2008 start = 0;
2009 }
2010 if (end > target_len) {
2011 end = target_len;
2012 } else if (end < 0) {
2013 end += target_len;
2014 if (end < 0)
2015 end = 0;
2016 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002018 /* zero-length substrings always match at the first attempt */
2019 if (pattern_len == 0)
2020 return (direction > 0) ? start : end;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002022 end -= pattern_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002024 if (direction < 0) {
2025 for (; end >= start; end--)
2026 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2027 return end;
2028 } else {
2029 for (; start <= end; start++)
2030 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2031 return start;
2032 }
2033 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034}
2035
2036Py_LOCAL_INLINE(Py_ssize_t)
2037countstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002038 const char *pattern, Py_ssize_t pattern_len,
2039 Py_ssize_t start,
2040 Py_ssize_t end,
2041 int direction, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002042{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002043 Py_ssize_t count=0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002045 if (start < 0) {
2046 start += target_len;
2047 if (start < 0)
2048 start = 0;
2049 }
2050 if (end > target_len) {
2051 end = target_len;
2052 } else if (end < 0) {
2053 end += target_len;
2054 if (end < 0)
2055 end = 0;
2056 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002058 /* zero-length substrings match everywhere */
2059 if (pattern_len == 0 || maxcount == 0) {
2060 if (target_len+1 < maxcount)
2061 return target_len+1;
2062 return maxcount;
2063 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002065 end -= pattern_len;
2066 if (direction < 0) {
2067 for (; (end >= start); end--)
2068 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2069 count++;
2070 if (--maxcount <= 0) break;
2071 end -= pattern_len-1;
2072 }
2073 } else {
2074 for (; (start <= end); start++)
2075 if (Py_STRING_MATCH(target, start,
2076 pattern, pattern_len)) {
2077 count++;
2078 if (--maxcount <= 0)
2079 break;
2080 start += pattern_len-1;
2081 }
2082 }
2083 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084}
2085
2086
2087/* Algorithms for different cases of string replacement */
2088
2089/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2090Py_LOCAL(PyBytesObject *)
2091replace_interleave(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002092 const char *to_s, Py_ssize_t to_len,
2093 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002095 char *self_s, *result_s;
2096 Py_ssize_t self_len, result_len;
2097 Py_ssize_t count, i, product;
2098 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002100 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002102 /* 1 at the end plus 1 after every character */
2103 count = self_len+1;
2104 if (maxcount < count)
2105 count = maxcount;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002107 /* Check for overflow */
2108 /* result_len = count * to_len + self_len; */
2109 product = count * to_len;
2110 if (product / to_len != count) {
2111 PyErr_SetString(PyExc_OverflowError,
2112 "replacement bytes are too long");
2113 return NULL;
2114 }
2115 result_len = product + self_len;
2116 if (result_len < 0) {
2117 PyErr_SetString(PyExc_OverflowError,
2118 "replacement bytes are too long");
2119 return NULL;
2120 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002122 if (! (result = (PyBytesObject *)
2123 PyBytes_FromStringAndSize(NULL, result_len)) )
2124 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002126 self_s = PyBytes_AS_STRING(self);
2127 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002129 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002131 /* Lay the first one down (guaranteed this will occur) */
2132 Py_MEMCPY(result_s, to_s, to_len);
2133 result_s += to_len;
2134 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002136 for (i=0; i<count; i++) {
2137 *result_s++ = *self_s++;
2138 Py_MEMCPY(result_s, to_s, to_len);
2139 result_s += to_len;
2140 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002142 /* Copy the rest of the original string */
2143 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002145 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146}
2147
2148/* Special case for deleting a single character */
2149/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2150Py_LOCAL(PyBytesObject *)
2151replace_delete_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002152 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002154 char *self_s, *result_s;
2155 char *start, *next, *end;
2156 Py_ssize_t self_len, result_len;
2157 Py_ssize_t count;
2158 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002159
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002160 self_len = PyBytes_GET_SIZE(self);
2161 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002162
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002163 count = countchar(self_s, self_len, from_c, maxcount);
2164 if (count == 0) {
2165 return return_self(self);
2166 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002168 result_len = self_len - count; /* from_len == 1 */
2169 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002170
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002171 if ( (result = (PyBytesObject *)
2172 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2173 return NULL;
2174 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002175
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002176 start = self_s;
2177 end = self_s + self_len;
2178 while (count-- > 0) {
2179 next = findchar(start, end-start, from_c);
2180 if (next == NULL)
2181 break;
2182 Py_MEMCPY(result_s, start, next-start);
2183 result_s += (next-start);
2184 start = next+1;
2185 }
2186 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002187
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002188 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189}
2190
2191/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2192
2193Py_LOCAL(PyBytesObject *)
2194replace_delete_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002195 const char *from_s, Py_ssize_t from_len,
2196 Py_ssize_t maxcount) {
2197 char *self_s, *result_s;
2198 char *start, *next, *end;
2199 Py_ssize_t self_len, result_len;
2200 Py_ssize_t count, offset;
2201 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002203 self_len = PyBytes_GET_SIZE(self);
2204 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002205
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002206 count = countstring(self_s, self_len,
2207 from_s, from_len,
2208 0, self_len, 1,
2209 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002211 if (count == 0) {
2212 /* no matches */
2213 return return_self(self);
2214 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002216 result_len = self_len - (count * from_len);
2217 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002219 if ( (result = (PyBytesObject *)
2220 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2221 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002223 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002224
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002225 start = self_s;
2226 end = self_s + self_len;
2227 while (count-- > 0) {
2228 offset = findstring(start, end-start,
2229 from_s, from_len,
2230 0, end-start, FORWARD);
2231 if (offset == -1)
2232 break;
2233 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002234
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002235 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002236
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002237 result_s += (next-start);
2238 start = next+from_len;
2239 }
2240 Py_MEMCPY(result_s, start, end-start);
2241 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002242}
2243
2244/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2245Py_LOCAL(PyBytesObject *)
2246replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002247 char from_c, char to_c,
2248 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002249{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002250 char *self_s, *result_s, *start, *end, *next;
2251 Py_ssize_t self_len;
2252 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002254 /* The result string will be the same size */
2255 self_s = PyBytes_AS_STRING(self);
2256 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002258 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002260 if (next == NULL) {
2261 /* No matches; return the original string */
2262 return return_self(self);
2263 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002264
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002265 /* Need to make a new string */
2266 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2267 if (result == NULL)
2268 return NULL;
2269 result_s = PyBytes_AS_STRING(result);
2270 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002271
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002272 /* change everything in-place, starting with this one */
2273 start = result_s + (next-self_s);
2274 *start = to_c;
2275 start++;
2276 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002277
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002278 while (--maxcount > 0) {
2279 next = findchar(start, end-start, from_c);
2280 if (next == NULL)
2281 break;
2282 *next = to_c;
2283 start = next+1;
2284 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002285
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002286 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002287}
2288
2289/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2290Py_LOCAL(PyBytesObject *)
2291replace_substring_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002292 const char *from_s, Py_ssize_t from_len,
2293 const char *to_s, Py_ssize_t to_len,
2294 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002295{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002296 char *result_s, *start, *end;
2297 char *self_s;
2298 Py_ssize_t self_len, offset;
2299 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002300
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002301 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002303 self_s = PyBytes_AS_STRING(self);
2304 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002305
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002306 offset = findstring(self_s, self_len,
2307 from_s, from_len,
2308 0, self_len, FORWARD);
2309 if (offset == -1) {
2310 /* No matches; return the original string */
2311 return return_self(self);
2312 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002314 /* Need to make a new string */
2315 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2316 if (result == NULL)
2317 return NULL;
2318 result_s = PyBytes_AS_STRING(result);
2319 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002320
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002321 /* change everything in-place, starting with this one */
2322 start = result_s + offset;
2323 Py_MEMCPY(start, to_s, from_len);
2324 start += from_len;
2325 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002326
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002327 while ( --maxcount > 0) {
2328 offset = findstring(start, end-start,
2329 from_s, from_len,
2330 0, end-start, FORWARD);
2331 if (offset==-1)
2332 break;
2333 Py_MEMCPY(start+offset, to_s, from_len);
2334 start += offset+from_len;
2335 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002336
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002337 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002338}
2339
2340/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2341Py_LOCAL(PyBytesObject *)
2342replace_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002343 char from_c,
2344 const char *to_s, Py_ssize_t to_len,
2345 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002346{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002347 char *self_s, *result_s;
2348 char *start, *next, *end;
2349 Py_ssize_t self_len, result_len;
2350 Py_ssize_t count, product;
2351 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002352
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002353 self_s = PyBytes_AS_STRING(self);
2354 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002356 count = countchar(self_s, self_len, from_c, maxcount);
2357 if (count == 0) {
2358 /* no matches, return unchanged */
2359 return return_self(self);
2360 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002362 /* use the difference between current and new, hence the "-1" */
2363 /* result_len = self_len + count * (to_len-1) */
2364 product = count * (to_len-1);
2365 if (product / (to_len-1) != count) {
2366 PyErr_SetString(PyExc_OverflowError,
2367 "replacement bytes are too long");
2368 return NULL;
2369 }
2370 result_len = self_len + product;
2371 if (result_len < 0) {
2372 PyErr_SetString(PyExc_OverflowError,
2373 "replacment bytes are too long");
2374 return NULL;
2375 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002376
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002377 if ( (result = (PyBytesObject *)
2378 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2379 return NULL;
2380 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002381
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002382 start = self_s;
2383 end = self_s + self_len;
2384 while (count-- > 0) {
2385 next = findchar(start, end-start, from_c);
2386 if (next == NULL)
2387 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002388
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002389 if (next == start) {
2390 /* replace with the 'to' */
2391 Py_MEMCPY(result_s, to_s, to_len);
2392 result_s += to_len;
2393 start += 1;
2394 } else {
2395 /* copy the unchanged old then the 'to' */
2396 Py_MEMCPY(result_s, start, next-start);
2397 result_s += (next-start);
2398 Py_MEMCPY(result_s, to_s, to_len);
2399 result_s += to_len;
2400 start = next+1;
2401 }
2402 }
2403 /* Copy the remainder of the remaining string */
2404 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002405
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002406 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002407}
2408
2409/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2410Py_LOCAL(PyBytesObject *)
2411replace_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002412 const char *from_s, Py_ssize_t from_len,
2413 const char *to_s, Py_ssize_t to_len,
2414 Py_ssize_t maxcount) {
2415 char *self_s, *result_s;
2416 char *start, *next, *end;
2417 Py_ssize_t self_len, result_len;
2418 Py_ssize_t count, offset, product;
2419 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002420
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002421 self_s = PyBytes_AS_STRING(self);
2422 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002424 count = countstring(self_s, self_len,
2425 from_s, from_len,
2426 0, self_len, FORWARD, maxcount);
2427 if (count == 0) {
2428 /* no matches, return unchanged */
2429 return return_self(self);
2430 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002432 /* Check for overflow */
2433 /* result_len = self_len + count * (to_len-from_len) */
2434 product = count * (to_len-from_len);
2435 if (product / (to_len-from_len) != count) {
2436 PyErr_SetString(PyExc_OverflowError,
2437 "replacement bytes are too long");
2438 return NULL;
2439 }
2440 result_len = self_len + product;
2441 if (result_len < 0) {
2442 PyErr_SetString(PyExc_OverflowError,
2443 "replacement bytes are too long");
2444 return NULL;
2445 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002446
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002447 if ( (result = (PyBytesObject *)
2448 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2449 return NULL;
2450 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002451
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002452 start = self_s;
2453 end = self_s + self_len;
2454 while (count-- > 0) {
2455 offset = findstring(start, end-start,
2456 from_s, from_len,
2457 0, end-start, FORWARD);
2458 if (offset == -1)
2459 break;
2460 next = start+offset;
2461 if (next == start) {
2462 /* replace with the 'to' */
2463 Py_MEMCPY(result_s, to_s, to_len);
2464 result_s += to_len;
2465 start += from_len;
2466 } else {
2467 /* copy the unchanged old then the 'to' */
2468 Py_MEMCPY(result_s, start, next-start);
2469 result_s += (next-start);
2470 Py_MEMCPY(result_s, to_s, to_len);
2471 result_s += to_len;
2472 start = next+from_len;
2473 }
2474 }
2475 /* Copy the remainder of the remaining string */
2476 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002477
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002478 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002479}
2480
2481
2482Py_LOCAL(PyBytesObject *)
2483replace(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002484 const char *from_s, Py_ssize_t from_len,
2485 const char *to_s, Py_ssize_t to_len,
2486 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002488 if (maxcount < 0) {
2489 maxcount = PY_SSIZE_T_MAX;
2490 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2491 /* nothing to do; return the original string */
2492 return return_self(self);
2493 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002494
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002495 if (maxcount == 0 ||
2496 (from_len == 0 && to_len == 0)) {
2497 /* nothing to do; return the original string */
2498 return return_self(self);
2499 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002501 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002502
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002503 if (from_len == 0) {
2504 /* insert the 'to' string everywhere. */
2505 /* >>> "Python".replace("", ".") */
2506 /* '.P.y.t.h.o.n.' */
2507 return replace_interleave(self, to_s, to_len, maxcount);
2508 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002510 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2511 /* point for an empty self string to generate a non-empty string */
2512 /* Special case so the remaining code always gets a non-empty string */
2513 if (PyBytes_GET_SIZE(self) == 0) {
2514 return return_self(self);
2515 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002517 if (to_len == 0) {
2518 /* delete all occurrences of 'from' string */
2519 if (from_len == 1) {
2520 return replace_delete_single_character(
2521 self, from_s[0], maxcount);
2522 } else {
2523 return replace_delete_substring(self, from_s,
2524 from_len, maxcount);
2525 }
2526 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002528 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002530 if (from_len == to_len) {
2531 if (from_len == 1) {
2532 return replace_single_character_in_place(
2533 self,
2534 from_s[0],
2535 to_s[0],
2536 maxcount);
2537 } else {
2538 return replace_substring_in_place(
2539 self, from_s, from_len, to_s, to_len,
2540 maxcount);
2541 }
2542 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002543
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002544 /* Otherwise use the more generic algorithms */
2545 if (from_len == 1) {
2546 return replace_single_character(self, from_s[0],
2547 to_s, to_len, maxcount);
2548 } else {
2549 /* len('from')>=2, len('to')>=1 */
2550 return replace_substring(self, from_s, from_len, to_s, to_len,
2551 maxcount);
2552 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553}
2554
2555PyDoc_STRVAR(replace__doc__,
2556"B.replace(old, new[, count]) -> bytes\n\
2557\n\
2558Return a copy of B with all occurrences of subsection\n\
2559old replaced by new. If the optional argument count is\n\
Senthil Kumaranf7734202010-09-08 13:00:07 +00002560given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
2562static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002563bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002564{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002565 Py_ssize_t count = -1;
2566 PyObject *from, *to;
2567 const char *from_s, *to_s;
2568 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002569
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002570 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2571 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002573 if (PyBytes_Check(from)) {
2574 from_s = PyBytes_AS_STRING(from);
2575 from_len = PyBytes_GET_SIZE(from);
2576 }
2577 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2578 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002579
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002580 if (PyBytes_Check(to)) {
2581 to_s = PyBytes_AS_STRING(to);
2582 to_len = PyBytes_GET_SIZE(to);
2583 }
2584 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2585 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002586
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002587 return (PyObject *)replace((PyBytesObject *) self,
2588 from_s, from_len,
2589 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590}
2591
2592/** End DALKE **/
2593
2594/* Matches the end (direction >= 0) or start (direction < 0) of self
2595 * against substr, using the start and end arguments. Returns
2596 * -1 on error, 0 if not found and 1 if found.
2597 */
2598Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002599_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002600 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002601{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002602 Py_ssize_t len = PyBytes_GET_SIZE(self);
2603 Py_ssize_t slen;
2604 const char* sub;
2605 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002606
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002607 if (PyBytes_Check(substr)) {
2608 sub = PyBytes_AS_STRING(substr);
2609 slen = PyBytes_GET_SIZE(substr);
2610 }
2611 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2612 return -1;
2613 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002615 bytes_adjust_indices(&start, &end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002617 if (direction < 0) {
2618 /* startswith */
2619 if (start+slen > len)
2620 return 0;
2621 } else {
2622 /* endswith */
2623 if (end-start < slen || start > len)
2624 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002625
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002626 if (end-slen > start)
2627 start = end - slen;
2628 }
2629 if (end-start >= slen)
2630 return ! memcmp(str+start, sub, slen);
2631 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632}
2633
2634
2635PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002636"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637\n\
2638Return True if B starts with the specified prefix, False otherwise.\n\
2639With optional start, test B beginning at that position.\n\
2640With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002641prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642
2643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002644bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002645{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002646 Py_ssize_t start = 0;
2647 Py_ssize_t end = PY_SSIZE_T_MAX;
2648 PyObject *subobj;
2649 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Jesus Ceaac451502011-04-20 17:09:23 +02002651 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002652 return NULL;
2653 if (PyTuple_Check(subobj)) {
2654 Py_ssize_t i;
2655 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2656 result = _bytes_tailmatch(self,
2657 PyTuple_GET_ITEM(subobj, i),
2658 start, end, -1);
2659 if (result == -1)
2660 return NULL;
2661 else if (result) {
2662 Py_RETURN_TRUE;
2663 }
2664 }
2665 Py_RETURN_FALSE;
2666 }
2667 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002668 if (result == -1) {
2669 if (PyErr_ExceptionMatches(PyExc_TypeError))
2670 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2671 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002672 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002673 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002674 else
2675 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676}
2677
2678
2679PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002680"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002681\n\
2682Return True if B ends with the specified suffix, False otherwise.\n\
2683With optional start, test B beginning at that position.\n\
2684With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002685suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
2687static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002688bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002690 Py_ssize_t start = 0;
2691 Py_ssize_t end = PY_SSIZE_T_MAX;
2692 PyObject *subobj;
2693 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002694
Jesus Ceaac451502011-04-20 17:09:23 +02002695 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002696 return NULL;
2697 if (PyTuple_Check(subobj)) {
2698 Py_ssize_t i;
2699 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2700 result = _bytes_tailmatch(self,
2701 PyTuple_GET_ITEM(subobj, i),
2702 start, end, +1);
2703 if (result == -1)
2704 return NULL;
2705 else if (result) {
2706 Py_RETURN_TRUE;
2707 }
2708 }
2709 Py_RETURN_FALSE;
2710 }
2711 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002712 if (result == -1) {
2713 if (PyErr_ExceptionMatches(PyExc_TypeError))
2714 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2715 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002716 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002717 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002718 else
2719 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720}
2721
2722
2723PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002724"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002726Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002727to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002728handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2729a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002731able to handle UnicodeDecodeErrors.");
2732
2733static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002734bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002735{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002736 const char *encoding = NULL;
2737 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002738
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002739 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2740 return NULL;
2741 if (encoding == NULL)
2742 encoding = PyUnicode_GetDefaultEncoding();
2743 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002744}
2745
Guido van Rossum20188312006-05-05 15:15:40 +00002746
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002747PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002748"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002749\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002751Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002753
2754static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002755hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002756{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002757 if (c >= 128)
2758 return -1;
2759 if (ISDIGIT(c))
2760 return c - '0';
2761 else {
2762 if (ISUPPER(c))
2763 c = TOLOWER(c);
2764 if (c >= 'a' && c <= 'f')
2765 return c - 'a' + 10;
2766 }
2767 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002768}
2769
2770static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002771bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002772{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002773 PyObject *newstring, *hexobj;
2774 char *buf;
2775 Py_UNICODE *hex;
2776 Py_ssize_t hexlen, byteslen, i, j;
2777 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002778
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002779 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2780 return NULL;
2781 assert(PyUnicode_Check(hexobj));
2782 hexlen = PyUnicode_GET_SIZE(hexobj);
2783 hex = PyUnicode_AS_UNICODE(hexobj);
2784 byteslen = hexlen/2; /* This overestimates if there are spaces */
2785 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2786 if (!newstring)
2787 return NULL;
2788 buf = PyBytes_AS_STRING(newstring);
2789 for (i = j = 0; i < hexlen; i += 2) {
2790 /* skip over spaces in the input */
2791 while (hex[i] == ' ')
2792 i++;
2793 if (i >= hexlen)
2794 break;
2795 top = hex_digit_to_int(hex[i]);
2796 bot = hex_digit_to_int(hex[i+1]);
2797 if (top == -1 || bot == -1) {
2798 PyErr_Format(PyExc_ValueError,
2799 "non-hexadecimal number found in "
2800 "fromhex() arg at position %zd", i);
2801 goto error;
2802 }
2803 buf[j++] = (top << 4) + bot;
2804 }
2805 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2806 goto error;
2807 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002808
2809 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002810 Py_XDECREF(newstring);
2811 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002812}
2813
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002814PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002815"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002816
2817static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002818bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002819{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002820 Py_ssize_t res;
2821 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2822 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002823}
2824
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002825
2826static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002827bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002828{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002829 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002830}
2831
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002832
2833static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002834bytes_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002835 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2836 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2837 _Py_capitalize__doc__},
2838 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2839 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2840 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode__doc__},
2841 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2842 endswith__doc__},
2843 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2844 expandtabs__doc__},
2845 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2846 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2847 fromhex_doc},
2848 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2849 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2850 _Py_isalnum__doc__},
2851 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2852 _Py_isalpha__doc__},
2853 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2854 _Py_isdigit__doc__},
2855 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2856 _Py_islower__doc__},
2857 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2858 _Py_isspace__doc__},
2859 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2860 _Py_istitle__doc__},
2861 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2862 _Py_isupper__doc__},
2863 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2864 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2865 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2866 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2867 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2868 _Py_maketrans__doc__},
2869 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2870 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2871 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2872 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2873 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2874 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2875 rpartition__doc__},
2876 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2877 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2878 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2879 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2880 splitlines__doc__},
2881 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2882 startswith__doc__},
2883 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2884 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2885 _Py_swapcase__doc__},
2886 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2887 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2888 translate__doc__},
2889 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2890 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2891 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2892 sizeof__doc__},
2893 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002894};
2895
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896static PyObject *
2897str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2898
2899static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002900bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002902 PyObject *x = NULL;
2903 const char *encoding = NULL;
2904 const char *errors = NULL;
2905 PyObject *new = NULL;
2906 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002907
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002908 if (type != &PyBytes_Type)
2909 return str_subtype_new(type, args, kwds);
2910 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2911 &encoding, &errors))
2912 return NULL;
2913 if (x == NULL) {
2914 if (encoding != NULL || errors != NULL) {
2915 PyErr_SetString(PyExc_TypeError,
2916 "encoding or errors without sequence "
2917 "argument");
2918 return NULL;
2919 }
2920 return PyBytes_FromString("");
2921 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002922
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002923 if (PyUnicode_Check(x)) {
2924 /* Encode via the codec registry */
2925 if (encoding == NULL) {
2926 PyErr_SetString(PyExc_TypeError,
2927 "string argument without an encoding");
2928 return NULL;
2929 }
2930 new = PyUnicode_AsEncodedString(x, encoding, errors);
2931 if (new == NULL)
2932 return NULL;
2933 assert(PyBytes_Check(new));
2934 return new;
2935 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002936
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002937 /* If it's not unicode, there can't be encoding or errors */
2938 if (encoding != NULL || errors != NULL) {
2939 PyErr_SetString(PyExc_TypeError,
2940 "encoding or errors without a string argument");
2941 return NULL;
2942 }
2943 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002944}
2945
2946PyObject *
2947PyBytes_FromObject(PyObject *x)
2948{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002949 PyObject *new, *it;
2950 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002951
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002952 if (x == NULL) {
2953 PyErr_BadInternalCall();
2954 return NULL;
2955 }
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002956
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002957 /* Is it an int? */
2958 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2959 if (size == -1 && PyErr_Occurred()) {
2960 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2961 return NULL;
2962 PyErr_Clear();
2963 }
2964 else if (size < 0) {
2965 PyErr_SetString(PyExc_ValueError, "negative count");
2966 return NULL;
2967 }
2968 else {
2969 new = PyBytes_FromStringAndSize(NULL, size);
2970 if (new == NULL) {
2971 return NULL;
2972 }
2973 if (size > 0) {
2974 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2975 }
2976 return new;
2977 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002979 /* Use the modern buffer interface */
2980 if (PyObject_CheckBuffer(x)) {
2981 Py_buffer view;
2982 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2983 return NULL;
2984 new = PyBytes_FromStringAndSize(NULL, view.len);
2985 if (!new)
2986 goto fail;
2987 /* XXX(brett.cannon): Better way to get to internal buffer? */
2988 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2989 &view, view.len, 'C') < 0)
2990 goto fail;
2991 PyBuffer_Release(&view);
2992 return new;
2993 fail:
2994 Py_XDECREF(new);
2995 PyBuffer_Release(&view);
2996 return NULL;
2997 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002999 /* For iterator version, create a string object and resize as needed */
3000 /* XXX(gb): is 64 a good value? also, optimize if length is known */
3001 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
3002 input being a truly long iterator. */
3003 size = 64;
3004 new = PyBytes_FromStringAndSize(NULL, size);
3005 if (new == NULL)
3006 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003008 /* XXX Optimize this if the arguments is a list, tuple */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003009
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003010 /* Get the iterator */
3011 it = PyObject_GetIter(x);
3012 if (it == NULL)
3013 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003014
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003015 /* Run the iterator to exhaustion */
3016 for (i = 0; ; i++) {
3017 PyObject *item;
3018 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003019
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003020 /* Get the next item */
3021 item = PyIter_Next(it);
3022 if (item == NULL) {
3023 if (PyErr_Occurred())
3024 goto error;
3025 break;
3026 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003027
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003028 /* Interpret it as an int (__index__) */
3029 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3030 Py_DECREF(item);
3031 if (value == -1 && PyErr_Occurred())
3032 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003033
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003034 /* Range check */
3035 if (value < 0 || value >= 256) {
3036 PyErr_SetString(PyExc_ValueError,
3037 "bytes must be in range(0, 256)");
3038 goto error;
3039 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003040
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003041 /* Append the byte */
3042 if (i >= size) {
3043 size *= 2;
3044 if (_PyBytes_Resize(&new, size) < 0)
3045 goto error;
3046 }
Antoine Pitroubc760d92010-08-15 17:46:50 +00003047 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003048 }
3049 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003050
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003051 /* Clean up and return success */
3052 Py_DECREF(it);
3053 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003054
3055 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003056 /* Error handling when new != NULL */
3057 Py_XDECREF(it);
3058 Py_DECREF(new);
3059 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003060}
3061
3062static PyObject *
3063str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3064{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003065 PyObject *tmp, *pnew;
3066 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003067
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003068 assert(PyType_IsSubtype(type, &PyBytes_Type));
3069 tmp = bytes_new(&PyBytes_Type, args, kwds);
3070 if (tmp == NULL)
3071 return NULL;
3072 assert(PyBytes_CheckExact(tmp));
3073 n = PyBytes_GET_SIZE(tmp);
3074 pnew = type->tp_alloc(type, n);
3075 if (pnew != NULL) {
3076 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3077 PyBytes_AS_STRING(tmp), n+1);
3078 ((PyBytesObject *)pnew)->ob_shash =
3079 ((PyBytesObject *)tmp)->ob_shash;
3080 }
3081 Py_DECREF(tmp);
3082 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003083}
3084
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003085PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003086"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003087bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003088bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3089bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003090\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003091Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003092 - an iterable yielding integers in range(256)\n\
3093 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003094 - a bytes or a buffer object\n\
3095 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003096
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003097static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003098
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003099PyTypeObject PyBytes_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003100 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3101 "bytes",
3102 PyBytesObject_SIZE,
3103 sizeof(char),
3104 bytes_dealloc, /* tp_dealloc */
3105 0, /* tp_print */
3106 0, /* tp_getattr */
3107 0, /* tp_setattr */
3108 0, /* tp_reserved */
3109 (reprfunc)bytes_repr, /* tp_repr */
3110 0, /* tp_as_number */
3111 &bytes_as_sequence, /* tp_as_sequence */
3112 &bytes_as_mapping, /* tp_as_mapping */
3113 (hashfunc)bytes_hash, /* tp_hash */
3114 0, /* tp_call */
3115 bytes_str, /* tp_str */
3116 PyObject_GenericGetAttr, /* tp_getattro */
3117 0, /* tp_setattro */
3118 &bytes_as_buffer, /* tp_as_buffer */
3119 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3120 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3121 bytes_doc, /* tp_doc */
3122 0, /* tp_traverse */
3123 0, /* tp_clear */
3124 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3125 0, /* tp_weaklistoffset */
3126 bytes_iter, /* tp_iter */
3127 0, /* tp_iternext */
3128 bytes_methods, /* tp_methods */
3129 0, /* tp_members */
3130 0, /* tp_getset */
3131 &PyBaseObject_Type, /* tp_base */
3132 0, /* tp_dict */
3133 0, /* tp_descr_get */
3134 0, /* tp_descr_set */
3135 0, /* tp_dictoffset */
3136 0, /* tp_init */
3137 0, /* tp_alloc */
3138 bytes_new, /* tp_new */
3139 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003140};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003141
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003142void
3143PyBytes_Concat(register PyObject **pv, register PyObject *w)
3144{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003145 register PyObject *v;
3146 assert(pv != NULL);
3147 if (*pv == NULL)
3148 return;
3149 if (w == NULL) {
3150 Py_DECREF(*pv);
3151 *pv = NULL;
3152 return;
3153 }
3154 v = bytes_concat(*pv, w);
3155 Py_DECREF(*pv);
3156 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003157}
3158
3159void
3160PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3161{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003162 PyBytes_Concat(pv, w);
3163 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003164}
3165
3166
3167/* The following function breaks the notion that strings are immutable:
3168 it changes the size of a string. We get away with this only if there
3169 is only one module referencing the object. You can also think of it
3170 as creating a new string object and destroying the old one, only
3171 more efficiently. In any case, don't use this if the string may
3172 already be known to some other part of the code...
3173 Note that if there's not enough memory to resize the string, the original
3174 string object at *pv is deallocated, *pv is set to NULL, an "out of
3175 memory" exception is set, and -1 is returned. Else (on success) 0 is
3176 returned, and the value in *pv may or may not be the same as on input.
3177 As always, an extra byte is allocated for a trailing \0 byte (newsize
3178 does *not* include that), and a trailing \0 byte is stored.
3179*/
3180
3181int
3182_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3183{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003184 register PyObject *v;
3185 register PyBytesObject *sv;
3186 v = *pv;
3187 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3188 *pv = 0;
3189 Py_DECREF(v);
3190 PyErr_BadInternalCall();
3191 return -1;
3192 }
3193 /* XXX UNREF/NEWREF interface should be more symmetrical */
3194 _Py_DEC_REFTOTAL;
3195 _Py_ForgetReference(v);
3196 *pv = (PyObject *)
3197 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
3198 if (*pv == NULL) {
3199 PyObject_Del(v);
3200 PyErr_NoMemory();
3201 return -1;
3202 }
3203 _Py_NewReference(*pv);
3204 sv = (PyBytesObject *) *pv;
3205 Py_SIZE(sv) = newsize;
3206 sv->ob_sval[newsize] = '\0';
3207 sv->ob_shash = -1; /* invalidate cached hash value */
3208 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003209}
3210
3211/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3212 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3213 * Python's regular ints.
3214 * Return value: a new PyString*, or NULL if error.
3215 * . *pbuf is set to point into it,
3216 * *plen set to the # of chars following that.
3217 * Caller must decref it when done using pbuf.
3218 * The string starting at *pbuf is of the form
3219 * "-"? ("0x" | "0X")? digit+
3220 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3221 * set in flags. The case of hex digits will be correct,
3222 * There will be at least prec digits, zero-filled on the left if
3223 * necessary to get that many.
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003224 * val object to be converted
3225 * flags bitmask of format flags; only F_ALT is looked at
3226 * prec minimum number of digits; 0-fill on left if needed
3227 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003228 *
3229 * CAUTION: o, x and X conversions on regular ints can never
3230 * produce a '-' sign, but can for Python's unbounded ints.
3231 */
3232PyObject*
3233_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003234 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003235{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003236 PyObject *result = NULL;
3237 char *buf;
3238 Py_ssize_t i;
3239 int sign; /* 1 if '-', else 0 */
3240 int len; /* number of characters */
3241 Py_ssize_t llen;
3242 int numdigits; /* len == numnondigits + numdigits */
3243 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003244
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003245 /* Avoid exceeding SSIZE_T_MAX */
3246 if (prec > INT_MAX-3) {
3247 PyErr_SetString(PyExc_OverflowError,
3248 "precision too large");
3249 return NULL;
3250 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003251
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003252 switch (type) {
3253 case 'd':
3254 case 'u':
3255 /* Special-case boolean: we want 0/1 */
3256 if (PyBool_Check(val))
3257 result = PyNumber_ToBase(val, 10);
3258 else
3259 result = Py_TYPE(val)->tp_str(val);
3260 break;
3261 case 'o':
3262 numnondigits = 2;
3263 result = PyNumber_ToBase(val, 8);
3264 break;
3265 case 'x':
3266 case 'X':
3267 numnondigits = 2;
3268 result = PyNumber_ToBase(val, 16);
3269 break;
3270 default:
3271 assert(!"'type' not in [duoxX]");
3272 }
3273 if (!result)
3274 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003275
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003276 buf = _PyUnicode_AsString(result);
3277 if (!buf) {
3278 Py_DECREF(result);
3279 return NULL;
3280 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003281
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003282 /* To modify the string in-place, there can only be one reference. */
3283 if (Py_REFCNT(result) != 1) {
3284 PyErr_BadInternalCall();
3285 return NULL;
3286 }
3287 llen = PyUnicode_GetSize(result);
3288 if (llen > INT_MAX) {
3289 PyErr_SetString(PyExc_ValueError,
3290 "string too large in _PyBytes_FormatLong");
3291 return NULL;
3292 }
3293 len = (int)llen;
3294 if (buf[len-1] == 'L') {
3295 --len;
3296 buf[len] = '\0';
3297 }
3298 sign = buf[0] == '-';
3299 numnondigits += sign;
3300 numdigits = len - numnondigits;
3301 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003302
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003303 /* Get rid of base marker unless F_ALT */
3304 if (((flags & F_ALT) == 0 &&
3305 (type == 'o' || type == 'x' || type == 'X'))) {
3306 assert(buf[sign] == '0');
3307 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3308 buf[sign+1] == 'o');
3309 numnondigits -= 2;
3310 buf += 2;
3311 len -= 2;
3312 if (sign)
3313 buf[0] = '-';
3314 assert(len == numnondigits + numdigits);
3315 assert(numdigits > 0);
3316 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003317
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003318 /* Fill with leading zeroes to meet minimum width. */
3319 if (prec > numdigits) {
3320 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3321 numnondigits + prec);
3322 char *b1;
3323 if (!r1) {
3324 Py_DECREF(result);
3325 return NULL;
3326 }
3327 b1 = PyBytes_AS_STRING(r1);
3328 for (i = 0; i < numnondigits; ++i)
3329 *b1++ = *buf++;
3330 for (i = 0; i < prec - numdigits; i++)
3331 *b1++ = '0';
3332 for (i = 0; i < numdigits; i++)
3333 *b1++ = *buf++;
3334 *b1 = '\0';
3335 Py_DECREF(result);
3336 result = r1;
3337 buf = PyBytes_AS_STRING(result);
3338 len = numnondigits + prec;
3339 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003340
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003341 /* Fix up case for hex conversions. */
3342 if (type == 'X') {
3343 /* Need to convert all lower case letters to upper case.
3344 and need to convert 0x to 0X (and -0x to -0X). */
3345 for (i = 0; i < len; i++)
3346 if (buf[i] >= 'a' && buf[i] <= 'x')
3347 buf[i] -= 'a'-'A';
3348 }
3349 *pbuf = buf;
3350 *plen = len;
3351 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003352}
3353
3354void
3355PyBytes_Fini(void)
3356{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003357 int i;
3358 for (i = 0; i < UCHAR_MAX + 1; i++) {
3359 Py_XDECREF(characters[i]);
3360 characters[i] = NULL;
3361 }
3362 Py_XDECREF(nullstring);
3363 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003364}
3365
Benjamin Peterson4116f362008-05-27 00:36:20 +00003366/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003367
3368typedef struct {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003369 PyObject_HEAD
3370 Py_ssize_t it_index;
3371 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003372} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003373
3374static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003375striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003376{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003377 _PyObject_GC_UNTRACK(it);
3378 Py_XDECREF(it->it_seq);
3379 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003380}
3381
3382static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003383striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003384{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003385 Py_VISIT(it->it_seq);
3386 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003387}
3388
3389static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003390striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003391{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003392 PyBytesObject *seq;
3393 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003394
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003395 assert(it != NULL);
3396 seq = it->it_seq;
3397 if (seq == NULL)
3398 return NULL;
3399 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003400
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003401 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3402 item = PyLong_FromLong(
3403 (unsigned char)seq->ob_sval[it->it_index]);
3404 if (item != NULL)
3405 ++it->it_index;
3406 return item;
3407 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003408
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003409 Py_DECREF(seq);
3410 it->it_seq = NULL;
3411 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003412}
3413
3414static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003415striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003416{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003417 Py_ssize_t len = 0;
3418 if (it->it_seq)
3419 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3420 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003421}
3422
3423PyDoc_STRVAR(length_hint_doc,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003424 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003425
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003426static PyMethodDef striter_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003427 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3428 length_hint_doc},
3429 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003430};
3431
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003432PyTypeObject PyBytesIter_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003433 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3434 "bytes_iterator", /* tp_name */
3435 sizeof(striterobject), /* tp_basicsize */
3436 0, /* tp_itemsize */
3437 /* methods */
3438 (destructor)striter_dealloc, /* tp_dealloc */
3439 0, /* tp_print */
3440 0, /* tp_getattr */
3441 0, /* tp_setattr */
3442 0, /* tp_reserved */
3443 0, /* tp_repr */
3444 0, /* tp_as_number */
3445 0, /* tp_as_sequence */
3446 0, /* tp_as_mapping */
3447 0, /* tp_hash */
3448 0, /* tp_call */
3449 0, /* tp_str */
3450 PyObject_GenericGetAttr, /* tp_getattro */
3451 0, /* tp_setattro */
3452 0, /* tp_as_buffer */
3453 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3454 0, /* tp_doc */
3455 (traverseproc)striter_traverse, /* tp_traverse */
3456 0, /* tp_clear */
3457 0, /* tp_richcompare */
3458 0, /* tp_weaklistoffset */
3459 PyObject_SelfIter, /* tp_iter */
3460 (iternextfunc)striter_next, /* tp_iternext */
3461 striter_methods, /* tp_methods */
3462 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003463};
3464
3465static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003466bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003467{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003468 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003469
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003470 if (!PyBytes_Check(seq)) {
3471 PyErr_BadInternalCall();
3472 return NULL;
3473 }
3474 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3475 if (it == NULL)
3476 return NULL;
3477 it->it_index = 0;
3478 Py_INCREF(seq);
3479 it->it_seq = (PyBytesObject *)seq;
3480 _PyObject_GC_TRACK(it);
3481 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003482}