blob: a0c121ededf5653734fad5d87355dfbb6b94f662 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroua57aae72010-06-09 16:58:35 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   bytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178#else
179#ifdef __va_copy
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000180 __va_copy(count, vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000182 count = vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183#endif
184#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000239 expand:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000345
346 end:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000354 PyObject* ret;
355 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000358 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000360 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000368bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000407
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200498 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505 failed:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000506 Py_DECREF(v);
507 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000537 register char **s,
538 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000544
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
Eric Smith0923d1d2009-04-16 20:16:10 +0000565#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000567
Neal Norwitz6968b052007-02-27 19:02:19 +0000568#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000569#define STRINGLIB_LEN PyBytes_GET_SIZE
570#define STRINGLIB_NEW PyBytes_FromStringAndSize
571#define STRINGLIB_STR PyBytes_AS_STRING
572/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
573
574#define STRINGLIB_EMPTY nullstring
575#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
576#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000577
578#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579
Neal Norwitz6968b052007-02-27 19:02:19 +0000580#include "stringlib/count.h"
581#include "stringlib/find.h"
582#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000583#include "stringlib/ctype.h"
584#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000585
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000586#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
Eric Smitha3b1ac82009-04-03 14:45:06 +0000587#define _Py_InsertThousandsGroupingLocale _PyBytes_InsertThousandsGroupingLocale
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000588#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000589
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000590PyObject *
591PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000592{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000593 static const char *hexdigits = "0123456789abcdef";
594 register PyBytesObject* op = (PyBytesObject*) obj;
595 Py_ssize_t length = Py_SIZE(op);
596 size_t newsize = 3 + 4 * length;
597 PyObject *v;
598 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
599 PyErr_SetString(PyExc_OverflowError,
600 "bytes object is too large to make repr");
601 return NULL;
602 }
603 v = PyUnicode_FromUnicode(NULL, newsize);
604 if (v == NULL) {
605 return NULL;
606 }
607 else {
608 register Py_ssize_t i;
609 register Py_UNICODE c;
610 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
611 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000612
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000613 /* Figure out which quote to use; single is preferred */
614 quote = '\'';
615 if (smartquotes) {
616 char *test, *start;
617 start = PyBytes_AS_STRING(op);
618 for (test = start; test < start+length; ++test) {
619 if (*test == '"') {
620 quote = '\''; /* back to single */
621 goto decided;
622 }
623 else if (*test == '\'')
624 quote = '"';
625 }
626 decided:
627 ;
628 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000629
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000630 *p++ = 'b', *p++ = quote;
631 for (i = 0; i < length; i++) {
632 /* There's at least enough room for a hex escape
633 and a closing quote. */
634 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
635 c = op->ob_sval[i];
636 if (c == quote || c == '\\')
637 *p++ = '\\', *p++ = c;
638 else if (c == '\t')
639 *p++ = '\\', *p++ = 't';
640 else if (c == '\n')
641 *p++ = '\\', *p++ = 'n';
642 else if (c == '\r')
643 *p++ = '\\', *p++ = 'r';
644 else if (c < ' ' || c >= 0x7f) {
645 *p++ = '\\';
646 *p++ = 'x';
647 *p++ = hexdigits[(c & 0xf0) >> 4];
648 *p++ = hexdigits[c & 0xf];
649 }
650 else
651 *p++ = c;
652 }
653 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
654 *p++ = quote;
655 *p = '\0';
656 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
657 Py_DECREF(v);
658 return NULL;
659 }
660 return v;
661 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Neal Norwitz6968b052007-02-27 19:02:19 +0000664static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000665bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000666{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000667 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000668}
669
Neal Norwitz6968b052007-02-27 19:02:19 +0000670static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000671bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000672{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000673 if (Py_BytesWarningFlag) {
674 if (PyErr_WarnEx(PyExc_BytesWarning,
675 "str() on a bytes instance", 1))
676 return NULL;
677 }
678 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000679}
680
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000681static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000682bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000684 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000685}
Neal Norwitz6968b052007-02-27 19:02:19 +0000686
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687/* This is also used by PyBytes_Concat() */
688static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000689bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000691 Py_ssize_t size;
692 Py_buffer va, vb;
693 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000695 va.len = -1;
696 vb.len = -1;
697 if (_getbuffer(a, &va) < 0 ||
698 _getbuffer(b, &vb) < 0) {
699 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
700 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
701 goto done;
702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000704 /* Optimize end cases */
705 if (va.len == 0 && PyBytes_CheckExact(b)) {
706 result = b;
707 Py_INCREF(result);
708 goto done;
709 }
710 if (vb.len == 0 && PyBytes_CheckExact(a)) {
711 result = a;
712 Py_INCREF(result);
713 goto done;
714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000715
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000716 size = va.len + vb.len;
717 if (size < 0) {
718 PyErr_NoMemory();
719 goto done;
720 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000721
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000722 result = PyBytes_FromStringAndSize(NULL, size);
723 if (result != NULL) {
724 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
725 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000727
728 done:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000729 if (va.len != -1)
730 PyBuffer_Release(&va);
731 if (vb.len != -1)
732 PyBuffer_Release(&vb);
733 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000734}
Neal Norwitz6968b052007-02-27 19:02:19 +0000735
736static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000737bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000738{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000739 register Py_ssize_t i;
740 register Py_ssize_t j;
741 register Py_ssize_t size;
742 register PyBytesObject *op;
743 size_t nbytes;
744 if (n < 0)
745 n = 0;
746 /* watch out for overflows: the size can overflow int,
747 * and the # of bytes needed can overflow size_t
748 */
749 size = Py_SIZE(a) * n;
750 if (n && size / n != Py_SIZE(a)) {
751 PyErr_SetString(PyExc_OverflowError,
752 "repeated bytes are too long");
753 return NULL;
754 }
755 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
756 Py_INCREF(a);
757 return (PyObject *)a;
758 }
759 nbytes = (size_t)size;
760 if (nbytes + PyBytesObject_SIZE <= nbytes) {
761 PyErr_SetString(PyExc_OverflowError,
762 "repeated bytes are too long");
763 return NULL;
764 }
765 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
766 if (op == NULL)
767 return PyErr_NoMemory();
768 PyObject_INIT_VAR(op, &PyBytes_Type, size);
769 op->ob_shash = -1;
770 op->ob_sval[size] = '\0';
771 if (Py_SIZE(a) == 1 && n > 0) {
772 memset(op->ob_sval, a->ob_sval[0] , n);
773 return (PyObject *) op;
774 }
775 i = 0;
776 if (i < size) {
777 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
778 i = Py_SIZE(a);
779 }
780 while (i < size) {
781 j = (i <= size-i) ? i : size-i;
782 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
783 i += j;
784 }
785 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000786}
787
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000789bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790{
791 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
792 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000793 Py_buffer varg;
Antoine Pitroubc760d92010-08-15 17:46:50 +0000794 Py_ssize_t pos;
Antoine Pitroua57aae72010-06-09 16:58:35 +0000795 PyErr_Clear();
796 if (_getbuffer(arg, &varg) < 0)
797 return -1;
798 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
799 varg.buf, varg.len, 0);
800 PyBuffer_Release(&varg);
801 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000802 }
803 if (ival < 0 || ival >= 256) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000804 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
805 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000806 }
807
Antoine Pitroubc760d92010-08-15 17:46:50 +0000808 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000809}
810
Neal Norwitz6968b052007-02-27 19:02:19 +0000811static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000812bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000813{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000814 if (i < 0 || i >= Py_SIZE(a)) {
815 PyErr_SetString(PyExc_IndexError, "index out of range");
816 return NULL;
817 }
818 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000819}
820
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000821static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000822bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000823{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000824 int c;
825 Py_ssize_t len_a, len_b;
826 Py_ssize_t min_len;
827 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000828
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000829 /* Make sure both arguments are strings. */
830 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
831 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
832 (PyObject_IsInstance((PyObject*)a,
833 (PyObject*)&PyUnicode_Type) ||
834 PyObject_IsInstance((PyObject*)b,
835 (PyObject*)&PyUnicode_Type))) {
836 if (PyErr_WarnEx(PyExc_BytesWarning,
837 "Comparison between bytes and string", 1))
838 return NULL;
839 }
840 result = Py_NotImplemented;
841 goto out;
842 }
843 if (a == b) {
844 switch (op) {
845 case Py_EQ:case Py_LE:case Py_GE:
846 result = Py_True;
847 goto out;
848 case Py_NE:case Py_LT:case Py_GT:
849 result = Py_False;
850 goto out;
851 }
852 }
853 if (op == Py_EQ) {
854 /* Supporting Py_NE here as well does not save
855 much time, since Py_NE is rarely used. */
856 if (Py_SIZE(a) == Py_SIZE(b)
857 && (a->ob_sval[0] == b->ob_sval[0]
858 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
859 result = Py_True;
860 } else {
861 result = Py_False;
862 }
863 goto out;
864 }
865 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
866 min_len = (len_a < len_b) ? len_a : len_b;
867 if (min_len > 0) {
868 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
869 if (c==0)
870 c = memcmp(a->ob_sval, b->ob_sval, min_len);
871 } else
872 c = 0;
873 if (c == 0)
874 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
875 switch (op) {
876 case Py_LT: c = c < 0; break;
877 case Py_LE: c = c <= 0; break;
878 case Py_EQ: assert(0); break; /* unreachable */
879 case Py_NE: c = c != 0; break;
880 case Py_GT: c = c > 0; break;
881 case Py_GE: c = c >= 0; break;
882 default:
883 result = Py_NotImplemented;
884 goto out;
885 }
886 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000887 out:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000888 Py_INCREF(result);
889 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000890}
891
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000892static long
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000893bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000894{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000895 register Py_ssize_t len;
896 register unsigned char *p;
897 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000898
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400899#ifdef Py_DEBUG
Benjamin Peterson69e97272012-02-21 11:08:50 -0500900 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400901#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000902 if (a->ob_shash != -1)
903 return a->ob_shash;
904 len = Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100905 /*
906 We make the hash of the empty string be 0, rather than using
907 (prefix ^ suffix), since this slightly obfuscates the hash secret
908 */
909 if (len == 0) {
910 a->ob_shash = 0;
911 return 0;
912 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000913 p = (unsigned char *) a->ob_sval;
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100914 x = _Py_HashSecret.prefix;
915 x ^= *p << 7;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000916 while (--len >= 0)
917 x = (1000003*x) ^ *p++;
918 x ^= Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100919 x ^= _Py_HashSecret.suffix;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000920 if (x == -1)
921 x = -2;
922 a->ob_shash = x;
923 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000924}
925
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000926static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000927bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000928{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000929 if (PyIndex_Check(item)) {
930 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
931 if (i == -1 && PyErr_Occurred())
932 return NULL;
933 if (i < 0)
934 i += PyBytes_GET_SIZE(self);
935 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
936 PyErr_SetString(PyExc_IndexError,
937 "index out of range");
938 return NULL;
939 }
940 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
941 }
942 else if (PySlice_Check(item)) {
943 Py_ssize_t start, stop, step, slicelength, cur, i;
944 char* source_buf;
945 char* result_buf;
946 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000947
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000948 if (PySlice_GetIndicesEx((PySliceObject*)item,
949 PyBytes_GET_SIZE(self),
950 &start, &stop, &step, &slicelength) < 0) {
951 return NULL;
952 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000953
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000954 if (slicelength <= 0) {
955 return PyBytes_FromStringAndSize("", 0);
956 }
957 else if (start == 0 && step == 1 &&
958 slicelength == PyBytes_GET_SIZE(self) &&
959 PyBytes_CheckExact(self)) {
960 Py_INCREF(self);
961 return (PyObject *)self;
962 }
963 else if (step == 1) {
964 return PyBytes_FromStringAndSize(
965 PyBytes_AS_STRING(self) + start,
966 slicelength);
967 }
968 else {
969 source_buf = PyBytes_AS_STRING(self);
970 result = PyBytes_FromStringAndSize(NULL, slicelength);
971 if (result == NULL)
972 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000973
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000974 result_buf = PyBytes_AS_STRING(result);
975 for (cur = start, i = 0; i < slicelength;
976 cur += step, i++) {
977 result_buf[i] = source_buf[cur];
978 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000979
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000980 return result;
981 }
982 }
983 else {
984 PyErr_Format(PyExc_TypeError,
985 "byte indices must be integers, not %.200s",
986 Py_TYPE(item)->tp_name);
987 return NULL;
988 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000989}
990
991static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000992bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000994 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
995 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000996}
997
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000998static PySequenceMethods bytes_as_sequence = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000999 (lenfunc)bytes_length, /*sq_length*/
1000 (binaryfunc)bytes_concat, /*sq_concat*/
1001 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1002 (ssizeargfunc)bytes_item, /*sq_item*/
1003 0, /*sq_slice*/
1004 0, /*sq_ass_item*/
1005 0, /*sq_ass_slice*/
1006 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001007};
1008
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001009static PyMappingMethods bytes_as_mapping = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001010 (lenfunc)bytes_length,
1011 (binaryfunc)bytes_subscript,
1012 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001013};
1014
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001015static PyBufferProcs bytes_as_buffer = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001016 (getbufferproc)bytes_buffer_getbuffer,
1017 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001018};
1019
1020
/* Strip-mode selectors.  Each value is also an index into stripformat[]. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2

/* Format strings, one per selector above (same order). */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: the format string minus its 3-char "|O:"
   prefix. */
#define STRIPNAME(i) (stripformat[i] + 3)
1029
Neal Norwitz6968b052007-02-27 19:02:19 +00001030
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly before memcmp() is used on the rest.

   Don't call if length < 2 (the memcmp length would go negative).
   Every argument is parenthesized so that compound expressions expand
   correctly; arguments are still evaluated more than once, so they must
   not have side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1036
1037
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one slot per
   possible piece, capped at MAX_PREALLOC.  5 splits gives 6 elements.
   The argument is parenthesized so that any expression may be passed; it is
   evaluated more than once, so it must not have side effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Neal Norwitz6968b052007-02-27 19:02:19 +00001050
/* Append the bytes data[left:right] to the list being built by a split
   function.

   Relies on names from the expanding function's scope: `str` (scratch
   PyObject *), `list` (the result list), `count` (pieces stored so far)
   and an `onError` label, which is jumped to on any failure.

   While count is below MAX_PREALLOC the piece is written into the
   preallocated slot with PyList_SET_ITEM (which takes over the reference);
   after that PyList_Append is used and our own reference is dropped.

   Wrapped in do { } while (0) so the macro is a single statement and is
   safe in an unbraced if/else. */
#define SPLIT_ADD(data, left, right)                                    \
    do {                                                                \
        str = PyBytes_FromStringAndSize((data) + (left),                \
                                        (right) - (left));              \
        if (str == NULL)                                                \
            goto onError;                                               \
        if (count < MAX_PREALLOC) {                                     \
            PyList_SET_ITEM(list, count, str);                          \
        }                                                               \
        else {                                                          \
            int split_add_failed_ = PyList_Append(list, str);           \
            Py_DECREF(str);                                             \
            if (split_add_failed_)                                      \
                goto onError;                                           \
        }                                                               \
        count++;                                                        \
    } while (0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001067
/* Set the list's recorded size to `count` (a variable in the expanding
   function's scope), i.e. to the number of pieces actually stored. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00001070
/* Cursor-advancing helpers for whitespace splitting.  `i` must be an
   lvalue; it is moved in place.

   SKIP_SPACE / SKIP_NONSPACE move i forward (stopping at len) past a run of
   bytes for which ISSPACE is true / false.
   RSKIP_SPACE / RSKIP_NONSPACE do the same moving backward, stopping when
   i drops below 0. */
#define SKIP_SPACE(s, i, len)                               \
    { while (i<len && ISSPACE(s[i]))                        \
          i++; }
#define SKIP_NONSPACE(s, i, len)                            \
    { while (i<len && !ISSPACE(s[i]))                       \
          i++; }
#define RSKIP_SPACE(s, i)                                   \
    { while (i>=0 && ISSPACE(s[i]))                         \
          i--; }
#define RSKIP_NONSPACE(s, i)                                \
    { while (i>=0 && !ISSPACE(s[i]))                        \
          i--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001075
1076Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001077split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001078{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001079 const char *s = PyBytes_AS_STRING(self);
1080 Py_ssize_t i, j, count=0;
1081 PyObject *str;
1082 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001083
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001084 if (list == NULL)
1085 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001087 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001088
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001089 while (maxsplit-- > 0) {
1090 SKIP_SPACE(s, i, len);
1091 if (i==len) break;
1092 j = i; i++;
1093 SKIP_NONSPACE(s, i, len);
1094 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1095 /* No whitespace in self, so just use it as list[0] */
1096 Py_INCREF(self);
1097 PyList_SET_ITEM(list, 0, (PyObject *)self);
1098 count++;
1099 break;
1100 }
1101 SPLIT_ADD(s, j, i);
1102 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001103
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001104 if (i < len) {
1105 /* Only occurs when maxsplit was reached */
1106 /* Skip any remaining whitespace and copy to end of string */
1107 SKIP_SPACE(s, i, len);
1108 if (i != len)
1109 SPLIT_ADD(s, i, len);
1110 }
1111 FIX_PREALLOC_SIZE(list);
1112 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001113 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001114 Py_DECREF(list);
1115 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001116}
1117
Guido van Rossum8f950672007-09-10 16:53:45 +00001118Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001119split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001120{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001121 const char *s = PyBytes_AS_STRING(self);
1122 register Py_ssize_t i, j, count=0;
1123 PyObject *str;
1124 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001125
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001126 if (list == NULL)
1127 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001128
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001129 i = j = 0;
1130 while ((j < len) && (maxcount-- > 0)) {
1131 for(; j<len; j++) {
1132 /* I found that using memchr makes no difference */
1133 if (s[j] == ch) {
1134 SPLIT_ADD(s, i, j);
1135 i = j = j + 1;
1136 break;
1137 }
1138 }
1139 }
1140 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1141 /* ch not in self, so just use self as list[0] */
1142 Py_INCREF(self);
1143 PyList_SET_ITEM(list, 0, (PyObject *)self);
1144 count++;
1145 }
1146 else if (i <= len) {
1147 SPLIT_ADD(s, i, len);
1148 }
1149 FIX_PREALLOC_SIZE(list);
1150 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001151
1152 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001153 Py_DECREF(list);
1154 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001155}
1156
Neal Norwitz6968b052007-02-27 19:02:19 +00001157PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001158"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001159\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001160Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001161If sep is not specified or is None, B is split on ASCII whitespace\n\
1162characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001163If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001164
1165static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001166bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001167{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001168 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1169 Py_ssize_t maxsplit = -1, count=0;
1170 const char *s = PyBytes_AS_STRING(self), *sub;
1171 Py_buffer vsub;
1172 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001173#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001174 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001175#endif
1176
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001177 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1178 return NULL;
1179 if (maxsplit < 0)
1180 maxsplit = PY_SSIZE_T_MAX;
1181 if (subobj == Py_None)
1182 return split_whitespace(self, len, maxsplit);
1183 if (_getbuffer(subobj, &vsub) < 0)
1184 return NULL;
1185 sub = vsub.buf;
1186 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001187
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001188 if (n == 0) {
1189 PyErr_SetString(PyExc_ValueError, "empty separator");
1190 PyBuffer_Release(&vsub);
1191 return NULL;
1192 }
1193 else if (n == 1) {
1194 list = split_char(self, len, sub[0], maxsplit);
1195 PyBuffer_Release(&vsub);
1196 return list;
1197 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001198
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001199 list = PyList_New(PREALLOC_SIZE(maxsplit));
1200 if (list == NULL) {
1201 PyBuffer_Release(&vsub);
1202 return NULL;
1203 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001204
1205#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001206 i = j = 0;
1207 while (maxsplit-- > 0) {
1208 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1209 if (pos < 0)
1210 break;
1211 j = i+pos;
1212 SPLIT_ADD(s, i, j);
1213 i = j + n;
1214 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001215#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001216 i = j = 0;
1217 while ((j+n <= len) && (maxsplit-- > 0)) {
1218 for (; j+n <= len; j++) {
1219 if (Py_STRING_MATCH(s, j, sub, n)) {
1220 SPLIT_ADD(s, i, j);
1221 i = j = j + n;
1222 break;
1223 }
1224 }
1225 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001226#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001227 SPLIT_ADD(s, i, len);
1228 FIX_PREALLOC_SIZE(list);
1229 PyBuffer_Release(&vsub);
1230 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001231
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001233 Py_DECREF(list);
1234 PyBuffer_Release(&vsub);
1235 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001236}
1237
Neal Norwitz6968b052007-02-27 19:02:19 +00001238PyDoc_STRVAR(partition__doc__,
1239"B.partition(sep) -> (head, sep, tail)\n\
1240\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001241Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001242the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001244
1245static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001246bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001247{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001248 const char *sep;
1249 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001250
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001251 if (PyBytes_Check(sep_obj)) {
1252 sep = PyBytes_AS_STRING(sep_obj);
1253 sep_len = PyBytes_GET_SIZE(sep_obj);
1254 }
1255 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1256 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001257
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001258 return stringlib_partition(
1259 (PyObject*) self,
1260 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1261 sep_obj, sep, sep_len
1262 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001263}
1264
1265PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti4c81fbb2010-01-25 12:02:24 +00001266"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001267\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001268Search for the separator sep in B, starting at the end of B,\n\
1269and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001270part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001272
1273static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001274bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001275{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001276 const char *sep;
1277 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001278
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001279 if (PyBytes_Check(sep_obj)) {
1280 sep = PyBytes_AS_STRING(sep_obj);
1281 sep_len = PyBytes_GET_SIZE(sep_obj);
1282 }
1283 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1284 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001285
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001286 return stringlib_rpartition(
1287 (PyObject*) self,
1288 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1289 sep_obj, sep, sep_len
1290 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001291}
1292
1293Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001295{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001296 const char *s = PyBytes_AS_STRING(self);
1297 Py_ssize_t i, j, count=0;
1298 PyObject *str;
1299 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001300
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001301 if (list == NULL)
1302 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001303
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001304 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001305
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001306 while (maxsplit-- > 0) {
1307 RSKIP_SPACE(s, i);
1308 if (i<0) break;
1309 j = i; i--;
1310 RSKIP_NONSPACE(s, i);
1311 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1312 /* No whitespace in self, so just use it as list[0] */
1313 Py_INCREF(self);
1314 PyList_SET_ITEM(list, 0, (PyObject *)self);
1315 count++;
1316 break;
1317 }
1318 SPLIT_ADD(s, i + 1, j + 1);
1319 }
1320 if (i >= 0) {
1321 /* Only occurs when maxsplit was reached. Skip any remaining
1322 whitespace and copy to beginning of string. */
1323 RSKIP_SPACE(s, i);
1324 if (i >= 0)
1325 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001326
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001327 }
1328 FIX_PREALLOC_SIZE(list);
1329 if (PyList_Reverse(list) < 0)
1330 goto onError;
1331 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001332 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001333 Py_DECREF(list);
1334 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001335}
1336
Guido van Rossum8f950672007-09-10 16:53:45 +00001337Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001338rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001339{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001340 const char *s = PyBytes_AS_STRING(self);
1341 register Py_ssize_t i, j, count=0;
1342 PyObject *str;
1343 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001344
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001345 if (list == NULL)
1346 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001347
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001348 i = j = len - 1;
1349 while ((i >= 0) && (maxcount-- > 0)) {
1350 for (; i >= 0; i--) {
1351 if (s[i] == ch) {
1352 SPLIT_ADD(s, i + 1, j + 1);
1353 j = i = i - 1;
1354 break;
1355 }
1356 }
1357 }
1358 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1359 /* ch not in self, so just use self as list[0] */
1360 Py_INCREF(self);
1361 PyList_SET_ITEM(list, 0, (PyObject *)self);
1362 count++;
1363 }
1364 else if (j >= -1) {
1365 SPLIT_ADD(s, 0, j + 1);
1366 }
1367 FIX_PREALLOC_SIZE(list);
1368 if (PyList_Reverse(list) < 0)
1369 goto onError;
1370 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001371
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001373 Py_DECREF(list);
1374 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001375}
1376
Neal Norwitz6968b052007-02-27 19:02:19 +00001377PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001378"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001379\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001380Return a list of the sections in B, using sep as the delimiter,\n\
1381starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001382If sep is not given, B is split on ASCII whitespace characters\n\
1383(space, tab, return, newline, formfeed, vertical tab).\n\
1384If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386
Neal Norwitz6968b052007-02-27 19:02:19 +00001387static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001388bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001389{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001390 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1391 Py_ssize_t maxsplit = -1, count=0;
1392 const char *s, *sub;
1393 Py_buffer vsub;
1394 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001395
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001396 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1397 return NULL;
1398 if (maxsplit < 0)
1399 maxsplit = PY_SSIZE_T_MAX;
1400 if (subobj == Py_None)
1401 return rsplit_whitespace(self, len, maxsplit);
1402 if (_getbuffer(subobj, &vsub) < 0)
1403 return NULL;
1404 sub = vsub.buf;
1405 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001406
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001407 if (n == 0) {
1408 PyErr_SetString(PyExc_ValueError, "empty separator");
1409 PyBuffer_Release(&vsub);
1410 return NULL;
1411 }
1412 else if (n == 1) {
1413 list = rsplit_char(self, len, sub[0], maxsplit);
1414 PyBuffer_Release(&vsub);
1415 return list;
1416 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001417
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001418 list = PyList_New(PREALLOC_SIZE(maxsplit));
1419 if (list == NULL) {
1420 PyBuffer_Release(&vsub);
1421 return NULL;
1422 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001423
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001424 j = len;
1425 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001426
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001427 s = PyBytes_AS_STRING(self);
1428 while ( (i >= 0) && (maxsplit-- > 0) ) {
1429 for (; i>=0; i--) {
1430 if (Py_STRING_MATCH(s, i, sub, n)) {
1431 SPLIT_ADD(s, i + n, j);
1432 j = i;
1433 i -= n;
1434 break;
1435 }
1436 }
1437 }
1438 SPLIT_ADD(s, 0, j);
1439 FIX_PREALLOC_SIZE(list);
1440 if (PyList_Reverse(list) < 0)
1441 goto onError;
1442 PyBuffer_Release(&vsub);
1443 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001444
1445onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001446 Py_DECREF(list);
1447 PyBuffer_Release(&vsub);
1448 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001449}
1450
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001451#undef SPLIT_ADD
1452#undef MAX_PREALLOC
1453#undef PREALLOC_SIZE
1454
1455
1456PyDoc_STRVAR(join__doc__,
1457"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001458\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001459Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1461
Neal Norwitz6968b052007-02-27 19:02:19 +00001462static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001463bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001464{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001465 char *sep = PyBytes_AS_STRING(self);
1466 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1467 PyObject *res = NULL;
1468 char *p;
1469 Py_ssize_t seqlen = 0;
1470 size_t sz = 0;
1471 Py_ssize_t i;
1472 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001473
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001474 seq = PySequence_Fast(orig, "");
1475 if (seq == NULL) {
1476 return NULL;
1477 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001478
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001479 seqlen = PySequence_Size(seq);
1480 if (seqlen == 0) {
1481 Py_DECREF(seq);
1482 return PyBytes_FromString("");
1483 }
1484 if (seqlen == 1) {
1485 item = PySequence_Fast_GET_ITEM(seq, 0);
1486 if (PyBytes_CheckExact(item)) {
1487 Py_INCREF(item);
1488 Py_DECREF(seq);
1489 return item;
1490 }
1491 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001492
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001493 /* There are at least two things to join, or else we have a subclass
1494 * of the builtin types in the sequence.
1495 * Do a pre-pass to figure out the total amount of space we'll
1496 * need (sz), and see whether all argument are bytes.
1497 */
1498 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1499 for (i = 0; i < seqlen; i++) {
1500 const size_t old_sz = sz;
1501 item = PySequence_Fast_GET_ITEM(seq, i);
1502 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1503 PyErr_Format(PyExc_TypeError,
1504 "sequence item %zd: expected bytes,"
1505 " %.80s found",
1506 i, Py_TYPE(item)->tp_name);
1507 Py_DECREF(seq);
1508 return NULL;
1509 }
1510 sz += Py_SIZE(item);
1511 if (i != 0)
1512 sz += seplen;
1513 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1514 PyErr_SetString(PyExc_OverflowError,
1515 "join() result is too long for bytes");
1516 Py_DECREF(seq);
1517 return NULL;
1518 }
1519 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001521 /* Allocate result space. */
1522 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1523 if (res == NULL) {
1524 Py_DECREF(seq);
1525 return NULL;
1526 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001527
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001528 /* Catenate everything. */
1529 /* I'm not worried about a PyByteArray item growing because there's
1530 nowhere in this function where we release the GIL. */
1531 p = PyBytes_AS_STRING(res);
1532 for (i = 0; i < seqlen; ++i) {
1533 size_t n;
1534 char *q;
1535 if (i) {
1536 Py_MEMCPY(p, sep, seplen);
1537 p += seplen;
1538 }
1539 item = PySequence_Fast_GET_ITEM(seq, i);
1540 n = Py_SIZE(item);
1541 if (PyBytes_Check(item))
1542 q = PyBytes_AS_STRING(item);
1543 else
1544 q = PyByteArray_AS_STRING(item);
1545 Py_MEMCPY(p, q, n);
1546 p += n;
1547 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001548
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001549 Py_DECREF(seq);
1550 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001551}
1552
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001553PyObject *
1554_PyBytes_Join(PyObject *sep, PyObject *x)
1555{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001556 assert(sep != NULL && PyBytes_Check(sep));
1557 assert(x != NULL);
1558 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001559}
1560
1561Py_LOCAL_INLINE(void)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001562bytes_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001563{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001564 if (*end > len)
1565 *end = len;
1566 else if (*end < 0)
1567 *end += len;
1568 if (*end < 0)
1569 *end = 0;
1570 if (*start < 0)
1571 *start += len;
1572 if (*start < 0)
1573 *start = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001574}
1575
1576Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001577bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001578{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001579 PyObject *subobj;
1580 const char *sub;
1581 Py_ssize_t sub_len;
1582 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001583
Jesus Ceaac451502011-04-20 17:09:23 +02001584 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1585 args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001586 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001587
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001588 if (PyBytes_Check(subobj)) {
1589 sub = PyBytes_AS_STRING(subobj);
1590 sub_len = PyBytes_GET_SIZE(subobj);
1591 }
1592 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1593 /* XXX - the "expected a character buffer object" is pretty
1594 confusing for a non-expert. remap to something else ? */
1595 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001596
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001597 if (dir > 0)
1598 return stringlib_find_slice(
1599 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1600 sub, sub_len, start, end);
1601 else
1602 return stringlib_rfind_slice(
1603 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1604 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001605}
1606
1607
1608PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001609"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001610\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001611Return the lowest index in S where substring sub is found,\n\
1612such that sub is contained within s[start:end]. Optional\n\
1613arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001614\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615Return -1 on failure.");
1616
Neal Norwitz6968b052007-02-27 19:02:19 +00001617static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001618bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001619{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001620 Py_ssize_t result = bytes_find_internal(self, args, +1);
1621 if (result == -2)
1622 return NULL;
1623 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001624}
1625
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626
1627PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001628"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001629\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630Like B.find() but raise ValueError when the substring is not found.");
1631
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001632static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001633bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001634{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001635 Py_ssize_t result = bytes_find_internal(self, args, +1);
1636 if (result == -2)
1637 return NULL;
1638 if (result == -1) {
1639 PyErr_SetString(PyExc_ValueError,
1640 "substring not found");
1641 return NULL;
1642 }
1643 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001644}
1645
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001646
1647PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001648"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001649\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650Return the highest index in B where substring sub is found,\n\
1651such that sub is contained within s[start:end]. Optional\n\
1652arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001653\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001654Return -1 on failure.");
1655
Neal Norwitz6968b052007-02-27 19:02:19 +00001656static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001657bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001658{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001659 Py_ssize_t result = bytes_find_internal(self, args, -1);
1660 if (result == -2)
1661 return NULL;
1662 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001663}
1664
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001665
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001667"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668\n\
1669Like B.rfind() but raise ValueError when the substring is not found.");
1670
1671static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001672bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001673{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001674 Py_ssize_t result = bytes_find_internal(self, args, -1);
1675 if (result == -2)
1676 return NULL;
1677 if (result == -1) {
1678 PyErr_SetString(PyExc_ValueError,
1679 "substring not found");
1680 return NULL;
1681 }
1682 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001683}
1684
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685
1686Py_LOCAL_INLINE(PyObject *)
1687do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001688{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001689 Py_buffer vsep;
1690 char *s = PyBytes_AS_STRING(self);
1691 Py_ssize_t len = PyBytes_GET_SIZE(self);
1692 char *sep;
1693 Py_ssize_t seplen;
1694 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001695
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001696 if (_getbuffer(sepobj, &vsep) < 0)
1697 return NULL;
1698 sep = vsep.buf;
1699 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001700
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001701 i = 0;
1702 if (striptype != RIGHTSTRIP) {
1703 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1704 i++;
1705 }
1706 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001708 j = len;
1709 if (striptype != LEFTSTRIP) {
1710 do {
1711 j--;
1712 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1713 j++;
1714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001716 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001718 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1719 Py_INCREF(self);
1720 return (PyObject*)self;
1721 }
1722 else
1723 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001724}
1725
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
1727Py_LOCAL_INLINE(PyObject *)
1728do_strip(PyBytesObject *self, int striptype)
1729{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001730 char *s = PyBytes_AS_STRING(self);
1731 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001733 i = 0;
1734 if (striptype != RIGHTSTRIP) {
1735 while (i < len && ISSPACE(s[i])) {
1736 i++;
1737 }
1738 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001740 j = len;
1741 if (striptype != LEFTSTRIP) {
1742 do {
1743 j--;
1744 } while (j >= i && ISSPACE(s[j]));
1745 j++;
1746 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001747
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001748 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1749 Py_INCREF(self);
1750 return (PyObject*)self;
1751 }
1752 else
1753 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001754}
1755
1756
1757Py_LOCAL_INLINE(PyObject *)
1758do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1759{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001760 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001762 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1763 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001765 if (sep != NULL && sep != Py_None) {
1766 return do_xstrip(self, striptype, sep);
1767 }
1768 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769}
1770
1771
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001772PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001774\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001775Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001776If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001777static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001778bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001779{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001780 if (PyTuple_GET_SIZE(args) == 0)
1781 return do_strip(self, BOTHSTRIP); /* Common case */
1782 else
1783 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001784}
1785
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001787PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001789\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001790Strip leading bytes contained in the argument.\n\
1791If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001792static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001793bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001794{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001795 if (PyTuple_GET_SIZE(args) == 0)
1796 return do_strip(self, LEFTSTRIP); /* Common case */
1797 else
1798 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001799}
1800
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001802PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001804\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001805Strip trailing bytes contained in the argument.\n\
1806If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001807static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001808bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001809{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001810 if (PyTuple_GET_SIZE(args) == 0)
1811 return do_strip(self, RIGHTSTRIP); /* Common case */
1812 else
1813 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001814}
Neal Norwitz6968b052007-02-27 19:02:19 +00001815
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
1817PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001818"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001819\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001820Return the number of non-overlapping occurrences of substring sub in\n\
1821string S[start:end]. Optional arguments start and end are interpreted\n\
1822as in slice notation.");
1823
1824static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001825bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001826{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001827 PyObject *sub_obj;
1828 const char *str = PyBytes_AS_STRING(self), *sub;
1829 Py_ssize_t sub_len;
1830 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831
Jesus Ceaac451502011-04-20 17:09:23 +02001832 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001833 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001835 if (PyBytes_Check(sub_obj)) {
1836 sub = PyBytes_AS_STRING(sub_obj);
1837 sub_len = PyBytes_GET_SIZE(sub_obj);
1838 }
1839 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1840 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001842 bytes_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001844 return PyLong_FromSsize_t(
1845 stringlib_count(str + start, end - start, sub, sub_len)
1846 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847}
1848
1849
1850PyDoc_STRVAR(translate__doc__,
1851"B.translate(table[, deletechars]) -> bytes\n\
1852\n\
1853Return a copy of B, where all characters occurring in the\n\
1854optional argument deletechars are removed, and the remaining\n\
1855characters have been mapped through the given translation\n\
1856table, which must be a bytes object of length 256.");
1857
1858static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001859bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001860{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001861 register char *input, *output;
1862 const char *table;
1863 register Py_ssize_t i, c, changed = 0;
1864 PyObject *input_obj = (PyObject*)self;
1865 const char *output_start, *del_table=NULL;
1866 Py_ssize_t inlen, tablen, dellen = 0;
1867 PyObject *result;
1868 int trans_table[256];
1869 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001871 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1872 &tableobj, &delobj))
1873 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001875 if (PyBytes_Check(tableobj)) {
1876 table = PyBytes_AS_STRING(tableobj);
1877 tablen = PyBytes_GET_SIZE(tableobj);
1878 }
1879 else if (tableobj == Py_None) {
1880 table = NULL;
1881 tablen = 256;
1882 }
1883 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1884 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001886 if (tablen != 256) {
1887 PyErr_SetString(PyExc_ValueError,
1888 "translation table must be 256 characters long");
1889 return NULL;
1890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001892 if (delobj != NULL) {
1893 if (PyBytes_Check(delobj)) {
1894 del_table = PyBytes_AS_STRING(delobj);
1895 dellen = PyBytes_GET_SIZE(delobj);
1896 }
1897 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1898 return NULL;
1899 }
1900 else {
1901 del_table = NULL;
1902 dellen = 0;
1903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001905 inlen = PyBytes_GET_SIZE(input_obj);
1906 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1907 if (result == NULL)
1908 return NULL;
1909 output_start = output = PyBytes_AsString(result);
1910 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001912 if (dellen == 0 && table != NULL) {
1913 /* If no deletions are required, use faster code */
1914 for (i = inlen; --i >= 0; ) {
1915 c = Py_CHARMASK(*input++);
1916 if (Py_CHARMASK((*output++ = table[c])) != c)
1917 changed = 1;
1918 }
1919 if (changed || !PyBytes_CheckExact(input_obj))
1920 return result;
1921 Py_DECREF(result);
1922 Py_INCREF(input_obj);
1923 return input_obj;
1924 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001926 if (table == NULL) {
1927 for (i = 0; i < 256; i++)
1928 trans_table[i] = Py_CHARMASK(i);
1929 } else {
1930 for (i = 0; i < 256; i++)
1931 trans_table[i] = Py_CHARMASK(table[i]);
1932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001934 for (i = 0; i < dellen; i++)
1935 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001937 for (i = inlen; --i >= 0; ) {
1938 c = Py_CHARMASK(*input++);
1939 if (trans_table[c] != -1)
1940 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1941 continue;
1942 changed = 1;
1943 }
1944 if (!changed && PyBytes_CheckExact(input_obj)) {
1945 Py_DECREF(result);
1946 Py_INCREF(input_obj);
1947 return input_obj;
1948 }
1949 /* Fix the size of the resulting string */
1950 if (inlen > 0)
1951 _PyBytes_Resize(&result, output - output_start);
1952 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953}
1954
1955
Georg Brandlabc38772009-04-12 15:51:51 +00001956static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001957bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001958{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001959 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001960}
1961
/* Direction constants; FORWARD is what the replace helpers below pass as
   the direction argument of findstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within the first target_len bytes
   of target, or NULL if there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1969
1970/* String ops must return a string. */
1971/* If the object is subclass of string, create a copy */
1972Py_LOCAL(PyBytesObject *)
1973return_self(PyBytesObject *self)
1974{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001975 if (PyBytes_CheckExact(self)) {
1976 Py_INCREF(self);
1977 return self;
1978 }
1979 return (PyBytesObject *)PyBytes_FromStringAndSize(
1980 PyBytes_AS_STRING(self),
1981 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001982}
1983
1984Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitroubc760d92010-08-15 17:46:50 +00001985countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001987 Py_ssize_t count=0;
1988 const char *start=target;
1989 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001991 while ( (start=findchar(start, end-start, c)) != NULL ) {
1992 count++;
1993 if (count >= maxcount)
1994 break;
1995 start += 1;
1996 }
1997 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998}
1999
2000Py_LOCAL(Py_ssize_t)
2001findstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002002 const char *pattern, Py_ssize_t pattern_len,
2003 Py_ssize_t start,
2004 Py_ssize_t end,
2005 int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002007 if (start < 0) {
2008 start += target_len;
2009 if (start < 0)
2010 start = 0;
2011 }
2012 if (end > target_len) {
2013 end = target_len;
2014 } else if (end < 0) {
2015 end += target_len;
2016 if (end < 0)
2017 end = 0;
2018 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002019
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002020 /* zero-length substrings always match at the first attempt */
2021 if (pattern_len == 0)
2022 return (direction > 0) ? start : end;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002024 end -= pattern_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002026 if (direction < 0) {
2027 for (; end >= start; end--)
2028 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2029 return end;
2030 } else {
2031 for (; start <= end; start++)
2032 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2033 return start;
2034 }
2035 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036}
2037
2038Py_LOCAL_INLINE(Py_ssize_t)
2039countstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002040 const char *pattern, Py_ssize_t pattern_len,
2041 Py_ssize_t start,
2042 Py_ssize_t end,
2043 int direction, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002045 Py_ssize_t count=0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002047 if (start < 0) {
2048 start += target_len;
2049 if (start < 0)
2050 start = 0;
2051 }
2052 if (end > target_len) {
2053 end = target_len;
2054 } else if (end < 0) {
2055 end += target_len;
2056 if (end < 0)
2057 end = 0;
2058 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002060 /* zero-length substrings match everywhere */
2061 if (pattern_len == 0 || maxcount == 0) {
2062 if (target_len+1 < maxcount)
2063 return target_len+1;
2064 return maxcount;
2065 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002067 end -= pattern_len;
2068 if (direction < 0) {
2069 for (; (end >= start); end--)
2070 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2071 count++;
2072 if (--maxcount <= 0) break;
2073 end -= pattern_len-1;
2074 }
2075 } else {
2076 for (; (start <= end); start++)
2077 if (Py_STRING_MATCH(target, start,
2078 pattern, pattern_len)) {
2079 count++;
2080 if (--maxcount <= 0)
2081 break;
2082 start += pattern_len-1;
2083 }
2084 }
2085 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086}
2087
2088
2089/* Algorithms for different cases of string replacement */
2090
2091/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2092Py_LOCAL(PyBytesObject *)
2093replace_interleave(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002094 const char *to_s, Py_ssize_t to_len,
2095 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002097 char *self_s, *result_s;
2098 Py_ssize_t self_len, result_len;
2099 Py_ssize_t count, i, product;
2100 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002102 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002104 /* 1 at the end plus 1 after every character */
2105 count = self_len+1;
2106 if (maxcount < count)
2107 count = maxcount;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002109 /* Check for overflow */
2110 /* result_len = count * to_len + self_len; */
2111 product = count * to_len;
2112 if (product / to_len != count) {
2113 PyErr_SetString(PyExc_OverflowError,
2114 "replacement bytes are too long");
2115 return NULL;
2116 }
2117 result_len = product + self_len;
2118 if (result_len < 0) {
2119 PyErr_SetString(PyExc_OverflowError,
2120 "replacement bytes are too long");
2121 return NULL;
2122 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002124 if (! (result = (PyBytesObject *)
2125 PyBytes_FromStringAndSize(NULL, result_len)) )
2126 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002128 self_s = PyBytes_AS_STRING(self);
2129 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002131 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002133 /* Lay the first one down (guaranteed this will occur) */
2134 Py_MEMCPY(result_s, to_s, to_len);
2135 result_s += to_len;
2136 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002138 for (i=0; i<count; i++) {
2139 *result_s++ = *self_s++;
2140 Py_MEMCPY(result_s, to_s, to_len);
2141 result_s += to_len;
2142 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002144 /* Copy the rest of the original string */
2145 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002147 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002148}
2149
2150/* Special case for deleting a single character */
2151/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2152Py_LOCAL(PyBytesObject *)
2153replace_delete_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002154 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002156 char *self_s, *result_s;
2157 char *start, *next, *end;
2158 Py_ssize_t self_len, result_len;
2159 Py_ssize_t count;
2160 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002161
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002162 self_len = PyBytes_GET_SIZE(self);
2163 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002164
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002165 count = countchar(self_s, self_len, from_c, maxcount);
2166 if (count == 0) {
2167 return return_self(self);
2168 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002170 result_len = self_len - count; /* from_len == 1 */
2171 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002172
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002173 if ( (result = (PyBytesObject *)
2174 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2175 return NULL;
2176 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002178 start = self_s;
2179 end = self_s + self_len;
2180 while (count-- > 0) {
2181 next = findchar(start, end-start, from_c);
2182 if (next == NULL)
2183 break;
2184 Py_MEMCPY(result_s, start, next-start);
2185 result_s += (next-start);
2186 start = next+1;
2187 }
2188 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002190 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191}
2192
2193/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2194
2195Py_LOCAL(PyBytesObject *)
2196replace_delete_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002197 const char *from_s, Py_ssize_t from_len,
2198 Py_ssize_t maxcount) {
2199 char *self_s, *result_s;
2200 char *start, *next, *end;
2201 Py_ssize_t self_len, result_len;
2202 Py_ssize_t count, offset;
2203 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002205 self_len = PyBytes_GET_SIZE(self);
2206 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002208 count = countstring(self_s, self_len,
2209 from_s, from_len,
2210 0, self_len, 1,
2211 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002212
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002213 if (count == 0) {
2214 /* no matches */
2215 return return_self(self);
2216 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002217
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002218 result_len = self_len - (count * from_len);
2219 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002221 if ( (result = (PyBytesObject *)
2222 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2223 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002224
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002225 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002227 start = self_s;
2228 end = self_s + self_len;
2229 while (count-- > 0) {
2230 offset = findstring(start, end-start,
2231 from_s, from_len,
2232 0, end-start, FORWARD);
2233 if (offset == -1)
2234 break;
2235 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002236
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002237 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002238
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002239 result_s += (next-start);
2240 start = next+from_len;
2241 }
2242 Py_MEMCPY(result_s, start, end-start);
2243 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002244}
2245
2246/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2247Py_LOCAL(PyBytesObject *)
2248replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002249 char from_c, char to_c,
2250 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002252 char *self_s, *result_s, *start, *end, *next;
2253 Py_ssize_t self_len;
2254 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002256 /* The result string will be the same size */
2257 self_s = PyBytes_AS_STRING(self);
2258 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002260 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002261
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002262 if (next == NULL) {
2263 /* No matches; return the original string */
2264 return return_self(self);
2265 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002267 /* Need to make a new string */
2268 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2269 if (result == NULL)
2270 return NULL;
2271 result_s = PyBytes_AS_STRING(result);
2272 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002273
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002274 /* change everything in-place, starting with this one */
2275 start = result_s + (next-self_s);
2276 *start = to_c;
2277 start++;
2278 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002279
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002280 while (--maxcount > 0) {
2281 next = findchar(start, end-start, from_c);
2282 if (next == NULL)
2283 break;
2284 *next = to_c;
2285 start = next+1;
2286 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002287
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002288 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289}
2290
2291/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2292Py_LOCAL(PyBytesObject *)
2293replace_substring_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002294 const char *from_s, Py_ssize_t from_len,
2295 const char *to_s, Py_ssize_t to_len,
2296 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002298 char *result_s, *start, *end;
2299 char *self_s;
2300 Py_ssize_t self_len, offset;
2301 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002303 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002304
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002305 self_s = PyBytes_AS_STRING(self);
2306 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002308 offset = findstring(self_s, self_len,
2309 from_s, from_len,
2310 0, self_len, FORWARD);
2311 if (offset == -1) {
2312 /* No matches; return the original string */
2313 return return_self(self);
2314 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002315
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002316 /* Need to make a new string */
2317 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2318 if (result == NULL)
2319 return NULL;
2320 result_s = PyBytes_AS_STRING(result);
2321 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002322
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002323 /* change everything in-place, starting with this one */
2324 start = result_s + offset;
2325 Py_MEMCPY(start, to_s, from_len);
2326 start += from_len;
2327 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002328
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002329 while ( --maxcount > 0) {
2330 offset = findstring(start, end-start,
2331 from_s, from_len,
2332 0, end-start, FORWARD);
2333 if (offset==-1)
2334 break;
2335 Py_MEMCPY(start+offset, to_s, from_len);
2336 start += offset+from_len;
2337 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002338
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002339 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002340}
2341
2342/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2343Py_LOCAL(PyBytesObject *)
2344replace_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002345 char from_c,
2346 const char *to_s, Py_ssize_t to_len,
2347 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002349 char *self_s, *result_s;
2350 char *start, *next, *end;
2351 Py_ssize_t self_len, result_len;
2352 Py_ssize_t count, product;
2353 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002355 self_s = PyBytes_AS_STRING(self);
2356 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002357
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002358 count = countchar(self_s, self_len, from_c, maxcount);
2359 if (count == 0) {
2360 /* no matches, return unchanged */
2361 return return_self(self);
2362 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002363
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002364 /* use the difference between current and new, hence the "-1" */
2365 /* result_len = self_len + count * (to_len-1) */
2366 product = count * (to_len-1);
2367 if (product / (to_len-1) != count) {
2368 PyErr_SetString(PyExc_OverflowError,
2369 "replacement bytes are too long");
2370 return NULL;
2371 }
2372 result_len = self_len + product;
2373 if (result_len < 0) {
2374 PyErr_SetString(PyExc_OverflowError,
2375 "replacment bytes are too long");
2376 return NULL;
2377 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002378
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002379 if ( (result = (PyBytesObject *)
2380 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2381 return NULL;
2382 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002383
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002384 start = self_s;
2385 end = self_s + self_len;
2386 while (count-- > 0) {
2387 next = findchar(start, end-start, from_c);
2388 if (next == NULL)
2389 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002390
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002391 if (next == start) {
2392 /* replace with the 'to' */
2393 Py_MEMCPY(result_s, to_s, to_len);
2394 result_s += to_len;
2395 start += 1;
2396 } else {
2397 /* copy the unchanged old then the 'to' */
2398 Py_MEMCPY(result_s, start, next-start);
2399 result_s += (next-start);
2400 Py_MEMCPY(result_s, to_s, to_len);
2401 result_s += to_len;
2402 start = next+1;
2403 }
2404 }
2405 /* Copy the remainder of the remaining string */
2406 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002407
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002408 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002409}
2410
2411/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2412Py_LOCAL(PyBytesObject *)
2413replace_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002414 const char *from_s, Py_ssize_t from_len,
2415 const char *to_s, Py_ssize_t to_len,
2416 Py_ssize_t maxcount) {
2417 char *self_s, *result_s;
2418 char *start, *next, *end;
2419 Py_ssize_t self_len, result_len;
2420 Py_ssize_t count, offset, product;
2421 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002422
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002423 self_s = PyBytes_AS_STRING(self);
2424 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002425
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002426 count = countstring(self_s, self_len,
2427 from_s, from_len,
2428 0, self_len, FORWARD, maxcount);
2429 if (count == 0) {
2430 /* no matches, return unchanged */
2431 return return_self(self);
2432 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002433
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002434 /* Check for overflow */
2435 /* result_len = self_len + count * (to_len-from_len) */
2436 product = count * (to_len-from_len);
2437 if (product / (to_len-from_len) != count) {
2438 PyErr_SetString(PyExc_OverflowError,
2439 "replacement bytes are too long");
2440 return NULL;
2441 }
2442 result_len = self_len + product;
2443 if (result_len < 0) {
2444 PyErr_SetString(PyExc_OverflowError,
2445 "replacement bytes are too long");
2446 return NULL;
2447 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002449 if ( (result = (PyBytesObject *)
2450 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2451 return NULL;
2452 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002453
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002454 start = self_s;
2455 end = self_s + self_len;
2456 while (count-- > 0) {
2457 offset = findstring(start, end-start,
2458 from_s, from_len,
2459 0, end-start, FORWARD);
2460 if (offset == -1)
2461 break;
2462 next = start+offset;
2463 if (next == start) {
2464 /* replace with the 'to' */
2465 Py_MEMCPY(result_s, to_s, to_len);
2466 result_s += to_len;
2467 start += from_len;
2468 } else {
2469 /* copy the unchanged old then the 'to' */
2470 Py_MEMCPY(result_s, start, next-start);
2471 result_s += (next-start);
2472 Py_MEMCPY(result_s, to_s, to_len);
2473 result_s += to_len;
2474 start = next+from_len;
2475 }
2476 }
2477 /* Copy the remainder of the remaining string */
2478 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002479
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002480 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002481}
2482
2483
2484Py_LOCAL(PyBytesObject *)
2485replace(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002486 const char *from_s, Py_ssize_t from_len,
2487 const char *to_s, Py_ssize_t to_len,
2488 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002490 if (maxcount < 0) {
2491 maxcount = PY_SSIZE_T_MAX;
2492 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2493 /* nothing to do; return the original string */
2494 return return_self(self);
2495 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002496
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002497 if (maxcount == 0 ||
2498 (from_len == 0 && to_len == 0)) {
2499 /* nothing to do; return the original string */
2500 return return_self(self);
2501 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002502
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002503 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002504
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002505 if (from_len == 0) {
2506 /* insert the 'to' string everywhere. */
2507 /* >>> "Python".replace("", ".") */
2508 /* '.P.y.t.h.o.n.' */
2509 return replace_interleave(self, to_s, to_len, maxcount);
2510 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002511
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002512 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2513 /* point for an empty self string to generate a non-empty string */
2514 /* Special case so the remaining code always gets a non-empty string */
2515 if (PyBytes_GET_SIZE(self) == 0) {
2516 return return_self(self);
2517 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002518
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002519 if (to_len == 0) {
2520 /* delete all occurrences of 'from' string */
2521 if (from_len == 1) {
2522 return replace_delete_single_character(
2523 self, from_s[0], maxcount);
2524 } else {
2525 return replace_delete_substring(self, from_s,
2526 from_len, maxcount);
2527 }
2528 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002530 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002531
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002532 if (from_len == to_len) {
2533 if (from_len == 1) {
2534 return replace_single_character_in_place(
2535 self,
2536 from_s[0],
2537 to_s[0],
2538 maxcount);
2539 } else {
2540 return replace_substring_in_place(
2541 self, from_s, from_len, to_s, to_len,
2542 maxcount);
2543 }
2544 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002545
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002546 /* Otherwise use the more generic algorithms */
2547 if (from_len == 1) {
2548 return replace_single_character(self, from_s[0],
2549 to_s, to_len, maxcount);
2550 } else {
2551 /* len('from')>=2, len('to')>=1 */
2552 return replace_substring(self, from_s, from_len, to_s, to_len,
2553 maxcount);
2554 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555}
2556
2557PyDoc_STRVAR(replace__doc__,
2558"B.replace(old, new[, count]) -> bytes\n\
2559\n\
2560Return a copy of B with all occurrences of subsection\n\
2561old replaced by new. If the optional argument count is\n\
Senthil Kumaranf7734202010-09-08 13:00:07 +00002562given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002563
2564static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002565bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002566{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002567 Py_ssize_t count = -1;
2568 PyObject *from, *to;
2569 const char *from_s, *to_s;
2570 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002572 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2573 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002574
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002575 if (PyBytes_Check(from)) {
2576 from_s = PyBytes_AS_STRING(from);
2577 from_len = PyBytes_GET_SIZE(from);
2578 }
2579 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2580 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002581
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002582 if (PyBytes_Check(to)) {
2583 to_s = PyBytes_AS_STRING(to);
2584 to_len = PyBytes_GET_SIZE(to);
2585 }
2586 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2587 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002588
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002589 return (PyObject *)replace((PyBytesObject *) self,
2590 from_s, from_len,
2591 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002592}
2593
2594/** End DALKE **/
2595
2596/* Matches the end (direction >= 0) or start (direction < 0) of self
2597 * against substr, using the start and end arguments. Returns
2598 * -1 on error, 0 if not found and 1 if found.
2599 */
2600Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002601_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002602 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002604 Py_ssize_t len = PyBytes_GET_SIZE(self);
2605 Py_ssize_t slen;
2606 const char* sub;
2607 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002608
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002609 if (PyBytes_Check(substr)) {
2610 sub = PyBytes_AS_STRING(substr);
2611 slen = PyBytes_GET_SIZE(substr);
2612 }
2613 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2614 return -1;
2615 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002617 bytes_adjust_indices(&start, &end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002618
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002619 if (direction < 0) {
2620 /* startswith */
2621 if (start+slen > len)
2622 return 0;
2623 } else {
2624 /* endswith */
2625 if (end-start < slen || start > len)
2626 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002627
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002628 if (end-slen > start)
2629 start = end - slen;
2630 }
2631 if (end-start >= slen)
2632 return ! memcmp(str+start, sub, slen);
2633 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634}
2635
2636
2637PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002638"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639\n\
2640Return True if B starts with the specified prefix, False otherwise.\n\
2641With optional start, test B beginning at that position.\n\
2642With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002643prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644
2645static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002646bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002647{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002648 Py_ssize_t start = 0;
2649 Py_ssize_t end = PY_SSIZE_T_MAX;
2650 PyObject *subobj;
2651 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002652
Jesus Ceaac451502011-04-20 17:09:23 +02002653 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002654 return NULL;
2655 if (PyTuple_Check(subobj)) {
2656 Py_ssize_t i;
2657 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2658 result = _bytes_tailmatch(self,
2659 PyTuple_GET_ITEM(subobj, i),
2660 start, end, -1);
2661 if (result == -1)
2662 return NULL;
2663 else if (result) {
2664 Py_RETURN_TRUE;
2665 }
2666 }
2667 Py_RETURN_FALSE;
2668 }
2669 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002670 if (result == -1) {
2671 if (PyErr_ExceptionMatches(PyExc_TypeError))
2672 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2673 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002674 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002675 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002676 else
2677 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002678}
2679
2680
2681PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002682"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683\n\
2684Return True if B ends with the specified suffix, False otherwise.\n\
2685With optional start, test B beginning at that position.\n\
2686With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002687suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002688
2689static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002690bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002692 Py_ssize_t start = 0;
2693 Py_ssize_t end = PY_SSIZE_T_MAX;
2694 PyObject *subobj;
2695 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002696
Jesus Ceaac451502011-04-20 17:09:23 +02002697 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002698 return NULL;
2699 if (PyTuple_Check(subobj)) {
2700 Py_ssize_t i;
2701 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2702 result = _bytes_tailmatch(self,
2703 PyTuple_GET_ITEM(subobj, i),
2704 start, end, +1);
2705 if (result == -1)
2706 return NULL;
2707 else if (result) {
2708 Py_RETURN_TRUE;
2709 }
2710 }
2711 Py_RETURN_FALSE;
2712 }
2713 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002714 if (result == -1) {
2715 if (PyErr_ExceptionMatches(PyExc_TypeError))
2716 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2717 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002718 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002719 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002720 else
2721 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722}
2723
2724
2725PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002726"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002728Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002729to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002730handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2731a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002733able to handle UnicodeDecodeErrors.");
2734
2735static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002736bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002737{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002738 const char *encoding = NULL;
2739 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002740
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002741 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2742 return NULL;
2743 if (encoding == NULL)
2744 encoding = PyUnicode_GetDefaultEncoding();
2745 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002746}
2747
Guido van Rossum20188312006-05-05 15:15:40 +00002748
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002749PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002751\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002753Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002755
2756static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002757hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002758{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002759 if (c >= 128)
2760 return -1;
2761 if (ISDIGIT(c))
2762 return c - '0';
2763 else {
2764 if (ISUPPER(c))
2765 c = TOLOWER(c);
2766 if (c >= 'a' && c <= 'f')
2767 return c - 'a' + 10;
2768 }
2769 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002770}
2771
2772static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002773bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002774{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002775 PyObject *newstring, *hexobj;
2776 char *buf;
2777 Py_UNICODE *hex;
2778 Py_ssize_t hexlen, byteslen, i, j;
2779 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002780
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002781 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2782 return NULL;
2783 assert(PyUnicode_Check(hexobj));
2784 hexlen = PyUnicode_GET_SIZE(hexobj);
2785 hex = PyUnicode_AS_UNICODE(hexobj);
2786 byteslen = hexlen/2; /* This overestimates if there are spaces */
2787 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2788 if (!newstring)
2789 return NULL;
2790 buf = PyBytes_AS_STRING(newstring);
2791 for (i = j = 0; i < hexlen; i += 2) {
2792 /* skip over spaces in the input */
2793 while (hex[i] == ' ')
2794 i++;
2795 if (i >= hexlen)
2796 break;
2797 top = hex_digit_to_int(hex[i]);
2798 bot = hex_digit_to_int(hex[i+1]);
2799 if (top == -1 || bot == -1) {
2800 PyErr_Format(PyExc_ValueError,
2801 "non-hexadecimal number found in "
2802 "fromhex() arg at position %zd", i);
2803 goto error;
2804 }
2805 buf[j++] = (top << 4) + bot;
2806 }
2807 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2808 goto error;
2809 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002810
2811 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002812 Py_XDECREF(newstring);
2813 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002814}
2815
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002816PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002817"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002818
2819static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002820bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002821{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002822 Py_ssize_t res;
2823 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2824 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002825}
2826
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002827
2828static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002829bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002830{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002831 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002832}
2833
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002834
2835static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002836bytes_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002837 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2838 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2839 _Py_capitalize__doc__},
2840 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2841 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2842 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode__doc__},
2843 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2844 endswith__doc__},
2845 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2846 expandtabs__doc__},
2847 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2848 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2849 fromhex_doc},
2850 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2851 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2852 _Py_isalnum__doc__},
2853 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2854 _Py_isalpha__doc__},
2855 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2856 _Py_isdigit__doc__},
2857 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2858 _Py_islower__doc__},
2859 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2860 _Py_isspace__doc__},
2861 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2862 _Py_istitle__doc__},
2863 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2864 _Py_isupper__doc__},
2865 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2866 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2867 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2868 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2869 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2870 _Py_maketrans__doc__},
2871 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2872 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2873 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2874 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2875 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2876 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2877 rpartition__doc__},
2878 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2879 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2880 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2881 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2882 splitlines__doc__},
2883 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2884 startswith__doc__},
2885 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2886 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2887 _Py_swapcase__doc__},
2888 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2889 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2890 translate__doc__},
2891 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2892 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2893 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2894 sizeof__doc__},
2895 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002896};
2897
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002898static PyObject *
2899str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2900
2901static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002902bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002903{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002904 PyObject *x = NULL;
2905 const char *encoding = NULL;
2906 const char *errors = NULL;
2907 PyObject *new = NULL;
2908 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002910 if (type != &PyBytes_Type)
2911 return str_subtype_new(type, args, kwds);
2912 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2913 &encoding, &errors))
2914 return NULL;
2915 if (x == NULL) {
2916 if (encoding != NULL || errors != NULL) {
2917 PyErr_SetString(PyExc_TypeError,
2918 "encoding or errors without sequence "
2919 "argument");
2920 return NULL;
2921 }
2922 return PyBytes_FromString("");
2923 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002924
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002925 if (PyUnicode_Check(x)) {
2926 /* Encode via the codec registry */
2927 if (encoding == NULL) {
2928 PyErr_SetString(PyExc_TypeError,
2929 "string argument without an encoding");
2930 return NULL;
2931 }
2932 new = PyUnicode_AsEncodedString(x, encoding, errors);
2933 if (new == NULL)
2934 return NULL;
2935 assert(PyBytes_Check(new));
2936 return new;
2937 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002938
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002939 /* If it's not unicode, there can't be encoding or errors */
2940 if (encoding != NULL || errors != NULL) {
2941 PyErr_SetString(PyExc_TypeError,
2942 "encoding or errors without a string argument");
2943 return NULL;
2944 }
2945 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002946}
2947
2948PyObject *
2949PyBytes_FromObject(PyObject *x)
2950{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002951 PyObject *new, *it;
2952 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002953
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002954 if (x == NULL) {
2955 PyErr_BadInternalCall();
2956 return NULL;
2957 }
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002958
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002959 /* Is it an int? */
2960 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2961 if (size == -1 && PyErr_Occurred()) {
2962 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2963 return NULL;
2964 PyErr_Clear();
2965 }
2966 else if (size < 0) {
2967 PyErr_SetString(PyExc_ValueError, "negative count");
2968 return NULL;
2969 }
2970 else {
2971 new = PyBytes_FromStringAndSize(NULL, size);
2972 if (new == NULL) {
2973 return NULL;
2974 }
2975 if (size > 0) {
2976 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2977 }
2978 return new;
2979 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002981 /* Use the modern buffer interface */
2982 if (PyObject_CheckBuffer(x)) {
2983 Py_buffer view;
2984 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2985 return NULL;
2986 new = PyBytes_FromStringAndSize(NULL, view.len);
2987 if (!new)
2988 goto fail;
2989 /* XXX(brett.cannon): Better way to get to internal buffer? */
2990 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2991 &view, view.len, 'C') < 0)
2992 goto fail;
2993 PyBuffer_Release(&view);
2994 return new;
2995 fail:
2996 Py_XDECREF(new);
2997 PyBuffer_Release(&view);
2998 return NULL;
2999 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003001 /* For iterator version, create a string object and resize as needed */
3002 /* XXX(gb): is 64 a good value? also, optimize if length is known */
3003 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
3004 input being a truly long iterator. */
3005 size = 64;
3006 new = PyBytes_FromStringAndSize(NULL, size);
3007 if (new == NULL)
3008 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003009
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003010 /* XXX Optimize this if the arguments is a list, tuple */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003012 /* Get the iterator */
3013 it = PyObject_GetIter(x);
3014 if (it == NULL)
3015 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003016
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003017 /* Run the iterator to exhaustion */
3018 for (i = 0; ; i++) {
3019 PyObject *item;
3020 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003022 /* Get the next item */
3023 item = PyIter_Next(it);
3024 if (item == NULL) {
3025 if (PyErr_Occurred())
3026 goto error;
3027 break;
3028 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003029
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003030 /* Interpret it as an int (__index__) */
3031 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3032 Py_DECREF(item);
3033 if (value == -1 && PyErr_Occurred())
3034 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003035
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003036 /* Range check */
3037 if (value < 0 || value >= 256) {
3038 PyErr_SetString(PyExc_ValueError,
3039 "bytes must be in range(0, 256)");
3040 goto error;
3041 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003042
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003043 /* Append the byte */
3044 if (i >= size) {
3045 size *= 2;
3046 if (_PyBytes_Resize(&new, size) < 0)
3047 goto error;
3048 }
Antoine Pitroubc760d92010-08-15 17:46:50 +00003049 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003050 }
3051 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003052
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003053 /* Clean up and return success */
3054 Py_DECREF(it);
3055 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003056
3057 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003058 /* Error handling when new != NULL */
3059 Py_XDECREF(it);
3060 Py_DECREF(new);
3061 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003062}
3063
3064static PyObject *
3065str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3066{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003067 PyObject *tmp, *pnew;
3068 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003069
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003070 assert(PyType_IsSubtype(type, &PyBytes_Type));
3071 tmp = bytes_new(&PyBytes_Type, args, kwds);
3072 if (tmp == NULL)
3073 return NULL;
3074 assert(PyBytes_CheckExact(tmp));
3075 n = PyBytes_GET_SIZE(tmp);
3076 pnew = type->tp_alloc(type, n);
3077 if (pnew != NULL) {
3078 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3079 PyBytes_AS_STRING(tmp), n+1);
3080 ((PyBytesObject *)pnew)->ob_shash =
3081 ((PyBytesObject *)tmp)->ob_shash;
3082 }
3083 Py_DECREF(tmp);
3084 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003085}
3086
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003087PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003088"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003089bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003090bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3091bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003092\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003093Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003094 - an iterable yielding integers in range(256)\n\
3095 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003096 - a bytes or a buffer object\n\
3097 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003098
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003099static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003100
/* Static type object for bytes.  The initializer is positional: every
   entry must stay in this exact order, and the trailing comment on each
   line names the PyTypeObject slot that the entry fills. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003101PyTypeObject PyBytes_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003102 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3103 "bytes", /* tp_name */
3104 PyBytesObject_SIZE, /* tp_basicsize */
3105 sizeof(char), /* tp_itemsize */
3106 bytes_dealloc, /* tp_dealloc */
3107 0, /* tp_print */
3108 0, /* tp_getattr */
3109 0, /* tp_setattr */
3110 0, /* tp_reserved */
3111 (reprfunc)bytes_repr, /* tp_repr */
3112 0, /* tp_as_number */
3113 &bytes_as_sequence, /* tp_as_sequence */
3114 &bytes_as_mapping, /* tp_as_mapping */
3115 (hashfunc)bytes_hash, /* tp_hash */
3116 0, /* tp_call */
3117 bytes_str, /* tp_str */
3118 PyObject_GenericGetAttr, /* tp_getattro */
3119 0, /* tp_setattro */
3120 &bytes_as_buffer, /* tp_as_buffer */
3121 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3122 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3123 bytes_doc, /* tp_doc */
3124 0, /* tp_traverse */
3125 0, /* tp_clear */
3126 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3127 0, /* tp_weaklistoffset */
3128 bytes_iter, /* tp_iter */
3129 0, /* tp_iternext */
3130 bytes_methods, /* tp_methods */
3131 0, /* tp_members */
3132 0, /* tp_getset */
3133 &PyBaseObject_Type, /* tp_base */
3134 0, /* tp_dict */
3135 0, /* tp_descr_get */
3136 0, /* tp_descr_set */
3137 0, /* tp_dictoffset */
3138 0, /* tp_init */
3139 0, /* tp_alloc */
3140 bytes_new, /* tp_new */
3141 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003142};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003143
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003144void
3145PyBytes_Concat(register PyObject **pv, register PyObject *w)
3146{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003147 register PyObject *v;
3148 assert(pv != NULL);
3149 if (*pv == NULL)
3150 return;
3151 if (w == NULL) {
3152 Py_DECREF(*pv);
3153 *pv = NULL;
3154 return;
3155 }
3156 v = bytes_concat(*pv, w);
3157 Py_DECREF(*pv);
3158 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003159}
3160
3161void
3162PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3163{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003164 PyBytes_Concat(pv, w);
3165 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003166}
3167
3168
3169/* The following function breaks the notion that strings are immutable:
3170 it changes the size of a string. We get away with this only if there
3171 is only one module referencing the object. You can also think of it
3172 as creating a new string object and destroying the old one, only
3173 more efficiently. In any case, don't use this if the string may
3174 already be known to some other part of the code...
3175 Note that if there's not enough memory to resize the string, the original
3176 string object at *pv is deallocated, *pv is set to NULL, an "out of
3177 memory" exception is set, and -1 is returned. Else (on success) 0 is
3178 returned, and the value in *pv may or may not be the same as on input.
3179 As always, an extra byte is allocated for a trailing \0 byte (newsize
3180 does *not* include that), and a trailing \0 byte is stored.
3181*/
3182
3183int
3184_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3185{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003186 register PyObject *v;
3187 register PyBytesObject *sv;
3188 v = *pv;
3189 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3190 *pv = 0;
3191 Py_DECREF(v);
3192 PyErr_BadInternalCall();
3193 return -1;
3194 }
3195 /* XXX UNREF/NEWREF interface should be more symmetrical */
3196 _Py_DEC_REFTOTAL;
3197 _Py_ForgetReference(v);
3198 *pv = (PyObject *)
3199 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
3200 if (*pv == NULL) {
3201 PyObject_Del(v);
3202 PyErr_NoMemory();
3203 return -1;
3204 }
3205 _Py_NewReference(*pv);
3206 sv = (PyBytesObject *) *pv;
3207 Py_SIZE(sv) = newsize;
3208 sv->ob_sval[newsize] = '\0';
3209 sv->ob_shash = -1; /* invalidate cached hash value */
3210 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003211}
3212
3213/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3214 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3215 * Python's regular ints.
3216 * Return value: a new PyString*, or NULL if error.
3217 * . *pbuf is set to point into it,
3218 * *plen set to the # of chars following that.
3219 * Caller must decref it when done using pbuf.
3220 * The string starting at *pbuf is of the form
3221 * "-"? ("0x" | "0X")? digit+
3222 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3223 * set in flags. The case of hex digits will be correct,
3224 * There will be at least prec digits, zero-filled on the left if
3225 * necessary to get that many.
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003226 * val object to be converted
3227 * flags bitmask of format flags; only F_ALT is looked at
3228 * prec minimum number of digits; 0-fill on left if needed
3229 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003230 *
3231 * CAUTION: o, x and X conversions on regular ints can never
3232 * produce a '-' sign, but can for Python's unbounded ints.
3233 */
3234PyObject*
3235_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003236 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003237{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003238 PyObject *result = NULL;
3239 char *buf;
3240 Py_ssize_t i;
3241 int sign; /* 1 if '-', else 0 */
3242 int len; /* number of characters */
3243 Py_ssize_t llen;
3244 int numdigits; /* len == numnondigits + numdigits */
3245 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003246
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003247 /* Avoid exceeding SSIZE_T_MAX */
3248 if (prec > INT_MAX-3) {
3249 PyErr_SetString(PyExc_OverflowError,
3250 "precision too large");
3251 return NULL;
3252 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003253
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003254 switch (type) {
3255 case 'd':
3256 case 'u':
3257 /* Special-case boolean: we want 0/1 */
3258 if (PyBool_Check(val))
3259 result = PyNumber_ToBase(val, 10);
3260 else
3261 result = Py_TYPE(val)->tp_str(val);
3262 break;
3263 case 'o':
3264 numnondigits = 2;
3265 result = PyNumber_ToBase(val, 8);
3266 break;
3267 case 'x':
3268 case 'X':
3269 numnondigits = 2;
3270 result = PyNumber_ToBase(val, 16);
3271 break;
3272 default:
3273 assert(!"'type' not in [duoxX]");
3274 }
3275 if (!result)
3276 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003277
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003278 buf = _PyUnicode_AsString(result);
3279 if (!buf) {
3280 Py_DECREF(result);
3281 return NULL;
3282 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003283
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003284 /* To modify the string in-place, there can only be one reference. */
3285 if (Py_REFCNT(result) != 1) {
3286 PyErr_BadInternalCall();
3287 return NULL;
3288 }
3289 llen = PyUnicode_GetSize(result);
3290 if (llen > INT_MAX) {
3291 PyErr_SetString(PyExc_ValueError,
3292 "string too large in _PyBytes_FormatLong");
3293 return NULL;
3294 }
3295 len = (int)llen;
3296 if (buf[len-1] == 'L') {
3297 --len;
3298 buf[len] = '\0';
3299 }
3300 sign = buf[0] == '-';
3301 numnondigits += sign;
3302 numdigits = len - numnondigits;
3303 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003304
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003305 /* Get rid of base marker unless F_ALT */
3306 if (((flags & F_ALT) == 0 &&
3307 (type == 'o' || type == 'x' || type == 'X'))) {
3308 assert(buf[sign] == '0');
3309 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3310 buf[sign+1] == 'o');
3311 numnondigits -= 2;
3312 buf += 2;
3313 len -= 2;
3314 if (sign)
3315 buf[0] = '-';
3316 assert(len == numnondigits + numdigits);
3317 assert(numdigits > 0);
3318 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003319
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003320 /* Fill with leading zeroes to meet minimum width. */
3321 if (prec > numdigits) {
3322 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3323 numnondigits + prec);
3324 char *b1;
3325 if (!r1) {
3326 Py_DECREF(result);
3327 return NULL;
3328 }
3329 b1 = PyBytes_AS_STRING(r1);
3330 for (i = 0; i < numnondigits; ++i)
3331 *b1++ = *buf++;
3332 for (i = 0; i < prec - numdigits; i++)
3333 *b1++ = '0';
3334 for (i = 0; i < numdigits; i++)
3335 *b1++ = *buf++;
3336 *b1 = '\0';
3337 Py_DECREF(result);
3338 result = r1;
3339 buf = PyBytes_AS_STRING(result);
3340 len = numnondigits + prec;
3341 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003342
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003343 /* Fix up case for hex conversions. */
3344 if (type == 'X') {
3345 /* Need to convert all lower case letters to upper case.
3346 and need to convert 0x to 0X (and -0x to -0X). */
3347 for (i = 0; i < len; i++)
3348 if (buf[i] >= 'a' && buf[i] <= 'x')
3349 buf[i] -= 'a'-'A';
3350 }
3351 *pbuf = buf;
3352 *plen = len;
3353 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003354}
3355
3356void
3357PyBytes_Fini(void)
3358{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003359 int i;
3360 for (i = 0; i < UCHAR_MAX + 1; i++) {
3361 Py_XDECREF(characters[i]);
3362 characters[i] = NULL;
3363 }
3364 Py_XDECREF(nullstring);
3365 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003366}
3367
Benjamin Peterson4116f362008-05-27 00:36:20 +00003368/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003369
/* State of a bytes iterator: the bytes object being walked and the
   position of the next byte that striter_next will hand out. */
3370typedef struct {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003371 PyObject_HEAD
3372 Py_ssize_t it_index; /* Index of the next byte to yield */
3373 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003374} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003375
3376static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003377striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003378{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003379 _PyObject_GC_UNTRACK(it);
3380 Py_XDECREF(it->it_seq);
3381 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003382}
3383
3384static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003385striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003386{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003387 Py_VISIT(it->it_seq);
3388 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003389}
3390
3391static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003392striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003393{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003394 PyBytesObject *seq;
3395 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003396
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003397 assert(it != NULL);
3398 seq = it->it_seq;
3399 if (seq == NULL)
3400 return NULL;
3401 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003402
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003403 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3404 item = PyLong_FromLong(
3405 (unsigned char)seq->ob_sval[it->it_index]);
3406 if (item != NULL)
3407 ++it->it_index;
3408 return item;
3409 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003410
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003411 Py_DECREF(seq);
3412 it->it_seq = NULL;
3413 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003414}
3415
3416static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003417striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003418{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003419 Py_ssize_t len = 0;
3420 if (it->it_seq)
3421 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3422 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003423}
3424
/* Docstring attached to the __length_hint__ entry of striter_methods. */
3425PyDoc_STRVAR(length_hint_doc,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003426 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003427
/* Method table for bytes iterators: only __length_hint__, implemented
   by striter_len and taking no arguments. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003428static PyMethodDef striter_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003429 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3430 length_hint_doc},
3431 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003432};
3433
/* Static type object for bytes iterators (striterobject instances).
   The initializer is positional; the trailing comment on each line names
   the slot being filled.  The type sets Py_TPFLAGS_HAVE_GC and supplies
   striter_traverse as its traverse function. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003434PyTypeObject PyBytesIter_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003435 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3436 "bytes_iterator", /* tp_name */
3437 sizeof(striterobject), /* tp_basicsize */
3438 0, /* tp_itemsize */
3439 /* methods */
3440 (destructor)striter_dealloc, /* tp_dealloc */
3441 0, /* tp_print */
3442 0, /* tp_getattr */
3443 0, /* tp_setattr */
3444 0, /* tp_reserved */
3445 0, /* tp_repr */
3446 0, /* tp_as_number */
3447 0, /* tp_as_sequence */
3448 0, /* tp_as_mapping */
3449 0, /* tp_hash */
3450 0, /* tp_call */
3451 0, /* tp_str */
3452 PyObject_GenericGetAttr, /* tp_getattro */
3453 0, /* tp_setattro */
3454 0, /* tp_as_buffer */
3455 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3456 0, /* tp_doc */
3457 (traverseproc)striter_traverse, /* tp_traverse */
3458 0, /* tp_clear */
3459 0, /* tp_richcompare */
3460 0, /* tp_weaklistoffset */
3461 PyObject_SelfIter, /* tp_iter */
3462 (iternextfunc)striter_next, /* tp_iternext */
3463 striter_methods, /* tp_methods */
3464 0, /* tp_members */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003465};
3466
3467static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003468bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003469{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003470 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003471
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003472 if (!PyBytes_Check(seq)) {
3473 PyErr_BadInternalCall();
3474 return NULL;
3475 }
3476 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3477 if (it == NULL)
3478 return NULL;
3479 it->it_index = 0;
3480 Py_INCREF(seq);
3481 it->it_seq = (PyBytesObject *)seq;
3482 _PyObject_GC_TRACK(it);
3483 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003484}