blob: f2ee1310fa4fc151f28d8dbaa2a9a89a5bcf5d53 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroua57aae72010-06-09 16:58:35 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
59 PyString object must be treated as immutable and you must not fill in nor
60 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178#else
179#ifdef __va_copy
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000180 __va_copy(count, vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000182 count = vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183#endif
184#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000239 expand:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000345
346 end:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000354 PyObject* ret;
355 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000358 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000360 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000368bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000407
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200498 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505 failed:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000506 Py_DECREF(v);
507 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000537 register char **s,
538 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000544
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
Eric Smith0923d1d2009-04-16 20:16:10 +0000565#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566#define STRINGLIB_CHAR char
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000567
Neal Norwitz6968b052007-02-27 19:02:19 +0000568#define STRINGLIB_CMP memcmp
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000569#define STRINGLIB_LEN PyBytes_GET_SIZE
570#define STRINGLIB_NEW PyBytes_FromStringAndSize
571#define STRINGLIB_STR PyBytes_AS_STRING
572/* #define STRINGLIB_WANT_CONTAINS_OBJ 1 */
573
574#define STRINGLIB_EMPTY nullstring
575#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
576#define STRINGLIB_MUTABLE 0
Neal Norwitz6968b052007-02-27 19:02:19 +0000577
578#include "stringlib/fastsearch.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579
Neal Norwitz6968b052007-02-27 19:02:19 +0000580#include "stringlib/count.h"
581#include "stringlib/find.h"
582#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000583#include "stringlib/ctype.h"
584#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000585
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000586#define _Py_InsertThousandsGrouping _PyBytes_InsertThousandsGrouping
Eric Smitha3b1ac82009-04-03 14:45:06 +0000587#define _Py_InsertThousandsGroupingLocale _PyBytes_InsertThousandsGroupingLocale
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000588#include "stringlib/localeutil.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000589
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000590PyObject *
591PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000592{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000593 static const char *hexdigits = "0123456789abcdef";
594 register PyBytesObject* op = (PyBytesObject*) obj;
595 Py_ssize_t length = Py_SIZE(op);
596 size_t newsize = 3 + 4 * length;
597 PyObject *v;
598 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
599 PyErr_SetString(PyExc_OverflowError,
600 "bytes object is too large to make repr");
601 return NULL;
602 }
603 v = PyUnicode_FromUnicode(NULL, newsize);
604 if (v == NULL) {
605 return NULL;
606 }
607 else {
608 register Py_ssize_t i;
609 register Py_UNICODE c;
610 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
611 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000612
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000613 /* Figure out which quote to use; single is preferred */
614 quote = '\'';
615 if (smartquotes) {
616 char *test, *start;
617 start = PyBytes_AS_STRING(op);
618 for (test = start; test < start+length; ++test) {
619 if (*test == '"') {
620 quote = '\''; /* back to single */
621 goto decided;
622 }
623 else if (*test == '\'')
624 quote = '"';
625 }
626 decided:
627 ;
628 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000629
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000630 *p++ = 'b', *p++ = quote;
631 for (i = 0; i < length; i++) {
632 /* There's at least enough room for a hex escape
633 and a closing quote. */
634 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
635 c = op->ob_sval[i];
636 if (c == quote || c == '\\')
637 *p++ = '\\', *p++ = c;
638 else if (c == '\t')
639 *p++ = '\\', *p++ = 't';
640 else if (c == '\n')
641 *p++ = '\\', *p++ = 'n';
642 else if (c == '\r')
643 *p++ = '\\', *p++ = 'r';
644 else if (c < ' ' || c >= 0x7f) {
645 *p++ = '\\';
646 *p++ = 'x';
647 *p++ = hexdigits[(c & 0xf0) >> 4];
648 *p++ = hexdigits[c & 0xf];
649 }
650 else
651 *p++ = c;
652 }
653 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
654 *p++ = quote;
655 *p = '\0';
656 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
657 Py_DECREF(v);
658 return NULL;
659 }
660 return v;
661 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Neal Norwitz6968b052007-02-27 19:02:19 +0000664static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000665bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000666{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000667 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000668}
669
Neal Norwitz6968b052007-02-27 19:02:19 +0000670static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000671bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000672{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000673 if (Py_BytesWarningFlag) {
674 if (PyErr_WarnEx(PyExc_BytesWarning,
675 "str() on a bytes instance", 1))
676 return NULL;
677 }
678 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000679}
680
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000681static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000682bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000683{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000684 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000685}
Neal Norwitz6968b052007-02-27 19:02:19 +0000686
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687/* This is also used by PyBytes_Concat() */
688static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000689bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000691 Py_ssize_t size;
692 Py_buffer va, vb;
693 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000695 va.len = -1;
696 vb.len = -1;
697 if (_getbuffer(a, &va) < 0 ||
698 _getbuffer(b, &vb) < 0) {
699 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
700 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
701 goto done;
702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000704 /* Optimize end cases */
705 if (va.len == 0 && PyBytes_CheckExact(b)) {
706 result = b;
707 Py_INCREF(result);
708 goto done;
709 }
710 if (vb.len == 0 && PyBytes_CheckExact(a)) {
711 result = a;
712 Py_INCREF(result);
713 goto done;
714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000715
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000716 size = va.len + vb.len;
717 if (size < 0) {
718 PyErr_NoMemory();
719 goto done;
720 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000721
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000722 result = PyBytes_FromStringAndSize(NULL, size);
723 if (result != NULL) {
724 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
725 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000727
728 done:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000729 if (va.len != -1)
730 PyBuffer_Release(&va);
731 if (vb.len != -1)
732 PyBuffer_Release(&vb);
733 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000734}
Neal Norwitz6968b052007-02-27 19:02:19 +0000735
736static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000737bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000738{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000739 register Py_ssize_t i;
740 register Py_ssize_t j;
741 register Py_ssize_t size;
742 register PyBytesObject *op;
743 size_t nbytes;
744 if (n < 0)
745 n = 0;
746 /* watch out for overflows: the size can overflow int,
747 * and the # of bytes needed can overflow size_t
748 */
749 size = Py_SIZE(a) * n;
750 if (n && size / n != Py_SIZE(a)) {
751 PyErr_SetString(PyExc_OverflowError,
752 "repeated bytes are too long");
753 return NULL;
754 }
755 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
756 Py_INCREF(a);
757 return (PyObject *)a;
758 }
759 nbytes = (size_t)size;
760 if (nbytes + PyBytesObject_SIZE <= nbytes) {
761 PyErr_SetString(PyExc_OverflowError,
762 "repeated bytes are too long");
763 return NULL;
764 }
765 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
766 if (op == NULL)
767 return PyErr_NoMemory();
768 PyObject_INIT_VAR(op, &PyBytes_Type, size);
769 op->ob_shash = -1;
770 op->ob_sval[size] = '\0';
771 if (Py_SIZE(a) == 1 && n > 0) {
772 memset(op->ob_sval, a->ob_sval[0] , n);
773 return (PyObject *) op;
774 }
775 i = 0;
776 if (i < size) {
777 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
778 i = Py_SIZE(a);
779 }
780 while (i < size) {
781 j = (i <= size-i) ? i : size-i;
782 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
783 i += j;
784 }
785 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000786}
787
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000789bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790{
791 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
792 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000793 Py_buffer varg;
Antoine Pitroubc760d92010-08-15 17:46:50 +0000794 Py_ssize_t pos;
Antoine Pitroua57aae72010-06-09 16:58:35 +0000795 PyErr_Clear();
796 if (_getbuffer(arg, &varg) < 0)
797 return -1;
798 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
799 varg.buf, varg.len, 0);
800 PyBuffer_Release(&varg);
801 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000802 }
803 if (ival < 0 || ival >= 256) {
Antoine Pitroua57aae72010-06-09 16:58:35 +0000804 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
805 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000806 }
807
Antoine Pitroubc760d92010-08-15 17:46:50 +0000808 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000809}
810
Neal Norwitz6968b052007-02-27 19:02:19 +0000811static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000812bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000813{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000814 if (i < 0 || i >= Py_SIZE(a)) {
815 PyErr_SetString(PyExc_IndexError, "index out of range");
816 return NULL;
817 }
818 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000819}
820
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000821static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000822bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000823{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000824 int c;
825 Py_ssize_t len_a, len_b;
826 Py_ssize_t min_len;
827 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000828
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000829 /* Make sure both arguments are strings. */
830 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
831 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
832 (PyObject_IsInstance((PyObject*)a,
833 (PyObject*)&PyUnicode_Type) ||
834 PyObject_IsInstance((PyObject*)b,
835 (PyObject*)&PyUnicode_Type))) {
836 if (PyErr_WarnEx(PyExc_BytesWarning,
837 "Comparison between bytes and string", 1))
838 return NULL;
839 }
840 result = Py_NotImplemented;
841 goto out;
842 }
843 if (a == b) {
844 switch (op) {
845 case Py_EQ:case Py_LE:case Py_GE:
846 result = Py_True;
847 goto out;
848 case Py_NE:case Py_LT:case Py_GT:
849 result = Py_False;
850 goto out;
851 }
852 }
853 if (op == Py_EQ) {
854 /* Supporting Py_NE here as well does not save
855 much time, since Py_NE is rarely used. */
856 if (Py_SIZE(a) == Py_SIZE(b)
857 && (a->ob_sval[0] == b->ob_sval[0]
858 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
859 result = Py_True;
860 } else {
861 result = Py_False;
862 }
863 goto out;
864 }
865 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
866 min_len = (len_a < len_b) ? len_a : len_b;
867 if (min_len > 0) {
868 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
869 if (c==0)
870 c = memcmp(a->ob_sval, b->ob_sval, min_len);
871 } else
872 c = 0;
873 if (c == 0)
874 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
875 switch (op) {
876 case Py_LT: c = c < 0; break;
877 case Py_LE: c = c <= 0; break;
878 case Py_EQ: assert(0); break; /* unreachable */
879 case Py_NE: c = c != 0; break;
880 case Py_GT: c = c > 0; break;
881 case Py_GE: c = c >= 0; break;
882 default:
883 result = Py_NotImplemented;
884 goto out;
885 }
886 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000887 out:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000888 Py_INCREF(result);
889 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000890}
891
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000892static long
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000893bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000894{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000895 register Py_ssize_t len;
896 register unsigned char *p;
897 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000898
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000899 if (a->ob_shash != -1)
900 return a->ob_shash;
901 len = Py_SIZE(a);
902 p = (unsigned char *) a->ob_sval;
903 x = *p << 7;
904 while (--len >= 0)
905 x = (1000003*x) ^ *p++;
906 x ^= Py_SIZE(a);
907 if (x == -1)
908 x = -2;
909 a->ob_shash = x;
910 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000911}
912
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000913static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000914bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000915{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000916 if (PyIndex_Check(item)) {
917 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
918 if (i == -1 && PyErr_Occurred())
919 return NULL;
920 if (i < 0)
921 i += PyBytes_GET_SIZE(self);
922 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
923 PyErr_SetString(PyExc_IndexError,
924 "index out of range");
925 return NULL;
926 }
927 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
928 }
929 else if (PySlice_Check(item)) {
930 Py_ssize_t start, stop, step, slicelength, cur, i;
931 char* source_buf;
932 char* result_buf;
933 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000934
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000935 if (PySlice_GetIndicesEx((PySliceObject*)item,
936 PyBytes_GET_SIZE(self),
937 &start, &stop, &step, &slicelength) < 0) {
938 return NULL;
939 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000940
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000941 if (slicelength <= 0) {
942 return PyBytes_FromStringAndSize("", 0);
943 }
944 else if (start == 0 && step == 1 &&
945 slicelength == PyBytes_GET_SIZE(self) &&
946 PyBytes_CheckExact(self)) {
947 Py_INCREF(self);
948 return (PyObject *)self;
949 }
950 else if (step == 1) {
951 return PyBytes_FromStringAndSize(
952 PyBytes_AS_STRING(self) + start,
953 slicelength);
954 }
955 else {
956 source_buf = PyBytes_AS_STRING(self);
957 result = PyBytes_FromStringAndSize(NULL, slicelength);
958 if (result == NULL)
959 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000960
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000961 result_buf = PyBytes_AS_STRING(result);
962 for (cur = start, i = 0; i < slicelength;
963 cur += step, i++) {
964 result_buf[i] = source_buf[cur];
965 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000966
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000967 return result;
968 }
969 }
970 else {
971 PyErr_Format(PyExc_TypeError,
972 "byte indices must be integers, not %.200s",
973 Py_TYPE(item)->tp_name);
974 return NULL;
975 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000976}
977
978static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000979bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000980{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000981 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
982 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000983}
984
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000985static PySequenceMethods bytes_as_sequence = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000986 (lenfunc)bytes_length, /*sq_length*/
987 (binaryfunc)bytes_concat, /*sq_concat*/
988 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
989 (ssizeargfunc)bytes_item, /*sq_item*/
990 0, /*sq_slice*/
991 0, /*sq_ass_item*/
992 0, /*sq_ass_slice*/
993 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000994};
995
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000996static PyMappingMethods bytes_as_mapping = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000997 (lenfunc)bytes_length,
998 (binaryfunc)bytes_subscript,
999 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001000};
1001
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001002static PyBufferProcs bytes_as_buffer = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001003 (getbufferproc)bytes_buffer_getbuffer,
1004 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001005};
1006
1007
/* Strip-type selectors; they double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Per-method format strings, indexed by the selectors above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name: the format string with its 3-character "|O:" prefix
   skipped */
#define STRIPNAME(i) (stripformat[i]+3)
1016
Neal Norwitz6968b052007-02-27 19:02:19 +00001017
/* True if `length` bytes of `pattern` occur in `target` at `offset`.
   The first and last bytes are compared directly and the interior with
   memcmp().  Don't call if length < 2 (the memcmp() length would be
   negative).  Arguments are parenthesized so expressions can be passed
   safely; note that each argument is evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[offset] == (pattern)[0] &&                            \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1023
1024
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Add the bytes data[left:right] to the result list.

   Relies on the enclosing function providing `str`, `list`, `count` and an
   `onError` label.  The first MAX_PREALLOC items are stored directly into
   the preallocated slots (which takes over the reference to str); later
   items are appended, after which our own reference is dropped.
   Wrapped in do { } while (0) so that it behaves as a single statement. */
#define SPLIT_ADD(data, left, right) do {                       \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        } else {                                                \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            else                                                \
                Py_DECREF(str);                                 \
        }                                                       \
        count++;                                                \
    } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Advance (or, for the R variants, retreat) index i over a run of
   whitespace / non-whitespace bytes in s.  i is modified in place. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[i])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[i])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[i])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[i])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001062
1063Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001064split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001065{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001066 const char *s = PyBytes_AS_STRING(self);
1067 Py_ssize_t i, j, count=0;
1068 PyObject *str;
1069 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001070
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001071 if (list == NULL)
1072 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001074 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001075
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001076 while (maxsplit-- > 0) {
1077 SKIP_SPACE(s, i, len);
1078 if (i==len) break;
1079 j = i; i++;
1080 SKIP_NONSPACE(s, i, len);
1081 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1082 /* No whitespace in self, so just use it as list[0] */
1083 Py_INCREF(self);
1084 PyList_SET_ITEM(list, 0, (PyObject *)self);
1085 count++;
1086 break;
1087 }
1088 SPLIT_ADD(s, j, i);
1089 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001090
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001091 if (i < len) {
1092 /* Only occurs when maxsplit was reached */
1093 /* Skip any remaining whitespace and copy to end of string */
1094 SKIP_SPACE(s, i, len);
1095 if (i != len)
1096 SPLIT_ADD(s, i, len);
1097 }
1098 FIX_PREALLOC_SIZE(list);
1099 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001100 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001101 Py_DECREF(list);
1102 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001103}
1104
Guido van Rossum8f950672007-09-10 16:53:45 +00001105Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001107{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001108 const char *s = PyBytes_AS_STRING(self);
1109 register Py_ssize_t i, j, count=0;
1110 PyObject *str;
1111 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001112
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001113 if (list == NULL)
1114 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001115
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001116 i = j = 0;
1117 while ((j < len) && (maxcount-- > 0)) {
1118 for(; j<len; j++) {
1119 /* I found that using memchr makes no difference */
1120 if (s[j] == ch) {
1121 SPLIT_ADD(s, i, j);
1122 i = j = j + 1;
1123 break;
1124 }
1125 }
1126 }
1127 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1128 /* ch not in self, so just use self as list[0] */
1129 Py_INCREF(self);
1130 PyList_SET_ITEM(list, 0, (PyObject *)self);
1131 count++;
1132 }
1133 else if (i <= len) {
1134 SPLIT_ADD(s, i, len);
1135 }
1136 FIX_PREALLOC_SIZE(list);
1137 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001138
1139 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001140 Py_DECREF(list);
1141 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001142}
1143
Neal Norwitz6968b052007-02-27 19:02:19 +00001144PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001145"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001146\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001147Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001148If sep is not specified or is None, B is split on ASCII whitespace\n\
1149characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001150If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001151
1152static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001153bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001154{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001155 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1156 Py_ssize_t maxsplit = -1, count=0;
1157 const char *s = PyBytes_AS_STRING(self), *sub;
1158 Py_buffer vsub;
1159 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001160#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001161 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001162#endif
1163
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001164 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1165 return NULL;
1166 if (maxsplit < 0)
1167 maxsplit = PY_SSIZE_T_MAX;
1168 if (subobj == Py_None)
1169 return split_whitespace(self, len, maxsplit);
1170 if (_getbuffer(subobj, &vsub) < 0)
1171 return NULL;
1172 sub = vsub.buf;
1173 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001174
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001175 if (n == 0) {
1176 PyErr_SetString(PyExc_ValueError, "empty separator");
1177 PyBuffer_Release(&vsub);
1178 return NULL;
1179 }
1180 else if (n == 1) {
1181 list = split_char(self, len, sub[0], maxsplit);
1182 PyBuffer_Release(&vsub);
1183 return list;
1184 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001185
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001186 list = PyList_New(PREALLOC_SIZE(maxsplit));
1187 if (list == NULL) {
1188 PyBuffer_Release(&vsub);
1189 return NULL;
1190 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001191
1192#ifdef USE_FAST
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001193 i = j = 0;
1194 while (maxsplit-- > 0) {
1195 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1196 if (pos < 0)
1197 break;
1198 j = i+pos;
1199 SPLIT_ADD(s, i, j);
1200 i = j + n;
1201 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001202#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001203 i = j = 0;
1204 while ((j+n <= len) && (maxsplit-- > 0)) {
1205 for (; j+n <= len; j++) {
1206 if (Py_STRING_MATCH(s, j, sub, n)) {
1207 SPLIT_ADD(s, i, j);
1208 i = j = j + n;
1209 break;
1210 }
1211 }
1212 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001213#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001214 SPLIT_ADD(s, i, len);
1215 FIX_PREALLOC_SIZE(list);
1216 PyBuffer_Release(&vsub);
1217 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001218
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001220 Py_DECREF(list);
1221 PyBuffer_Release(&vsub);
1222 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001223}
1224
Neal Norwitz6968b052007-02-27 19:02:19 +00001225PyDoc_STRVAR(partition__doc__,
1226"B.partition(sep) -> (head, sep, tail)\n\
1227\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001228Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001229the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001231
1232static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001233bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001234{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001235 const char *sep;
1236 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001237
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001238 if (PyBytes_Check(sep_obj)) {
1239 sep = PyBytes_AS_STRING(sep_obj);
1240 sep_len = PyBytes_GET_SIZE(sep_obj);
1241 }
1242 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1243 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001244
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001245 return stringlib_partition(
1246 (PyObject*) self,
1247 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1248 sep_obj, sep, sep_len
1249 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001250}
1251
1252PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti4c81fbb2010-01-25 12:02:24 +00001253"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001254\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001255Search for the separator sep in B, starting at the end of B,\n\
1256and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001257part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001259
1260static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001261bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001262{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001263 const char *sep;
1264 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001265
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001266 if (PyBytes_Check(sep_obj)) {
1267 sep = PyBytes_AS_STRING(sep_obj);
1268 sep_len = PyBytes_GET_SIZE(sep_obj);
1269 }
1270 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1271 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001272
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001273 return stringlib_rpartition(
1274 (PyObject*) self,
1275 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1276 sep_obj, sep, sep_len
1277 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001278}
1279
1280Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001281rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001282{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001283 const char *s = PyBytes_AS_STRING(self);
1284 Py_ssize_t i, j, count=0;
1285 PyObject *str;
1286 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001287
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001288 if (list == NULL)
1289 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001290
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001291 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001292
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001293 while (maxsplit-- > 0) {
1294 RSKIP_SPACE(s, i);
1295 if (i<0) break;
1296 j = i; i--;
1297 RSKIP_NONSPACE(s, i);
1298 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1299 /* No whitespace in self, so just use it as list[0] */
1300 Py_INCREF(self);
1301 PyList_SET_ITEM(list, 0, (PyObject *)self);
1302 count++;
1303 break;
1304 }
1305 SPLIT_ADD(s, i + 1, j + 1);
1306 }
1307 if (i >= 0) {
1308 /* Only occurs when maxsplit was reached. Skip any remaining
1309 whitespace and copy to beginning of string. */
1310 RSKIP_SPACE(s, i);
1311 if (i >= 0)
1312 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001313
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001314 }
1315 FIX_PREALLOC_SIZE(list);
1316 if (PyList_Reverse(list) < 0)
1317 goto onError;
1318 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001319 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001320 Py_DECREF(list);
1321 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001322}
1323
Guido van Rossum8f950672007-09-10 16:53:45 +00001324Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001326{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001327 const char *s = PyBytes_AS_STRING(self);
1328 register Py_ssize_t i, j, count=0;
1329 PyObject *str;
1330 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001331
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001332 if (list == NULL)
1333 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001334
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001335 i = j = len - 1;
1336 while ((i >= 0) && (maxcount-- > 0)) {
1337 for (; i >= 0; i--) {
1338 if (s[i] == ch) {
1339 SPLIT_ADD(s, i + 1, j + 1);
1340 j = i = i - 1;
1341 break;
1342 }
1343 }
1344 }
1345 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1346 /* ch not in self, so just use self as list[0] */
1347 Py_INCREF(self);
1348 PyList_SET_ITEM(list, 0, (PyObject *)self);
1349 count++;
1350 }
1351 else if (j >= -1) {
1352 SPLIT_ADD(s, 0, j + 1);
1353 }
1354 FIX_PREALLOC_SIZE(list);
1355 if (PyList_Reverse(list) < 0)
1356 goto onError;
1357 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359 onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001360 Py_DECREF(list);
1361 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001362}
1363
Neal Norwitz6968b052007-02-27 19:02:19 +00001364PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001365"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001366\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367Return a list of the sections in B, using sep as the delimiter,\n\
1368starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001369If sep is not given, B is split on ASCII whitespace characters\n\
1370(space, tab, return, newline, formfeed, vertical tab).\n\
1371If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001372
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Neal Norwitz6968b052007-02-27 19:02:19 +00001374static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001375bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001376{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001377 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1378 Py_ssize_t maxsplit = -1, count=0;
1379 const char *s, *sub;
1380 Py_buffer vsub;
1381 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001382
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001383 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1384 return NULL;
1385 if (maxsplit < 0)
1386 maxsplit = PY_SSIZE_T_MAX;
1387 if (subobj == Py_None)
1388 return rsplit_whitespace(self, len, maxsplit);
1389 if (_getbuffer(subobj, &vsub) < 0)
1390 return NULL;
1391 sub = vsub.buf;
1392 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001393
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001394 if (n == 0) {
1395 PyErr_SetString(PyExc_ValueError, "empty separator");
1396 PyBuffer_Release(&vsub);
1397 return NULL;
1398 }
1399 else if (n == 1) {
1400 list = rsplit_char(self, len, sub[0], maxsplit);
1401 PyBuffer_Release(&vsub);
1402 return list;
1403 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001404
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001405 list = PyList_New(PREALLOC_SIZE(maxsplit));
1406 if (list == NULL) {
1407 PyBuffer_Release(&vsub);
1408 return NULL;
1409 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001410
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001411 j = len;
1412 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001413
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001414 s = PyBytes_AS_STRING(self);
1415 while ( (i >= 0) && (maxsplit-- > 0) ) {
1416 for (; i>=0; i--) {
1417 if (Py_STRING_MATCH(s, i, sub, n)) {
1418 SPLIT_ADD(s, i + n, j);
1419 j = i;
1420 i -= n;
1421 break;
1422 }
1423 }
1424 }
1425 SPLIT_ADD(s, 0, j);
1426 FIX_PREALLOC_SIZE(list);
1427 if (PyList_Reverse(list) < 0)
1428 goto onError;
1429 PyBuffer_Release(&vsub);
1430 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001431
1432onError:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001433 Py_DECREF(list);
1434 PyBuffer_Release(&vsub);
1435 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001436}
1437
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438#undef SPLIT_ADD
1439#undef MAX_PREALLOC
1440#undef PREALLOC_SIZE
1441
1442
1443PyDoc_STRVAR(join__doc__,
1444"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001445\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001446Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1448
Neal Norwitz6968b052007-02-27 19:02:19 +00001449static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001450bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001451{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001452 char *sep = PyBytes_AS_STRING(self);
1453 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1454 PyObject *res = NULL;
1455 char *p;
1456 Py_ssize_t seqlen = 0;
1457 size_t sz = 0;
1458 Py_ssize_t i;
1459 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001460
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001461 seq = PySequence_Fast(orig, "");
1462 if (seq == NULL) {
1463 return NULL;
1464 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001465
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001466 seqlen = PySequence_Size(seq);
1467 if (seqlen == 0) {
1468 Py_DECREF(seq);
1469 return PyBytes_FromString("");
1470 }
1471 if (seqlen == 1) {
1472 item = PySequence_Fast_GET_ITEM(seq, 0);
1473 if (PyBytes_CheckExact(item)) {
1474 Py_INCREF(item);
1475 Py_DECREF(seq);
1476 return item;
1477 }
1478 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001479
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001480 /* There are at least two things to join, or else we have a subclass
1481 * of the builtin types in the sequence.
1482 * Do a pre-pass to figure out the total amount of space we'll
1483 * need (sz), and see whether all argument are bytes.
1484 */
1485 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1486 for (i = 0; i < seqlen; i++) {
1487 const size_t old_sz = sz;
1488 item = PySequence_Fast_GET_ITEM(seq, i);
1489 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1490 PyErr_Format(PyExc_TypeError,
1491 "sequence item %zd: expected bytes,"
1492 " %.80s found",
1493 i, Py_TYPE(item)->tp_name);
1494 Py_DECREF(seq);
1495 return NULL;
1496 }
1497 sz += Py_SIZE(item);
1498 if (i != 0)
1499 sz += seplen;
1500 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1501 PyErr_SetString(PyExc_OverflowError,
1502 "join() result is too long for bytes");
1503 Py_DECREF(seq);
1504 return NULL;
1505 }
1506 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001508 /* Allocate result space. */
1509 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1510 if (res == NULL) {
1511 Py_DECREF(seq);
1512 return NULL;
1513 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001514
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001515 /* Catenate everything. */
1516 /* I'm not worried about a PyByteArray item growing because there's
1517 nowhere in this function where we release the GIL. */
1518 p = PyBytes_AS_STRING(res);
1519 for (i = 0; i < seqlen; ++i) {
1520 size_t n;
1521 char *q;
1522 if (i) {
1523 Py_MEMCPY(p, sep, seplen);
1524 p += seplen;
1525 }
1526 item = PySequence_Fast_GET_ITEM(seq, i);
1527 n = Py_SIZE(item);
1528 if (PyBytes_Check(item))
1529 q = PyBytes_AS_STRING(item);
1530 else
1531 q = PyByteArray_AS_STRING(item);
1532 Py_MEMCPY(p, q, n);
1533 p += n;
1534 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001535
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001536 Py_DECREF(seq);
1537 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001538}
1539
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001540PyObject *
1541_PyBytes_Join(PyObject *sep, PyObject *x)
1542{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001543 assert(sep != NULL && PyBytes_Check(sep));
1544 assert(x != NULL);
1545 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546}
1547
1548Py_LOCAL_INLINE(void)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001549bytes_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001550{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001551 if (*end > len)
1552 *end = len;
1553 else if (*end < 0)
1554 *end += len;
1555 if (*end < 0)
1556 *end = 0;
1557 if (*start < 0)
1558 *start += len;
1559 if (*start < 0)
1560 *start = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001561}
1562
1563Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001564bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001566 PyObject *subobj;
1567 const char *sub;
1568 Py_ssize_t sub_len;
1569 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001570
Jesus Ceaac451502011-04-20 17:09:23 +02001571 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1572 args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001573 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001574
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001575 if (PyBytes_Check(subobj)) {
1576 sub = PyBytes_AS_STRING(subobj);
1577 sub_len = PyBytes_GET_SIZE(subobj);
1578 }
1579 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1580 /* XXX - the "expected a character buffer object" is pretty
1581 confusing for a non-expert. remap to something else ? */
1582 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001583
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001584 if (dir > 0)
1585 return stringlib_find_slice(
1586 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1587 sub, sub_len, start, end);
1588 else
1589 return stringlib_rfind_slice(
1590 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1591 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001592}
1593
1594
1595PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001596"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001597\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598Return the lowest index in S where substring sub is found,\n\
1599such that sub is contained within s[start:end]. Optional\n\
1600arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001601\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001602Return -1 on failure.");
1603
Neal Norwitz6968b052007-02-27 19:02:19 +00001604static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001605bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001606{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001607 Py_ssize_t result = bytes_find_internal(self, args, +1);
1608 if (result == -2)
1609 return NULL;
1610 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001611}
1612
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613
1614PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001615"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001616\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001617Like B.find() but raise ValueError when the substring is not found.");
1618
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001619static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001620bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001621{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001622 Py_ssize_t result = bytes_find_internal(self, args, +1);
1623 if (result == -2)
1624 return NULL;
1625 if (result == -1) {
1626 PyErr_SetString(PyExc_ValueError,
1627 "substring not found");
1628 return NULL;
1629 }
1630 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001631}
1632
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633
1634PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001635"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001636\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637Return the highest index in B where substring sub is found,\n\
1638such that sub is contained within s[start:end]. Optional\n\
1639arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001640\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001641Return -1 on failure.");
1642
Neal Norwitz6968b052007-02-27 19:02:19 +00001643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001644bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001645{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001646 Py_ssize_t result = bytes_find_internal(self, args, -1);
1647 if (result == -2)
1648 return NULL;
1649 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001650}
1651
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001652
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001654"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655\n\
1656Like B.rfind() but raise ValueError when the substring is not found.");
1657
1658static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001659bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001660{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001661 Py_ssize_t result = bytes_find_internal(self, args, -1);
1662 if (result == -2)
1663 return NULL;
1664 if (result == -1) {
1665 PyErr_SetString(PyExc_ValueError,
1666 "substring not found");
1667 return NULL;
1668 }
1669 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001670}
1671
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672
1673Py_LOCAL_INLINE(PyObject *)
1674do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001675{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001676 Py_buffer vsep;
1677 char *s = PyBytes_AS_STRING(self);
1678 Py_ssize_t len = PyBytes_GET_SIZE(self);
1679 char *sep;
1680 Py_ssize_t seplen;
1681 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001683 if (_getbuffer(sepobj, &vsep) < 0)
1684 return NULL;
1685 sep = vsep.buf;
1686 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001688 i = 0;
1689 if (striptype != RIGHTSTRIP) {
1690 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1691 i++;
1692 }
1693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001695 j = len;
1696 if (striptype != LEFTSTRIP) {
1697 do {
1698 j--;
1699 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1700 j++;
1701 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001702
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001703 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001705 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1706 Py_INCREF(self);
1707 return (PyObject*)self;
1708 }
1709 else
1710 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001711}
1712
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
1714Py_LOCAL_INLINE(PyObject *)
1715do_strip(PyBytesObject *self, int striptype)
1716{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001717 char *s = PyBytes_AS_STRING(self);
1718 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001719
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001720 i = 0;
1721 if (striptype != RIGHTSTRIP) {
1722 while (i < len && ISSPACE(s[i])) {
1723 i++;
1724 }
1725 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001727 j = len;
1728 if (striptype != LEFTSTRIP) {
1729 do {
1730 j--;
1731 } while (j >= i && ISSPACE(s[j]));
1732 j++;
1733 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001735 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1736 Py_INCREF(self);
1737 return (PyObject*)self;
1738 }
1739 else
1740 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741}
1742
1743
1744Py_LOCAL_INLINE(PyObject *)
1745do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1746{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001747 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001749 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1750 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001752 if (sep != NULL && sep != Py_None) {
1753 return do_xstrip(self, striptype, sep);
1754 }
1755 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756}
1757
1758
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001759PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001761\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001762Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001764static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001765bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001766{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001767 if (PyTuple_GET_SIZE(args) == 0)
1768 return do_strip(self, BOTHSTRIP); /* Common case */
1769 else
1770 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001771}
1772
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001774PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001776\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001777Strip leading bytes contained in the argument.\n\
1778If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001779static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001780bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001781{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001782 if (PyTuple_GET_SIZE(args) == 0)
1783 return do_strip(self, LEFTSTRIP); /* Common case */
1784 else
1785 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001786}
1787
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001789PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001791\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001792Strip trailing bytes contained in the argument.\n\
1793If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001794static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001795bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001796{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001797 if (PyTuple_GET_SIZE(args) == 0)
1798 return do_strip(self, RIGHTSTRIP); /* Common case */
1799 else
1800 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001801}
Neal Norwitz6968b052007-02-27 19:02:19 +00001802
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803
1804PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001805"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001806\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807Return the number of non-overlapping occurrences of substring sub in\n\
1808string S[start:end]. Optional arguments start and end are interpreted\n\
1809as in slice notation.");
1810
1811static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001812bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001814 PyObject *sub_obj;
1815 const char *str = PyBytes_AS_STRING(self), *sub;
1816 Py_ssize_t sub_len;
1817 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818
Jesus Ceaac451502011-04-20 17:09:23 +02001819 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001820 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001822 if (PyBytes_Check(sub_obj)) {
1823 sub = PyBytes_AS_STRING(sub_obj);
1824 sub_len = PyBytes_GET_SIZE(sub_obj);
1825 }
1826 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1827 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001829 bytes_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001831 return PyLong_FromSsize_t(
1832 stringlib_count(str + start, end - start, sub, sub_len)
1833 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834}
1835
1836
1837PyDoc_STRVAR(translate__doc__,
1838"B.translate(table[, deletechars]) -> bytes\n\
1839\n\
1840Return a copy of B, where all characters occurring in the\n\
1841optional argument deletechars are removed, and the remaining\n\
1842characters have been mapped through the given translation\n\
1843table, which must be a bytes object of length 256.");
1844
1845static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001846bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001848 register char *input, *output;
1849 const char *table;
1850 register Py_ssize_t i, c, changed = 0;
1851 PyObject *input_obj = (PyObject*)self;
1852 const char *output_start, *del_table=NULL;
1853 Py_ssize_t inlen, tablen, dellen = 0;
1854 PyObject *result;
1855 int trans_table[256];
1856 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001858 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1859 &tableobj, &delobj))
1860 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001862 if (PyBytes_Check(tableobj)) {
1863 table = PyBytes_AS_STRING(tableobj);
1864 tablen = PyBytes_GET_SIZE(tableobj);
1865 }
1866 else if (tableobj == Py_None) {
1867 table = NULL;
1868 tablen = 256;
1869 }
1870 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1871 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001873 if (tablen != 256) {
1874 PyErr_SetString(PyExc_ValueError,
1875 "translation table must be 256 characters long");
1876 return NULL;
1877 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001879 if (delobj != NULL) {
1880 if (PyBytes_Check(delobj)) {
1881 del_table = PyBytes_AS_STRING(delobj);
1882 dellen = PyBytes_GET_SIZE(delobj);
1883 }
1884 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1885 return NULL;
1886 }
1887 else {
1888 del_table = NULL;
1889 dellen = 0;
1890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001892 inlen = PyBytes_GET_SIZE(input_obj);
1893 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1894 if (result == NULL)
1895 return NULL;
1896 output_start = output = PyBytes_AsString(result);
1897 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001899 if (dellen == 0 && table != NULL) {
1900 /* If no deletions are required, use faster code */
1901 for (i = inlen; --i >= 0; ) {
1902 c = Py_CHARMASK(*input++);
1903 if (Py_CHARMASK((*output++ = table[c])) != c)
1904 changed = 1;
1905 }
1906 if (changed || !PyBytes_CheckExact(input_obj))
1907 return result;
1908 Py_DECREF(result);
1909 Py_INCREF(input_obj);
1910 return input_obj;
1911 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001913 if (table == NULL) {
1914 for (i = 0; i < 256; i++)
1915 trans_table[i] = Py_CHARMASK(i);
1916 } else {
1917 for (i = 0; i < 256; i++)
1918 trans_table[i] = Py_CHARMASK(table[i]);
1919 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001921 for (i = 0; i < dellen; i++)
1922 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001924 for (i = inlen; --i >= 0; ) {
1925 c = Py_CHARMASK(*input++);
1926 if (trans_table[c] != -1)
1927 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1928 continue;
1929 changed = 1;
1930 }
1931 if (!changed && PyBytes_CheckExact(input_obj)) {
1932 Py_DECREF(result);
1933 Py_INCREF(input_obj);
1934 return input_obj;
1935 }
1936 /* Fix the size of the resulting string */
1937 if (inlen > 0)
1938 _PyBytes_Resize(&result, output - output_start);
1939 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001940}
1941
1942
Georg Brandlabc38772009-04-12 15:51:51 +00001943static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001944bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001945{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001946 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001947}
1948
/* Direction constants passed as the `direction` argument of the
   substring search helpers below. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Thin wrapper around memchr(): pointer to the first byte equal to c in
   the target_len bytes starting at target, or NULL if there is none. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1956
1957/* String ops must return a string. */
1958/* If the object is subclass of string, create a copy */
1959Py_LOCAL(PyBytesObject *)
1960return_self(PyBytesObject *self)
1961{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001962 if (PyBytes_CheckExact(self)) {
1963 Py_INCREF(self);
1964 return self;
1965 }
1966 return (PyBytesObject *)PyBytes_FromStringAndSize(
1967 PyBytes_AS_STRING(self),
1968 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001969}
1970
1971Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitroubc760d92010-08-15 17:46:50 +00001972countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001974 Py_ssize_t count=0;
1975 const char *start=target;
1976 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001978 while ( (start=findchar(start, end-start, c)) != NULL ) {
1979 count++;
1980 if (count >= maxcount)
1981 break;
1982 start += 1;
1983 }
1984 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985}
1986
1987Py_LOCAL(Py_ssize_t)
1988findstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001989 const char *pattern, Py_ssize_t pattern_len,
1990 Py_ssize_t start,
1991 Py_ssize_t end,
1992 int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001994 if (start < 0) {
1995 start += target_len;
1996 if (start < 0)
1997 start = 0;
1998 }
1999 if (end > target_len) {
2000 end = target_len;
2001 } else if (end < 0) {
2002 end += target_len;
2003 if (end < 0)
2004 end = 0;
2005 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002007 /* zero-length substrings always match at the first attempt */
2008 if (pattern_len == 0)
2009 return (direction > 0) ? start : end;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002011 end -= pattern_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002013 if (direction < 0) {
2014 for (; end >= start; end--)
2015 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2016 return end;
2017 } else {
2018 for (; start <= end; start++)
2019 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2020 return start;
2021 }
2022 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023}
2024
2025Py_LOCAL_INLINE(Py_ssize_t)
2026countstring(const char *target, Py_ssize_t target_len,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002027 const char *pattern, Py_ssize_t pattern_len,
2028 Py_ssize_t start,
2029 Py_ssize_t end,
2030 int direction, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002032 Py_ssize_t count=0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002033
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002034 if (start < 0) {
2035 start += target_len;
2036 if (start < 0)
2037 start = 0;
2038 }
2039 if (end > target_len) {
2040 end = target_len;
2041 } else if (end < 0) {
2042 end += target_len;
2043 if (end < 0)
2044 end = 0;
2045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002047 /* zero-length substrings match everywhere */
2048 if (pattern_len == 0 || maxcount == 0) {
2049 if (target_len+1 < maxcount)
2050 return target_len+1;
2051 return maxcount;
2052 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002054 end -= pattern_len;
2055 if (direction < 0) {
2056 for (; (end >= start); end--)
2057 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2058 count++;
2059 if (--maxcount <= 0) break;
2060 end -= pattern_len-1;
2061 }
2062 } else {
2063 for (; (start <= end); start++)
2064 if (Py_STRING_MATCH(target, start,
2065 pattern, pattern_len)) {
2066 count++;
2067 if (--maxcount <= 0)
2068 break;
2069 start += pattern_len-1;
2070 }
2071 }
2072 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073}
2074
2075
2076/* Algorithms for different cases of string replacement */
2077
2078/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2079Py_LOCAL(PyBytesObject *)
2080replace_interleave(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002081 const char *to_s, Py_ssize_t to_len,
2082 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002084 char *self_s, *result_s;
2085 Py_ssize_t self_len, result_len;
2086 Py_ssize_t count, i, product;
2087 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002089 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002091 /* 1 at the end plus 1 after every character */
2092 count = self_len+1;
2093 if (maxcount < count)
2094 count = maxcount;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002096 /* Check for overflow */
2097 /* result_len = count * to_len + self_len; */
2098 product = count * to_len;
2099 if (product / to_len != count) {
2100 PyErr_SetString(PyExc_OverflowError,
2101 "replacement bytes are too long");
2102 return NULL;
2103 }
2104 result_len = product + self_len;
2105 if (result_len < 0) {
2106 PyErr_SetString(PyExc_OverflowError,
2107 "replacement bytes are too long");
2108 return NULL;
2109 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002111 if (! (result = (PyBytesObject *)
2112 PyBytes_FromStringAndSize(NULL, result_len)) )
2113 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002115 self_s = PyBytes_AS_STRING(self);
2116 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002118 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002120 /* Lay the first one down (guaranteed this will occur) */
2121 Py_MEMCPY(result_s, to_s, to_len);
2122 result_s += to_len;
2123 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002125 for (i=0; i<count; i++) {
2126 *result_s++ = *self_s++;
2127 Py_MEMCPY(result_s, to_s, to_len);
2128 result_s += to_len;
2129 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002131 /* Copy the rest of the original string */
2132 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002134 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135}
2136
2137/* Special case for deleting a single character */
2138/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2139Py_LOCAL(PyBytesObject *)
2140replace_delete_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002141 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002143 char *self_s, *result_s;
2144 char *start, *next, *end;
2145 Py_ssize_t self_len, result_len;
2146 Py_ssize_t count;
2147 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002148
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002149 self_len = PyBytes_GET_SIZE(self);
2150 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002152 count = countchar(self_s, self_len, from_c, maxcount);
2153 if (count == 0) {
2154 return return_self(self);
2155 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002157 result_len = self_len - count; /* from_len == 1 */
2158 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002159
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002160 if ( (result = (PyBytesObject *)
2161 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2162 return NULL;
2163 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002164
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002165 start = self_s;
2166 end = self_s + self_len;
2167 while (count-- > 0) {
2168 next = findchar(start, end-start, from_c);
2169 if (next == NULL)
2170 break;
2171 Py_MEMCPY(result_s, start, next-start);
2172 result_s += (next-start);
2173 start = next+1;
2174 }
2175 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002176
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002177 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178}
2179
2180/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2181
2182Py_LOCAL(PyBytesObject *)
2183replace_delete_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002184 const char *from_s, Py_ssize_t from_len,
2185 Py_ssize_t maxcount) {
2186 char *self_s, *result_s;
2187 char *start, *next, *end;
2188 Py_ssize_t self_len, result_len;
2189 Py_ssize_t count, offset;
2190 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002192 self_len = PyBytes_GET_SIZE(self);
2193 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002194
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002195 count = countstring(self_s, self_len,
2196 from_s, from_len,
2197 0, self_len, 1,
2198 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002200 if (count == 0) {
2201 /* no matches */
2202 return return_self(self);
2203 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002205 result_len = self_len - (count * from_len);
2206 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002208 if ( (result = (PyBytesObject *)
2209 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2210 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002211
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002212 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002214 start = self_s;
2215 end = self_s + self_len;
2216 while (count-- > 0) {
2217 offset = findstring(start, end-start,
2218 from_s, from_len,
2219 0, end-start, FORWARD);
2220 if (offset == -1)
2221 break;
2222 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002224 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002225
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002226 result_s += (next-start);
2227 start = next+from_len;
2228 }
2229 Py_MEMCPY(result_s, start, end-start);
2230 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231}
2232
2233/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2234Py_LOCAL(PyBytesObject *)
2235replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002236 char from_c, char to_c,
2237 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002238{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002239 char *self_s, *result_s, *start, *end, *next;
2240 Py_ssize_t self_len;
2241 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002242
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002243 /* The result string will be the same size */
2244 self_s = PyBytes_AS_STRING(self);
2245 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002246
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002247 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002249 if (next == NULL) {
2250 /* No matches; return the original string */
2251 return return_self(self);
2252 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002254 /* Need to make a new string */
2255 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2256 if (result == NULL)
2257 return NULL;
2258 result_s = PyBytes_AS_STRING(result);
2259 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002260
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002261 /* change everything in-place, starting with this one */
2262 start = result_s + (next-self_s);
2263 *start = to_c;
2264 start++;
2265 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002267 while (--maxcount > 0) {
2268 next = findchar(start, end-start, from_c);
2269 if (next == NULL)
2270 break;
2271 *next = to_c;
2272 start = next+1;
2273 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002274
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002275 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002276}
2277
2278/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2279Py_LOCAL(PyBytesObject *)
2280replace_substring_in_place(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002281 const char *from_s, Py_ssize_t from_len,
2282 const char *to_s, Py_ssize_t to_len,
2283 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002285 char *result_s, *start, *end;
2286 char *self_s;
2287 Py_ssize_t self_len, offset;
2288 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002290 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002291
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002292 self_s = PyBytes_AS_STRING(self);
2293 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002294
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002295 offset = findstring(self_s, self_len,
2296 from_s, from_len,
2297 0, self_len, FORWARD);
2298 if (offset == -1) {
2299 /* No matches; return the original string */
2300 return return_self(self);
2301 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002303 /* Need to make a new string */
2304 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2305 if (result == NULL)
2306 return NULL;
2307 result_s = PyBytes_AS_STRING(result);
2308 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002309
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002310 /* change everything in-place, starting with this one */
2311 start = result_s + offset;
2312 Py_MEMCPY(start, to_s, from_len);
2313 start += from_len;
2314 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002315
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002316 while ( --maxcount > 0) {
2317 offset = findstring(start, end-start,
2318 from_s, from_len,
2319 0, end-start, FORWARD);
2320 if (offset==-1)
2321 break;
2322 Py_MEMCPY(start+offset, to_s, from_len);
2323 start += offset+from_len;
2324 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002326 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002327}
2328
2329/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2330Py_LOCAL(PyBytesObject *)
2331replace_single_character(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002332 char from_c,
2333 const char *to_s, Py_ssize_t to_len,
2334 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002335{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002336 char *self_s, *result_s;
2337 char *start, *next, *end;
2338 Py_ssize_t self_len, result_len;
2339 Py_ssize_t count, product;
2340 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002341
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002342 self_s = PyBytes_AS_STRING(self);
2343 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002344
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002345 count = countchar(self_s, self_len, from_c, maxcount);
2346 if (count == 0) {
2347 /* no matches, return unchanged */
2348 return return_self(self);
2349 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002350
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002351 /* use the difference between current and new, hence the "-1" */
2352 /* result_len = self_len + count * (to_len-1) */
2353 product = count * (to_len-1);
2354 if (product / (to_len-1) != count) {
2355 PyErr_SetString(PyExc_OverflowError,
2356 "replacement bytes are too long");
2357 return NULL;
2358 }
2359 result_len = self_len + product;
2360 if (result_len < 0) {
2361 PyErr_SetString(PyExc_OverflowError,
2362 "replacment bytes are too long");
2363 return NULL;
2364 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002365
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002366 if ( (result = (PyBytesObject *)
2367 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2368 return NULL;
2369 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002370
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002371 start = self_s;
2372 end = self_s + self_len;
2373 while (count-- > 0) {
2374 next = findchar(start, end-start, from_c);
2375 if (next == NULL)
2376 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002377
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002378 if (next == start) {
2379 /* replace with the 'to' */
2380 Py_MEMCPY(result_s, to_s, to_len);
2381 result_s += to_len;
2382 start += 1;
2383 } else {
2384 /* copy the unchanged old then the 'to' */
2385 Py_MEMCPY(result_s, start, next-start);
2386 result_s += (next-start);
2387 Py_MEMCPY(result_s, to_s, to_len);
2388 result_s += to_len;
2389 start = next+1;
2390 }
2391 }
2392 /* Copy the remainder of the remaining string */
2393 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002394
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002395 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002396}
2397
2398/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2399Py_LOCAL(PyBytesObject *)
2400replace_substring(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002401 const char *from_s, Py_ssize_t from_len,
2402 const char *to_s, Py_ssize_t to_len,
2403 Py_ssize_t maxcount) {
2404 char *self_s, *result_s;
2405 char *start, *next, *end;
2406 Py_ssize_t self_len, result_len;
2407 Py_ssize_t count, offset, product;
2408 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002409
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002410 self_s = PyBytes_AS_STRING(self);
2411 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002412
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002413 count = countstring(self_s, self_len,
2414 from_s, from_len,
2415 0, self_len, FORWARD, maxcount);
2416 if (count == 0) {
2417 /* no matches, return unchanged */
2418 return return_self(self);
2419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002420
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002421 /* Check for overflow */
2422 /* result_len = self_len + count * (to_len-from_len) */
2423 product = count * (to_len-from_len);
2424 if (product / (to_len-from_len) != count) {
2425 PyErr_SetString(PyExc_OverflowError,
2426 "replacement bytes are too long");
2427 return NULL;
2428 }
2429 result_len = self_len + product;
2430 if (result_len < 0) {
2431 PyErr_SetString(PyExc_OverflowError,
2432 "replacement bytes are too long");
2433 return NULL;
2434 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002435
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002436 if ( (result = (PyBytesObject *)
2437 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2438 return NULL;
2439 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002440
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002441 start = self_s;
2442 end = self_s + self_len;
2443 while (count-- > 0) {
2444 offset = findstring(start, end-start,
2445 from_s, from_len,
2446 0, end-start, FORWARD);
2447 if (offset == -1)
2448 break;
2449 next = start+offset;
2450 if (next == start) {
2451 /* replace with the 'to' */
2452 Py_MEMCPY(result_s, to_s, to_len);
2453 result_s += to_len;
2454 start += from_len;
2455 } else {
2456 /* copy the unchanged old then the 'to' */
2457 Py_MEMCPY(result_s, start, next-start);
2458 result_s += (next-start);
2459 Py_MEMCPY(result_s, to_s, to_len);
2460 result_s += to_len;
2461 start = next+from_len;
2462 }
2463 }
2464 /* Copy the remainder of the remaining string */
2465 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002467 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002468}
2469
2470
2471Py_LOCAL(PyBytesObject *)
2472replace(PyBytesObject *self,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002473 const char *from_s, Py_ssize_t from_len,
2474 const char *to_s, Py_ssize_t to_len,
2475 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002477 if (maxcount < 0) {
2478 maxcount = PY_SSIZE_T_MAX;
2479 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2480 /* nothing to do; return the original string */
2481 return return_self(self);
2482 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002484 if (maxcount == 0 ||
2485 (from_len == 0 && to_len == 0)) {
2486 /* nothing to do; return the original string */
2487 return return_self(self);
2488 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002490 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002492 if (from_len == 0) {
2493 /* insert the 'to' string everywhere. */
2494 /* >>> "Python".replace("", ".") */
2495 /* '.P.y.t.h.o.n.' */
2496 return replace_interleave(self, to_s, to_len, maxcount);
2497 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002499 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2500 /* point for an empty self string to generate a non-empty string */
2501 /* Special case so the remaining code always gets a non-empty string */
2502 if (PyBytes_GET_SIZE(self) == 0) {
2503 return return_self(self);
2504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002506 if (to_len == 0) {
2507 /* delete all occurrences of 'from' string */
2508 if (from_len == 1) {
2509 return replace_delete_single_character(
2510 self, from_s[0], maxcount);
2511 } else {
2512 return replace_delete_substring(self, from_s,
2513 from_len, maxcount);
2514 }
2515 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002517 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002518
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002519 if (from_len == to_len) {
2520 if (from_len == 1) {
2521 return replace_single_character_in_place(
2522 self,
2523 from_s[0],
2524 to_s[0],
2525 maxcount);
2526 } else {
2527 return replace_substring_in_place(
2528 self, from_s, from_len, to_s, to_len,
2529 maxcount);
2530 }
2531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002533 /* Otherwise use the more generic algorithms */
2534 if (from_len == 1) {
2535 return replace_single_character(self, from_s[0],
2536 to_s, to_len, maxcount);
2537 } else {
2538 /* len('from')>=2, len('to')>=1 */
2539 return replace_substring(self, from_s, from_len, to_s, to_len,
2540 maxcount);
2541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542}
2543
2544PyDoc_STRVAR(replace__doc__,
2545"B.replace(old, new[, count]) -> bytes\n\
2546\n\
2547Return a copy of B with all occurrences of subsection\n\
2548old replaced by new. If the optional argument count is\n\
Senthil Kumaranf7734202010-09-08 13:00:07 +00002549given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002550
2551static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002552bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002554 Py_ssize_t count = -1;
2555 PyObject *from, *to;
2556 const char *from_s, *to_s;
2557 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002559 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2560 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002562 if (PyBytes_Check(from)) {
2563 from_s = PyBytes_AS_STRING(from);
2564 from_len = PyBytes_GET_SIZE(from);
2565 }
2566 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2567 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002569 if (PyBytes_Check(to)) {
2570 to_s = PyBytes_AS_STRING(to);
2571 to_len = PyBytes_GET_SIZE(to);
2572 }
2573 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2574 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002575
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002576 return (PyObject *)replace((PyBytesObject *) self,
2577 from_s, from_len,
2578 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002579}
2580
2581/** End DALKE **/
2582
2583/* Matches the end (direction >= 0) or start (direction < 0) of self
2584 * against substr, using the start and end arguments. Returns
2585 * -1 on error, 0 if not found and 1 if found.
2586 */
2587Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002588_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002589 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002591 Py_ssize_t len = PyBytes_GET_SIZE(self);
2592 Py_ssize_t slen;
2593 const char* sub;
2594 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002595
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002596 if (PyBytes_Check(substr)) {
2597 sub = PyBytes_AS_STRING(substr);
2598 slen = PyBytes_GET_SIZE(substr);
2599 }
2600 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2601 return -1;
2602 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002604 bytes_adjust_indices(&start, &end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002605
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002606 if (direction < 0) {
2607 /* startswith */
2608 if (start+slen > len)
2609 return 0;
2610 } else {
2611 /* endswith */
2612 if (end-start < slen || start > len)
2613 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002615 if (end-slen > start)
2616 start = end - slen;
2617 }
2618 if (end-start >= slen)
2619 return ! memcmp(str+start, sub, slen);
2620 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621}
2622
2623
2624PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002625"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002626\n\
2627Return True if B starts with the specified prefix, False otherwise.\n\
2628With optional start, test B beginning at that position.\n\
2629With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002630prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631
2632static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002633bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002635 Py_ssize_t start = 0;
2636 Py_ssize_t end = PY_SSIZE_T_MAX;
2637 PyObject *subobj;
2638 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639
Jesus Ceaac451502011-04-20 17:09:23 +02002640 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002641 return NULL;
2642 if (PyTuple_Check(subobj)) {
2643 Py_ssize_t i;
2644 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2645 result = _bytes_tailmatch(self,
2646 PyTuple_GET_ITEM(subobj, i),
2647 start, end, -1);
2648 if (result == -1)
2649 return NULL;
2650 else if (result) {
2651 Py_RETURN_TRUE;
2652 }
2653 }
2654 Py_RETURN_FALSE;
2655 }
2656 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002657 if (result == -1) {
2658 if (PyErr_ExceptionMatches(PyExc_TypeError))
2659 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2660 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002661 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002662 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002663 else
2664 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002665}
2666
2667
2668PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002669"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670\n\
2671Return True if B ends with the specified suffix, False otherwise.\n\
2672With optional start, test B beginning at that position.\n\
2673With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002674suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
2676static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002677bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002678{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002679 Py_ssize_t start = 0;
2680 Py_ssize_t end = PY_SSIZE_T_MAX;
2681 PyObject *subobj;
2682 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Jesus Ceaac451502011-04-20 17:09:23 +02002684 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002685 return NULL;
2686 if (PyTuple_Check(subobj)) {
2687 Py_ssize_t i;
2688 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2689 result = _bytes_tailmatch(self,
2690 PyTuple_GET_ITEM(subobj, i),
2691 start, end, +1);
2692 if (result == -1)
2693 return NULL;
2694 else if (result) {
2695 Py_RETURN_TRUE;
2696 }
2697 }
2698 Py_RETURN_FALSE;
2699 }
2700 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002701 if (result == -1) {
2702 if (PyErr_ExceptionMatches(PyExc_TypeError))
2703 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2704 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002705 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002706 }
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002707 else
2708 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709}
2710
2711
2712PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002713"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002715Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002716to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002717handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2718a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002720able to handle UnicodeDecodeErrors.");
2721
2722static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002723bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002724{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002725 const char *encoding = NULL;
2726 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002727
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002728 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2729 return NULL;
2730 if (encoding == NULL)
2731 encoding = PyUnicode_GetDefaultEncoding();
2732 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002733}
2734
Guido van Rossum20188312006-05-05 15:15:40 +00002735
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002736PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002738\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002740Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002741Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002742
2743static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002744hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002745{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002746 if (c >= 128)
2747 return -1;
2748 if (ISDIGIT(c))
2749 return c - '0';
2750 else {
2751 if (ISUPPER(c))
2752 c = TOLOWER(c);
2753 if (c >= 'a' && c <= 'f')
2754 return c - 'a' + 10;
2755 }
2756 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002757}
2758
2759static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002760bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002761{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002762 PyObject *newstring, *hexobj;
2763 char *buf;
2764 Py_UNICODE *hex;
2765 Py_ssize_t hexlen, byteslen, i, j;
2766 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002767
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002768 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2769 return NULL;
2770 assert(PyUnicode_Check(hexobj));
2771 hexlen = PyUnicode_GET_SIZE(hexobj);
2772 hex = PyUnicode_AS_UNICODE(hexobj);
2773 byteslen = hexlen/2; /* This overestimates if there are spaces */
2774 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2775 if (!newstring)
2776 return NULL;
2777 buf = PyBytes_AS_STRING(newstring);
2778 for (i = j = 0; i < hexlen; i += 2) {
2779 /* skip over spaces in the input */
2780 while (hex[i] == ' ')
2781 i++;
2782 if (i >= hexlen)
2783 break;
2784 top = hex_digit_to_int(hex[i]);
2785 bot = hex_digit_to_int(hex[i+1]);
2786 if (top == -1 || bot == -1) {
2787 PyErr_Format(PyExc_ValueError,
2788 "non-hexadecimal number found in "
2789 "fromhex() arg at position %zd", i);
2790 goto error;
2791 }
2792 buf[j++] = (top << 4) + bot;
2793 }
2794 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2795 goto error;
2796 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002797
2798 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002799 Py_XDECREF(newstring);
2800 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002801}
2802
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002803PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002804"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002805
2806static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002807bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002808{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002809 Py_ssize_t res;
2810 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2811 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002812}
2813
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002814
2815static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002816bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002817{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002818 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002819}
2820
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002821
2822static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002823bytes_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002824 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2825 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2826 _Py_capitalize__doc__},
2827 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2828 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2829 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode__doc__},
2830 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2831 endswith__doc__},
2832 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2833 expandtabs__doc__},
2834 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2835 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2836 fromhex_doc},
2837 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2838 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2839 _Py_isalnum__doc__},
2840 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2841 _Py_isalpha__doc__},
2842 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2843 _Py_isdigit__doc__},
2844 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2845 _Py_islower__doc__},
2846 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2847 _Py_isspace__doc__},
2848 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2849 _Py_istitle__doc__},
2850 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2851 _Py_isupper__doc__},
2852 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2853 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2854 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2855 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2856 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2857 _Py_maketrans__doc__},
2858 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2859 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2860 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2861 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2862 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2863 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2864 rpartition__doc__},
2865 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2866 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2867 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2868 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2869 splitlines__doc__},
2870 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2871 startswith__doc__},
2872 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2873 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2874 _Py_swapcase__doc__},
2875 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2876 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2877 translate__doc__},
2878 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2879 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2880 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2881 sizeof__doc__},
2882 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002883};
2884
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002885static PyObject *
2886str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2887
2888static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002889bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002891 PyObject *x = NULL;
2892 const char *encoding = NULL;
2893 const char *errors = NULL;
2894 PyObject *new = NULL;
2895 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002897 if (type != &PyBytes_Type)
2898 return str_subtype_new(type, args, kwds);
2899 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2900 &encoding, &errors))
2901 return NULL;
2902 if (x == NULL) {
2903 if (encoding != NULL || errors != NULL) {
2904 PyErr_SetString(PyExc_TypeError,
2905 "encoding or errors without sequence "
2906 "argument");
2907 return NULL;
2908 }
2909 return PyBytes_FromString("");
2910 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002911
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002912 if (PyUnicode_Check(x)) {
2913 /* Encode via the codec registry */
2914 if (encoding == NULL) {
2915 PyErr_SetString(PyExc_TypeError,
2916 "string argument without an encoding");
2917 return NULL;
2918 }
2919 new = PyUnicode_AsEncodedString(x, encoding, errors);
2920 if (new == NULL)
2921 return NULL;
2922 assert(PyBytes_Check(new));
2923 return new;
2924 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002926 /* If it's not unicode, there can't be encoding or errors */
2927 if (encoding != NULL || errors != NULL) {
2928 PyErr_SetString(PyExc_TypeError,
2929 "encoding or errors without a string argument");
2930 return NULL;
2931 }
2932 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002933}
2934
2935PyObject *
2936PyBytes_FromObject(PyObject *x)
2937{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002938 PyObject *new, *it;
2939 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002940
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002941 if (x == NULL) {
2942 PyErr_BadInternalCall();
2943 return NULL;
2944 }
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002945
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002946 /* Is it an int? */
2947 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2948 if (size == -1 && PyErr_Occurred()) {
2949 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2950 return NULL;
2951 PyErr_Clear();
2952 }
2953 else if (size < 0) {
2954 PyErr_SetString(PyExc_ValueError, "negative count");
2955 return NULL;
2956 }
2957 else {
2958 new = PyBytes_FromStringAndSize(NULL, size);
2959 if (new == NULL) {
2960 return NULL;
2961 }
2962 if (size > 0) {
2963 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2964 }
2965 return new;
2966 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002967
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002968 /* Use the modern buffer interface */
2969 if (PyObject_CheckBuffer(x)) {
2970 Py_buffer view;
2971 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2972 return NULL;
2973 new = PyBytes_FromStringAndSize(NULL, view.len);
2974 if (!new)
2975 goto fail;
2976 /* XXX(brett.cannon): Better way to get to internal buffer? */
2977 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2978 &view, view.len, 'C') < 0)
2979 goto fail;
2980 PyBuffer_Release(&view);
2981 return new;
2982 fail:
2983 Py_XDECREF(new);
2984 PyBuffer_Release(&view);
2985 return NULL;
2986 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002988 /* For iterator version, create a string object and resize as needed */
2989 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2990 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2991 input being a truly long iterator. */
2992 size = 64;
2993 new = PyBytes_FromStringAndSize(NULL, size);
2994 if (new == NULL)
2995 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002996
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002997 /* XXX Optimize this if the arguments is a list, tuple */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002999 /* Get the iterator */
3000 it = PyObject_GetIter(x);
3001 if (it == NULL)
3002 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003004 /* Run the iterator to exhaustion */
3005 for (i = 0; ; i++) {
3006 PyObject *item;
3007 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003009 /* Get the next item */
3010 item = PyIter_Next(it);
3011 if (item == NULL) {
3012 if (PyErr_Occurred())
3013 goto error;
3014 break;
3015 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003016
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003017 /* Interpret it as an int (__index__) */
3018 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3019 Py_DECREF(item);
3020 if (value == -1 && PyErr_Occurred())
3021 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003023 /* Range check */
3024 if (value < 0 || value >= 256) {
3025 PyErr_SetString(PyExc_ValueError,
3026 "bytes must be in range(0, 256)");
3027 goto error;
3028 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003029
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003030 /* Append the byte */
3031 if (i >= size) {
3032 size *= 2;
3033 if (_PyBytes_Resize(&new, size) < 0)
3034 goto error;
3035 }
Antoine Pitroubc760d92010-08-15 17:46:50 +00003036 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003037 }
3038 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003040 /* Clean up and return success */
3041 Py_DECREF(it);
3042 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003043
3044 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003045 /* Error handling when new != NULL */
3046 Py_XDECREF(it);
3047 Py_DECREF(new);
3048 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003049}
3050
3051static PyObject *
3052str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3053{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003054 PyObject *tmp, *pnew;
3055 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003056
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003057 assert(PyType_IsSubtype(type, &PyBytes_Type));
3058 tmp = bytes_new(&PyBytes_Type, args, kwds);
3059 if (tmp == NULL)
3060 return NULL;
3061 assert(PyBytes_CheckExact(tmp));
3062 n = PyBytes_GET_SIZE(tmp);
3063 pnew = type->tp_alloc(type, n);
3064 if (pnew != NULL) {
3065 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3066 PyBytes_AS_STRING(tmp), n+1);
3067 ((PyBytesObject *)pnew)->ob_shash =
3068 ((PyBytesObject *)tmp)->ob_shash;
3069 }
3070 Py_DECREF(tmp);
3071 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003072}
3073
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003074PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003075"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003076bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003077bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3078bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003079\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003080Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003081 - an iterable yielding integers in range(256)\n\
3082 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003083 - a bytes or a buffer object\n\
3084 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003085
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003086static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003087
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003088PyTypeObject PyBytes_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003089 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3090 "bytes",
3091 PyBytesObject_SIZE,
3092 sizeof(char),
3093 bytes_dealloc, /* tp_dealloc */
3094 0, /* tp_print */
3095 0, /* tp_getattr */
3096 0, /* tp_setattr */
3097 0, /* tp_reserved */
3098 (reprfunc)bytes_repr, /* tp_repr */
3099 0, /* tp_as_number */
3100 &bytes_as_sequence, /* tp_as_sequence */
3101 &bytes_as_mapping, /* tp_as_mapping */
3102 (hashfunc)bytes_hash, /* tp_hash */
3103 0, /* tp_call */
3104 bytes_str, /* tp_str */
3105 PyObject_GenericGetAttr, /* tp_getattro */
3106 0, /* tp_setattro */
3107 &bytes_as_buffer, /* tp_as_buffer */
3108 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3109 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3110 bytes_doc, /* tp_doc */
3111 0, /* tp_traverse */
3112 0, /* tp_clear */
3113 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3114 0, /* tp_weaklistoffset */
3115 bytes_iter, /* tp_iter */
3116 0, /* tp_iternext */
3117 bytes_methods, /* tp_methods */
3118 0, /* tp_members */
3119 0, /* tp_getset */
3120 &PyBaseObject_Type, /* tp_base */
3121 0, /* tp_dict */
3122 0, /* tp_descr_get */
3123 0, /* tp_descr_set */
3124 0, /* tp_dictoffset */
3125 0, /* tp_init */
3126 0, /* tp_alloc */
3127 bytes_new, /* tp_new */
3128 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003129};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003130
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003131void
3132PyBytes_Concat(register PyObject **pv, register PyObject *w)
3133{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003134 register PyObject *v;
3135 assert(pv != NULL);
3136 if (*pv == NULL)
3137 return;
3138 if (w == NULL) {
3139 Py_DECREF(*pv);
3140 *pv = NULL;
3141 return;
3142 }
3143 v = bytes_concat(*pv, w);
3144 Py_DECREF(*pv);
3145 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003146}
3147
3148void
3149PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3150{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003151 PyBytes_Concat(pv, w);
3152 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003153}
3154
3155
3156/* The following function breaks the notion that strings are immutable:
3157 it changes the size of a string. We get away with this only if there
3158 is only one module referencing the object. You can also think of it
3159 as creating a new string object and destroying the old one, only
3160 more efficiently. In any case, don't use this if the string may
3161 already be known to some other part of the code...
3162 Note that if there's not enough memory to resize the string, the original
3163 string object at *pv is deallocated, *pv is set to NULL, an "out of
3164 memory" exception is set, and -1 is returned. Else (on success) 0 is
3165 returned, and the value in *pv may or may not be the same as on input.
3166 As always, an extra byte is allocated for a trailing \0 byte (newsize
3167 does *not* include that), and a trailing \0 byte is stored.
3168*/
3169
3170int
3171_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3172{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003173 register PyObject *v;
3174 register PyBytesObject *sv;
3175 v = *pv;
3176 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3177 *pv = 0;
3178 Py_DECREF(v);
3179 PyErr_BadInternalCall();
3180 return -1;
3181 }
3182 /* XXX UNREF/NEWREF interface should be more symmetrical */
3183 _Py_DEC_REFTOTAL;
3184 _Py_ForgetReference(v);
3185 *pv = (PyObject *)
3186 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
3187 if (*pv == NULL) {
3188 PyObject_Del(v);
3189 PyErr_NoMemory();
3190 return -1;
3191 }
3192 _Py_NewReference(*pv);
3193 sv = (PyBytesObject *) *pv;
3194 Py_SIZE(sv) = newsize;
3195 sv->ob_sval[newsize] = '\0';
3196 sv->ob_shash = -1; /* invalidate cached hash value */
3197 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003198}
3199
3200/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3201 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3202 * Python's regular ints.
3203 * Return value: a new PyString*, or NULL if error.
3204 * . *pbuf is set to point into it,
3205 * *plen set to the # of chars following that.
3206 * Caller must decref it when done using pbuf.
3207 * The string starting at *pbuf is of the form
3208 * "-"? ("0x" | "0X")? digit+
3209 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3210 * set in flags. The case of hex digits will be correct,
3211 * There will be at least prec digits, zero-filled on the left if
3212 * necessary to get that many.
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003213 * val object to be converted
3214 * flags bitmask of format flags; only F_ALT is looked at
3215 * prec minimum number of digits; 0-fill on left if needed
3216 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003217 *
3218 * CAUTION: o, x and X conversions on regular ints can never
3219 * produce a '-' sign, but can for Python's unbounded ints.
3220 */
3221PyObject*
3222_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003223 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003224{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003225 PyObject *result = NULL;
3226 char *buf;
3227 Py_ssize_t i;
3228 int sign; /* 1 if '-', else 0 */
3229 int len; /* number of characters */
3230 Py_ssize_t llen;
3231 int numdigits; /* len == numnondigits + numdigits */
3232 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003233
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003234 /* Avoid exceeding SSIZE_T_MAX */
3235 if (prec > INT_MAX-3) {
3236 PyErr_SetString(PyExc_OverflowError,
3237 "precision too large");
3238 return NULL;
3239 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003240
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003241 switch (type) {
3242 case 'd':
3243 case 'u':
3244 /* Special-case boolean: we want 0/1 */
3245 if (PyBool_Check(val))
3246 result = PyNumber_ToBase(val, 10);
3247 else
3248 result = Py_TYPE(val)->tp_str(val);
3249 break;
3250 case 'o':
3251 numnondigits = 2;
3252 result = PyNumber_ToBase(val, 8);
3253 break;
3254 case 'x':
3255 case 'X':
3256 numnondigits = 2;
3257 result = PyNumber_ToBase(val, 16);
3258 break;
3259 default:
3260 assert(!"'type' not in [duoxX]");
3261 }
3262 if (!result)
3263 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003264
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003265 buf = _PyUnicode_AsString(result);
3266 if (!buf) {
3267 Py_DECREF(result);
3268 return NULL;
3269 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003270
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003271 /* To modify the string in-place, there can only be one reference. */
3272 if (Py_REFCNT(result) != 1) {
3273 PyErr_BadInternalCall();
3274 return NULL;
3275 }
3276 llen = PyUnicode_GetSize(result);
3277 if (llen > INT_MAX) {
3278 PyErr_SetString(PyExc_ValueError,
3279 "string too large in _PyBytes_FormatLong");
3280 return NULL;
3281 }
3282 len = (int)llen;
3283 if (buf[len-1] == 'L') {
3284 --len;
3285 buf[len] = '\0';
3286 }
3287 sign = buf[0] == '-';
3288 numnondigits += sign;
3289 numdigits = len - numnondigits;
3290 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003291
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003292 /* Get rid of base marker unless F_ALT */
3293 if (((flags & F_ALT) == 0 &&
3294 (type == 'o' || type == 'x' || type == 'X'))) {
3295 assert(buf[sign] == '0');
3296 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3297 buf[sign+1] == 'o');
3298 numnondigits -= 2;
3299 buf += 2;
3300 len -= 2;
3301 if (sign)
3302 buf[0] = '-';
3303 assert(len == numnondigits + numdigits);
3304 assert(numdigits > 0);
3305 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003306
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003307 /* Fill with leading zeroes to meet minimum width. */
3308 if (prec > numdigits) {
3309 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3310 numnondigits + prec);
3311 char *b1;
3312 if (!r1) {
3313 Py_DECREF(result);
3314 return NULL;
3315 }
3316 b1 = PyBytes_AS_STRING(r1);
3317 for (i = 0; i < numnondigits; ++i)
3318 *b1++ = *buf++;
3319 for (i = 0; i < prec - numdigits; i++)
3320 *b1++ = '0';
3321 for (i = 0; i < numdigits; i++)
3322 *b1++ = *buf++;
3323 *b1 = '\0';
3324 Py_DECREF(result);
3325 result = r1;
3326 buf = PyBytes_AS_STRING(result);
3327 len = numnondigits + prec;
3328 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003329
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003330 /* Fix up case for hex conversions. */
3331 if (type == 'X') {
3332 /* Need to convert all lower case letters to upper case.
3333 and need to convert 0x to 0X (and -0x to -0X). */
3334 for (i = 0; i < len; i++)
3335 if (buf[i] >= 'a' && buf[i] <= 'x')
3336 buf[i] -= 'a'-'A';
3337 }
3338 *pbuf = buf;
3339 *plen = len;
3340 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003341}
3342
3343void
3344PyBytes_Fini(void)
3345{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003346 int i;
3347 for (i = 0; i < UCHAR_MAX + 1; i++) {
3348 Py_XDECREF(characters[i]);
3349 characters[i] = NULL;
3350 }
3351 Py_XDECREF(nullstring);
3352 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003353}
3354
Benjamin Peterson4116f362008-05-27 00:36:20 +00003355/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003356
3357typedef struct {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003358 PyObject_HEAD
3359 Py_ssize_t it_index;
3360 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003361} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003362
3363static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003364striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003365{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003366 _PyObject_GC_UNTRACK(it);
3367 Py_XDECREF(it->it_seq);
3368 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003369}
3370
3371static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003372striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003373{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003374 Py_VISIT(it->it_seq);
3375 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003376}
3377
3378static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003379striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003380{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003381 PyBytesObject *seq;
3382 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003383
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003384 assert(it != NULL);
3385 seq = it->it_seq;
3386 if (seq == NULL)
3387 return NULL;
3388 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003389
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003390 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3391 item = PyLong_FromLong(
3392 (unsigned char)seq->ob_sval[it->it_index]);
3393 if (item != NULL)
3394 ++it->it_index;
3395 return item;
3396 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003397
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003398 Py_DECREF(seq);
3399 it->it_seq = NULL;
3400 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003401}
3402
3403static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003404striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003405{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003406 Py_ssize_t len = 0;
3407 if (it->it_seq)
3408 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3409 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003410}
3411
3412PyDoc_STRVAR(length_hint_doc,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003413 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003414
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003415static PyMethodDef striter_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003416 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3417 length_hint_doc},
3418 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003419};
3420
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003421PyTypeObject PyBytesIter_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003422 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3423 "bytes_iterator", /* tp_name */
3424 sizeof(striterobject), /* tp_basicsize */
3425 0, /* tp_itemsize */
3426 /* methods */
3427 (destructor)striter_dealloc, /* tp_dealloc */
3428 0, /* tp_print */
3429 0, /* tp_getattr */
3430 0, /* tp_setattr */
3431 0, /* tp_reserved */
3432 0, /* tp_repr */
3433 0, /* tp_as_number */
3434 0, /* tp_as_sequence */
3435 0, /* tp_as_mapping */
3436 0, /* tp_hash */
3437 0, /* tp_call */
3438 0, /* tp_str */
3439 PyObject_GenericGetAttr, /* tp_getattro */
3440 0, /* tp_setattro */
3441 0, /* tp_as_buffer */
3442 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3443 0, /* tp_doc */
3444 (traverseproc)striter_traverse, /* tp_traverse */
3445 0, /* tp_clear */
3446 0, /* tp_richcompare */
3447 0, /* tp_weaklistoffset */
3448 PyObject_SelfIter, /* tp_iter */
3449 (iternextfunc)striter_next, /* tp_iternext */
3450 striter_methods, /* tp_methods */
3451 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003452};
3453
3454static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003455bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003456{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003457 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003458
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00003459 if (!PyBytes_Check(seq)) {
3460 PyErr_BadInternalCall();
3461 return NULL;
3462 }
3463 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3464 if (it == NULL)
3465 return NULL;
3466 it->it_index = 0;
3467 Py_INCREF(seq);
3468 it->it_seq = (PyBytesObject *)seq;
3469 _PyObject_GC_TRACK(it);
3470 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003471}