blob: 41eee40d55acfa744f2c6bec0e23f2585780cb2b [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
17 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
Christian Heimes90aa7642007-12-19 02:45:37 +000019 Py_TYPE(obj)->tp_name);
Guido van Rossuma74184e2007-08-29 04:05:57 +000020 return -1;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
24 return -1;
25 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE is the basic size of a string: the offset of the
   ob_sval member plus one byte.  Any memory allocation for a string of
   length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
59 PyString object must be treated as immutable and you must not fill in nor
60 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +000077 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
82 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
88#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
91 Py_INCREF(op);
92 return (PyObject *)op;
93 }
94
Mark Dickinsonfd24b322008-12-06 15:33:31 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
100
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000101 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127 assert(str != NULL);
128 size = strlen(str);
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000130 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000131 "byte string is too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
135#ifdef COUNT_ALLOCS
136 null_strings++;
137#endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
142#ifdef COUNT_ALLOCS
143 one_strings++;
144#endif
145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149 /* Inline PyObject_NewVar */
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178#else
179#ifdef __va_copy
180 __va_copy(count, vargs);
181#else
182 count = vargs;
183#endif
184#endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
239 expand:
240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
345
346 end:
347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
354 PyObject* ret;
355 va_list vargs;
356
357#ifdef HAVE_STDARG_PROTOTYPES
358 va_start(vargs, format);
359#else
360 va_start(vargs);
361#endif
362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000368bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
383{
384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
407
408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
413
414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
498 goto non_esc; /* an arbitry number of unescaped
499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
505 failed:
506 Py_DECREF(v);
507 return NULL;
508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
537 register char **s,
538 register Py_ssize_t *len)
539{
540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
544
545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
550
551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
Eric Smith0923d1d2009-04-16 20:16:10 +0000565#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566
567#include "stringlib/fastsearch.h"
568#include "stringlib/count.h"
569#include "stringlib/find.h"
570#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000571#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
Eric Smith0f78bff2009-11-30 01:01:42 +0000573#define STRINGLIB_MUTABLE 0
574#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000575
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576PyObject *
577PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000578{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579 static const char *hexdigits = "0123456789abcdef";
580 register PyBytesObject* op = (PyBytesObject*) obj;
581 Py_ssize_t length = Py_SIZE(op);
582 size_t newsize = 3 + 4 * length;
583 PyObject *v;
584 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
585 PyErr_SetString(PyExc_OverflowError,
586 "bytes object is too large to make repr");
587 return NULL;
588 }
589 v = PyUnicode_FromUnicode(NULL, newsize);
590 if (v == NULL) {
591 return NULL;
592 }
593 else {
594 register Py_ssize_t i;
595 register Py_UNICODE c;
596 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
597 int quote;
598
599 /* Figure out which quote to use; single is preferred */
600 quote = '\'';
601 if (smartquotes) {
602 char *test, *start;
603 start = PyBytes_AS_STRING(op);
604 for (test = start; test < start+length; ++test) {
605 if (*test == '"') {
606 quote = '\''; /* back to single */
607 goto decided;
608 }
609 else if (*test == '\'')
610 quote = '"';
611 }
612 decided:
613 ;
614 }
615
616 *p++ = 'b', *p++ = quote;
617 for (i = 0; i < length; i++) {
618 /* There's at least enough room for a hex escape
619 and a closing quote. */
620 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
621 c = op->ob_sval[i];
622 if (c == quote || c == '\\')
623 *p++ = '\\', *p++ = c;
624 else if (c == '\t')
625 *p++ = '\\', *p++ = 't';
626 else if (c == '\n')
627 *p++ = '\\', *p++ = 'n';
628 else if (c == '\r')
629 *p++ = '\\', *p++ = 'r';
630 else if (c < ' ' || c >= 0x7f) {
631 *p++ = '\\';
632 *p++ = 'x';
633 *p++ = hexdigits[(c & 0xf0) >> 4];
634 *p++ = hexdigits[c & 0xf];
635 }
636 else
637 *p++ = c;
638 }
639 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
640 *p++ = quote;
641 *p = '\0';
642 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
643 Py_DECREF(v);
644 return NULL;
645 }
646 return v;
647 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000648}
649
Neal Norwitz6968b052007-02-27 19:02:19 +0000650static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000651bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000652{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000653 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000654}
655
Neal Norwitz6968b052007-02-27 19:02:19 +0000656static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000658{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000659 if (Py_BytesWarningFlag) {
660 if (PyErr_WarnEx(PyExc_BytesWarning,
661 "str() on a bytes instance", 1))
662 return NULL;
663 }
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000664 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000665}
666
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000667static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669{
670 return Py_SIZE(a);
671}
Neal Norwitz6968b052007-02-27 19:02:19 +0000672
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673/* This is also used by PyBytes_Concat() */
674static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000675bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000676{
677 Py_ssize_t size;
678 Py_buffer va, vb;
679 PyObject *result = NULL;
680
681 va.len = -1;
682 vb.len = -1;
683 if (_getbuffer(a, &va) < 0 ||
684 _getbuffer(b, &vb) < 0) {
685 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
686 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
687 goto done;
688 }
689
690 /* Optimize end cases */
691 if (va.len == 0 && PyBytes_CheckExact(b)) {
692 result = b;
693 Py_INCREF(result);
694 goto done;
695 }
696 if (vb.len == 0 && PyBytes_CheckExact(a)) {
697 result = a;
698 Py_INCREF(result);
699 goto done;
700 }
701
702 size = va.len + vb.len;
703 if (size < 0) {
704 PyErr_NoMemory();
705 goto done;
706 }
707
708 result = PyBytes_FromStringAndSize(NULL, size);
709 if (result != NULL) {
710 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
711 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
712 }
713
714 done:
715 if (va.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000716 PyBuffer_Release(&va);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000717 if (vb.len != -1)
Martin v. Löwis423be952008-08-13 15:53:07 +0000718 PyBuffer_Release(&vb);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000719 return result;
720}
Neal Norwitz6968b052007-02-27 19:02:19 +0000721
722static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000723bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000724{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000725 register Py_ssize_t i;
726 register Py_ssize_t j;
727 register Py_ssize_t size;
728 register PyBytesObject *op;
729 size_t nbytes;
730 if (n < 0)
731 n = 0;
732 /* watch out for overflows: the size can overflow int,
733 * and the # of bytes needed can overflow size_t
734 */
735 size = Py_SIZE(a) * n;
736 if (n && size / n != Py_SIZE(a)) {
737 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000738 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000739 return NULL;
740 }
741 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
742 Py_INCREF(a);
743 return (PyObject *)a;
744 }
745 nbytes = (size_t)size;
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000746 if (nbytes + PyBytesObject_SIZE <= nbytes) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000747 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000748 "repeated bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000749 return NULL;
750 }
Mark Dickinsonfd24b322008-12-06 15:33:31 +0000751 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000752 if (op == NULL)
753 return PyErr_NoMemory();
754 PyObject_INIT_VAR(op, &PyBytes_Type, size);
755 op->ob_shash = -1;
756 op->ob_sval[size] = '\0';
757 if (Py_SIZE(a) == 1 && n > 0) {
758 memset(op->ob_sval, a->ob_sval[0] , n);
759 return (PyObject *) op;
760 }
761 i = 0;
762 if (i < size) {
763 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
764 i = Py_SIZE(a);
765 }
766 while (i < size) {
767 j = (i <= size-i) ? i : size-i;
768 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
769 i += j;
770 }
771 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000772}
773
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000775bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000776{
777 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
778 if (ival == -1 && PyErr_Occurred()) {
779 Py_buffer varg;
780 int pos;
781 PyErr_Clear();
782 if (_getbuffer(arg, &varg) < 0)
783 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000784 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 varg.buf, varg.len, 0);
Martin v. Löwis423be952008-08-13 15:53:07 +0000786 PyBuffer_Release(&varg);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000787 return pos >= 0;
788 }
789 if (ival < 0 || ival >= 256) {
790 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
791 return -1;
792 }
793
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000794 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000795}
796
Neal Norwitz6968b052007-02-27 19:02:19 +0000797static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000798bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000799{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800 if (i < 0 || i >= Py_SIZE(a)) {
Benjamin Peterson4116f362008-05-27 00:36:20 +0000801 PyErr_SetString(PyExc_IndexError, "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000802 return NULL;
803 }
804 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000805}
806
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000807static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000808bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000809{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000810 int c;
811 Py_ssize_t len_a, len_b;
812 Py_ssize_t min_len;
813 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000814
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000815 /* Make sure both arguments are strings. */
816 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Barry Warsaw9e9dcd62008-10-17 01:50:37 +0000817 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000818 (PyObject_IsInstance((PyObject*)a,
819 (PyObject*)&PyUnicode_Type) ||
820 PyObject_IsInstance((PyObject*)b,
821 (PyObject*)&PyUnicode_Type))) {
822 if (PyErr_WarnEx(PyExc_BytesWarning,
Georg Brandle5d68ac2008-06-04 11:30:26 +0000823 "Comparison between bytes and string", 1))
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000824 return NULL;
825 }
826 result = Py_NotImplemented;
827 goto out;
828 }
829 if (a == b) {
830 switch (op) {
831 case Py_EQ:case Py_LE:case Py_GE:
832 result = Py_True;
833 goto out;
834 case Py_NE:case Py_LT:case Py_GT:
835 result = Py_False;
836 goto out;
837 }
838 }
839 if (op == Py_EQ) {
840 /* Supporting Py_NE here as well does not save
841 much time, since Py_NE is rarely used. */
842 if (Py_SIZE(a) == Py_SIZE(b)
843 && (a->ob_sval[0] == b->ob_sval[0]
844 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
845 result = Py_True;
846 } else {
847 result = Py_False;
848 }
849 goto out;
850 }
851 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
852 min_len = (len_a < len_b) ? len_a : len_b;
853 if (min_len > 0) {
854 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
855 if (c==0)
856 c = memcmp(a->ob_sval, b->ob_sval, min_len);
857 } else
858 c = 0;
859 if (c == 0)
860 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
861 switch (op) {
862 case Py_LT: c = c < 0; break;
863 case Py_LE: c = c <= 0; break;
864 case Py_EQ: assert(0); break; /* unreachable */
865 case Py_NE: c = c != 0; break;
866 case Py_GT: c = c > 0; break;
867 case Py_GE: c = c >= 0; break;
868 default:
869 result = Py_NotImplemented;
870 goto out;
871 }
872 result = c ? Py_True : Py_False;
873 out:
874 Py_INCREF(result);
875 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000876}
877
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000878static long
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000879bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000880{
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000881 register Py_ssize_t len;
882 register unsigned char *p;
883 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000884
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000885 if (a->ob_shash != -1)
886 return a->ob_shash;
887 len = Py_SIZE(a);
888 p = (unsigned char *) a->ob_sval;
889 x = *p << 7;
890 while (--len >= 0)
891 x = (1000003*x) ^ *p++;
892 x ^= Py_SIZE(a);
893 if (x == -1)
894 x = -2;
895 a->ob_shash = x;
896 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000897}
898
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000899static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000900bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000901{
902 if (PyIndex_Check(item)) {
903 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
904 if (i == -1 && PyErr_Occurred())
905 return NULL;
906 if (i < 0)
907 i += PyBytes_GET_SIZE(self);
908 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
909 PyErr_SetString(PyExc_IndexError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000910 "index out of range");
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000911 return NULL;
912 }
913 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
914 }
915 else if (PySlice_Check(item)) {
916 Py_ssize_t start, stop, step, slicelength, cur, i;
917 char* source_buf;
918 char* result_buf;
919 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000920
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000921 if (PySlice_GetIndicesEx((PySliceObject*)item,
922 PyBytes_GET_SIZE(self),
923 &start, &stop, &step, &slicelength) < 0) {
924 return NULL;
925 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000926
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000927 if (slicelength <= 0) {
928 return PyBytes_FromStringAndSize("", 0);
929 }
930 else if (start == 0 && step == 1 &&
931 slicelength == PyBytes_GET_SIZE(self) &&
932 PyBytes_CheckExact(self)) {
933 Py_INCREF(self);
934 return (PyObject *)self;
935 }
936 else if (step == 1) {
937 return PyBytes_FromStringAndSize(
938 PyBytes_AS_STRING(self) + start,
939 slicelength);
940 }
941 else {
Alexandre Vassalottie2641f42009-04-03 06:38:02 +0000942 source_buf = PyBytes_AS_STRING(self);
943 result = PyBytes_FromStringAndSize(NULL, slicelength);
944 if (result == NULL)
945 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000946
Alexandre Vassalottie2641f42009-04-03 06:38:02 +0000947 result_buf = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000948 for (cur = start, i = 0; i < slicelength;
949 cur += step, i++) {
950 result_buf[i] = source_buf[cur];
951 }
952
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000953 return result;
954 }
955 }
956 else {
957 PyErr_Format(PyExc_TypeError,
Benjamin Peterson4116f362008-05-27 00:36:20 +0000958 "byte indices must be integers, not %.200s",
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959 Py_TYPE(item)->tp_name);
960 return NULL;
961 }
962}
963
964static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000965bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000966{
Martin v. Löwis423be952008-08-13 15:53:07 +0000967 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
Antoine Pitrou2f89aa62008-08-02 21:02:48 +0000968 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969}
970
/* Sequence protocol slots for bytes: length, concatenation, repetition,
   item access and containment.  Slots holding 0 are left empty (the slice
   and the two assignment slots). */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
981
/* Mapping protocol slots for bytes: length and subscripting with an index
   or a slice (see bytes_subscript).  The third slot is left empty. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,       /* mp_length */
    (binaryfunc)bytes_subscript, /* mp_subscript */
    0,                           /* mp_ass_subscript: not provided */
};
987
/* Buffer protocol slots for bytes: only the "get buffer" hook is set;
   the second slot is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer, /* bf_getbuffer */
    NULL,                                  /* bf_releasebuffer: not provided */
};
992
993
/* Selectors for the strip family of methods.  Each value doubles as an
   index into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* PyArg_ParseTuple() format strings, one per selector above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for selector i: its format string with the three-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
1002
Neal Norwitz6968b052007-02-27 19:02:19 +00001003
/* True when the `length` bytes of `target` starting at index `offset` are
   equal to the first `length` bytes of `pattern`.  The first and last bytes
   are compared directly before memcmp() is run on the bytes in between.

   Don't call if length < 2: the memcmp() size (length-2) would be negative.

   Every argument is parenthesized in the expansion so that arbitrary
   expressions (e.g. `buf + 1`) may be passed; arguments can be evaluated
   more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1009
1010
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for at most `maxsplit` splits: one element more than
   the number of splits (5 splits gives 6 elements), but never more than
   MAX_PREALLOC.  The argument is parenthesized so that any expression may
   be passed; it can be evaluated more than once, so it must not have side
   effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
Neal Norwitz6968b052007-02-27 19:02:19 +00001023
/* Add the bytes data[left:right] as the next item of the result list.

   The macro relies on names supplied by the enclosing function:
     list     the result list being built
     count    number of items stored so far (incremented here)
     str      a PyObject * scratch variable
     onError  a cleanup label, jumped to when anything fails

   While count < MAX_PREALLOC the new object is stored straight into a
   preallocated slot with PyList_SET_ITEM, which takes over our reference.
   After that, items are added with PyList_Append and our own reference is
   dropped whether or not the append succeeded.

   Wrapped in do { } while (0) so that `SPLIT_ADD(...);` is exactly one
   statement, including when it is the body of an unbraced `if`. */
#define SPLIT_ADD(data, left, right) do {                       \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        int split_add_failed = PyList_Append(list, str);        \
        Py_DECREF(str);                                         \
        if (split_add_failed)                                   \
            goto onError;                                       \
    }                                                           \
    count++;                                                    \
} while (0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001040
/* Always force the list to the expected size: `count`, a variable of the
   enclosing function, is the number of items actually stored. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)

/* Scanning helpers over the byte array `s`.  Each one moves the index
   variable `i` in place: SKIP_SPACE / SKIP_NONSPACE advance it and stop at
   `len`; RSKIP_SPACE / RSKIP_NONSPACE move it backwards and stop once it
   is below 0.  `i` must be a modifiable variable; `s` and `len` are
   parenthesized in the expansion so that expressions may be passed. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !ISSPACE((s)[(i)])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !ISSPACE((s)[(i)])) (i)--; }
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
1049Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001050split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001051{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052 const char *s = PyBytes_AS_STRING(self);
1053 Py_ssize_t i, j, count=0;
1054 PyObject *str;
1055 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001056
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001057 if (list == NULL)
1058 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001060 i = j = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062 while (maxsplit-- > 0) {
1063 SKIP_SPACE(s, i, len);
1064 if (i==len) break;
1065 j = i; i++;
1066 SKIP_NONSPACE(s, i, len);
1067 if (j == 0 && i == len && PyBytes_CheckExact(self)) {
1068 /* No whitespace in self, so just use it as list[0] */
1069 Py_INCREF(self);
1070 PyList_SET_ITEM(list, 0, (PyObject *)self);
1071 count++;
1072 break;
1073 }
1074 SPLIT_ADD(s, j, i);
1075 }
1076
1077 if (i < len) {
1078 /* Only occurs when maxsplit was reached */
1079 /* Skip any remaining whitespace and copy to end of string */
1080 SKIP_SPACE(s, i, len);
1081 if (i != len)
1082 SPLIT_ADD(s, i, len);
1083 }
1084 FIX_PREALLOC_SIZE(list);
1085 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001087 Py_DECREF(list);
1088 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
Guido van Rossum8f950672007-09-10 16:53:45 +00001091Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001092split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001093{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001094 const char *s = PyBytes_AS_STRING(self);
1095 register Py_ssize_t i, j, count=0;
1096 PyObject *str;
1097 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001098
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099 if (list == NULL)
1100 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001101
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102 i = j = 0;
1103 while ((j < len) && (maxcount-- > 0)) {
1104 for(; j<len; j++) {
1105 /* I found that using memchr makes no difference */
1106 if (s[j] == ch) {
1107 SPLIT_ADD(s, i, j);
1108 i = j = j + 1;
1109 break;
1110 }
1111 }
1112 }
1113 if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
1114 /* ch not in self, so just use self as list[0] */
1115 Py_INCREF(self);
1116 PyList_SET_ITEM(list, 0, (PyObject *)self);
1117 count++;
1118 }
1119 else if (i <= len) {
1120 SPLIT_ADD(s, i, len);
1121 }
1122 FIX_PREALLOC_SIZE(list);
1123 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001124
1125 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001126 Py_DECREF(list);
1127 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001128}
1129
Neal Norwitz6968b052007-02-27 19:02:19 +00001130PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001131"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001132\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001133Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134If sep is not specified or is None, B is split on ASCII whitespace\n\
1135characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001136If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001137
1138static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001139bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001140{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1142 Py_ssize_t maxsplit = -1, count=0;
1143 const char *s = PyBytes_AS_STRING(self), *sub;
1144 Py_buffer vsub;
1145 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001146#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147 Py_ssize_t pos;
Neal Norwitz6968b052007-02-27 19:02:19 +00001148#endif
1149
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1151 return NULL;
1152 if (maxsplit < 0)
1153 maxsplit = PY_SSIZE_T_MAX;
1154 if (subobj == Py_None)
1155 return split_whitespace(self, len, maxsplit);
1156 if (_getbuffer(subobj, &vsub) < 0)
1157 return NULL;
1158 sub = vsub.buf;
1159 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001160
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001161 if (n == 0) {
1162 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001163 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001164 return NULL;
1165 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001166 else if (n == 1) {
1167 list = split_char(self, len, sub[0], maxsplit);
1168 PyBuffer_Release(&vsub);
1169 return list;
1170 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001171
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001172 list = PyList_New(PREALLOC_SIZE(maxsplit));
1173 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001174 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001175 return NULL;
1176 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001177
1178#ifdef USE_FAST
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001179 i = j = 0;
1180 while (maxsplit-- > 0) {
1181 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1182 if (pos < 0)
1183 break;
1184 j = i+pos;
1185 SPLIT_ADD(s, i, j);
1186 i = j + n;
1187 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001188#else
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001189 i = j = 0;
1190 while ((j+n <= len) && (maxsplit-- > 0)) {
1191 for (; j+n <= len; j++) {
1192 if (Py_STRING_MATCH(s, j, sub, n)) {
1193 SPLIT_ADD(s, i, j);
1194 i = j = j + n;
1195 break;
1196 }
1197 }
1198 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001199#endif
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200 SPLIT_ADD(s, i, len);
1201 FIX_PREALLOC_SIZE(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001202 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001203 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001204
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001205 onError:
1206 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001207 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208 return NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001209}
1210
Neal Norwitz6968b052007-02-27 19:02:19 +00001211PyDoc_STRVAR(partition__doc__,
1212"B.partition(sep) -> (head, sep, tail)\n\
1213\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001214Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001215the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001217
1218static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001219bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001220{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221 const char *sep;
1222 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001223
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224 if (PyBytes_Check(sep_obj)) {
1225 sep = PyBytes_AS_STRING(sep_obj);
1226 sep_len = PyBytes_GET_SIZE(sep_obj);
1227 }
1228 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1229 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001230
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231 return stringlib_partition(
1232 (PyObject*) self,
1233 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1234 sep_obj, sep, sep_len
1235 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001236}
1237
1238PyDoc_STRVAR(rpartition__doc__,
1239"B.rpartition(sep) -> (tail, sep, head)\n\
1240\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001241Search for the separator sep in B, starting at the end of B,\n\
1242and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001243part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001245
1246static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001247bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001248{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249 const char *sep;
1250 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001251
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252 if (PyBytes_Check(sep_obj)) {
1253 sep = PyBytes_AS_STRING(sep_obj);
1254 sep_len = PyBytes_GET_SIZE(sep_obj);
1255 }
1256 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1257 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001258
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259 return stringlib_rpartition(
1260 (PyObject*) self,
1261 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1262 sep_obj, sep, sep_len
1263 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001264}
1265
1266Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Neal Norwitz6968b052007-02-27 19:02:19 +00001268{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269 const char *s = PyBytes_AS_STRING(self);
1270 Py_ssize_t i, j, count=0;
1271 PyObject *str;
1272 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274 if (list == NULL)
1275 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001276
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277 i = j = len-1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001278
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279 while (maxsplit-- > 0) {
1280 RSKIP_SPACE(s, i);
1281 if (i<0) break;
1282 j = i; i--;
1283 RSKIP_NONSPACE(s, i);
1284 if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
1285 /* No whitespace in self, so just use it as list[0] */
1286 Py_INCREF(self);
1287 PyList_SET_ITEM(list, 0, (PyObject *)self);
1288 count++;
1289 break;
1290 }
1291 SPLIT_ADD(s, i + 1, j + 1);
1292 }
1293 if (i >= 0) {
1294 /* Only occurs when maxsplit was reached. Skip any remaining
1295 whitespace and copy to beginning of string. */
1296 RSKIP_SPACE(s, i);
1297 if (i >= 0)
1298 SPLIT_ADD(s, 0, i + 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001299
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300 }
1301 FIX_PREALLOC_SIZE(list);
1302 if (PyList_Reverse(list) < 0)
1303 goto onError;
1304 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001305 onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306 Py_DECREF(list);
1307 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001308}
1309
Guido van Rossum8f950672007-09-10 16:53:45 +00001310Py_LOCAL_INLINE(PyObject *)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Guido van Rossum8f950672007-09-10 16:53:45 +00001312{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313 const char *s = PyBytes_AS_STRING(self);
1314 register Py_ssize_t i, j, count=0;
1315 PyObject *str;
1316 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Guido van Rossum8f950672007-09-10 16:53:45 +00001317
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318 if (list == NULL)
1319 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001320
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001321 i = j = len - 1;
1322 while ((i >= 0) && (maxcount-- > 0)) {
1323 for (; i >= 0; i--) {
1324 if (s[i] == ch) {
1325 SPLIT_ADD(s, i + 1, j + 1);
1326 j = i = i - 1;
1327 break;
1328 }
1329 }
1330 }
1331 if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
1332 /* ch not in self, so just use self as list[0] */
1333 Py_INCREF(self);
1334 PyList_SET_ITEM(list, 0, (PyObject *)self);
1335 count++;
1336 }
1337 else if (j >= -1) {
1338 SPLIT_ADD(s, 0, j + 1);
1339 }
1340 FIX_PREALLOC_SIZE(list);
1341 if (PyList_Reverse(list) < 0)
1342 goto onError;
1343 return list;
Guido van Rossum8f950672007-09-10 16:53:45 +00001344
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345 onError:
1346 Py_DECREF(list);
1347 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00001348}
1349
Neal Norwitz6968b052007-02-27 19:02:19 +00001350PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001351"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001352\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001353Return a list of the sections in B, using sep as the delimiter,\n\
1354starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001355If sep is not given, B is split on ASCII whitespace characters\n\
1356(space, tab, return, newline, formfeed, vertical tab).\n\
1357If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
Neal Norwitz6968b052007-02-27 19:02:19 +00001360static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001361bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001362{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
1364 Py_ssize_t maxsplit = -1, count=0;
1365 const char *s, *sub;
1366 Py_buffer vsub;
1367 PyObject *list, *str, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001368
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1370 return NULL;
1371 if (maxsplit < 0)
1372 maxsplit = PY_SSIZE_T_MAX;
1373 if (subobj == Py_None)
1374 return rsplit_whitespace(self, len, maxsplit);
1375 if (_getbuffer(subobj, &vsub) < 0)
1376 return NULL;
1377 sub = vsub.buf;
1378 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380 if (n == 0) {
1381 PyErr_SetString(PyExc_ValueError, "empty separator");
Martin v. Löwis423be952008-08-13 15:53:07 +00001382 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383 return NULL;
1384 }
Amaury Forgeot d'Arc20443f32008-08-22 22:05:20 +00001385 else if (n == 1) {
1386 list = rsplit_char(self, len, sub[0], maxsplit);
1387 PyBuffer_Release(&vsub);
1388 return list;
1389 }
Guido van Rossum8f950672007-09-10 16:53:45 +00001390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391 list = PyList_New(PREALLOC_SIZE(maxsplit));
1392 if (list == NULL) {
Martin v. Löwis423be952008-08-13 15:53:07 +00001393 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394 return NULL;
1395 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001396
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397 j = len;
1398 i = j - n;
Neal Norwitz6968b052007-02-27 19:02:19 +00001399
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400 s = PyBytes_AS_STRING(self);
1401 while ( (i >= 0) && (maxsplit-- > 0) ) {
1402 for (; i>=0; i--) {
1403 if (Py_STRING_MATCH(s, i, sub, n)) {
1404 SPLIT_ADD(s, i + n, j);
1405 j = i;
1406 i -= n;
1407 break;
1408 }
1409 }
1410 }
1411 SPLIT_ADD(s, 0, j);
1412 FIX_PREALLOC_SIZE(list);
1413 if (PyList_Reverse(list) < 0)
1414 goto onError;
Martin v. Löwis423be952008-08-13 15:53:07 +00001415 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001416 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001417
1418onError:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419 Py_DECREF(list);
Martin v. Löwis423be952008-08-13 15:53:07 +00001420 PyBuffer_Release(&vsub);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001422}
1423
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001424#undef SPLIT_ADD
1425#undef MAX_PREALLOC
1426#undef PREALLOC_SIZE
1427
1428
1429PyDoc_STRVAR(join__doc__,
1430"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001431\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001432Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1434
Neal Norwitz6968b052007-02-27 19:02:19 +00001435static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001436bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001437{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438 char *sep = PyBytes_AS_STRING(self);
1439 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1440 PyObject *res = NULL;
1441 char *p;
1442 Py_ssize_t seqlen = 0;
1443 size_t sz = 0;
1444 Py_ssize_t i;
1445 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001446
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447 seq = PySequence_Fast(orig, "");
1448 if (seq == NULL) {
1449 return NULL;
1450 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001451
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001452 seqlen = PySequence_Size(seq);
1453 if (seqlen == 0) {
1454 Py_DECREF(seq);
1455 return PyBytes_FromString("");
1456 }
1457 if (seqlen == 1) {
1458 item = PySequence_Fast_GET_ITEM(seq, 0);
1459 if (PyBytes_CheckExact(item)) {
1460 Py_INCREF(item);
1461 Py_DECREF(seq);
1462 return item;
1463 }
1464 }
1465
1466 /* There are at least two things to join, or else we have a subclass
1467 * of the builtin types in the sequence.
1468 * Do a pre-pass to figure out the total amount of space we'll
1469 * need (sz), and see whether all argument are bytes.
1470 */
1471 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1472 for (i = 0; i < seqlen; i++) {
1473 const size_t old_sz = sz;
1474 item = PySequence_Fast_GET_ITEM(seq, i);
1475 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1476 PyErr_Format(PyExc_TypeError,
1477 "sequence item %zd: expected bytes,"
1478 " %.80s found",
1479 i, Py_TYPE(item)->tp_name);
1480 Py_DECREF(seq);
1481 return NULL;
1482 }
1483 sz += Py_SIZE(item);
1484 if (i != 0)
1485 sz += seplen;
1486 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1487 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001488 "join() result is too long for bytes");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001489 Py_DECREF(seq);
1490 return NULL;
1491 }
1492 }
1493
1494 /* Allocate result space. */
1495 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1496 if (res == NULL) {
1497 Py_DECREF(seq);
1498 return NULL;
1499 }
1500
1501 /* Catenate everything. */
1502 /* I'm not worried about a PyByteArray item growing because there's
1503 nowhere in this function where we release the GIL. */
1504 p = PyBytes_AS_STRING(res);
1505 for (i = 0; i < seqlen; ++i) {
1506 size_t n;
1507 char *q;
1508 if (i) {
1509 Py_MEMCPY(p, sep, seplen);
1510 p += seplen;
1511 }
1512 item = PySequence_Fast_GET_ITEM(seq, i);
1513 n = Py_SIZE(item);
1514 if (PyBytes_Check(item))
1515 q = PyBytes_AS_STRING(item);
1516 else
1517 q = PyByteArray_AS_STRING(item);
1518 Py_MEMCPY(p, q, n);
1519 p += n;
1520 }
1521
1522 Py_DECREF(seq);
1523 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001524}
1525
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001526PyObject *
1527_PyBytes_Join(PyObject *sep, PyObject *x)
1528{
1529 assert(sep != NULL && PyBytes_Check(sep));
1530 assert(x != NULL);
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001531 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001532}
1533
1534Py_LOCAL_INLINE(void)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001535bytes_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001536{
1537 if (*end > len)
1538 *end = len;
1539 else if (*end < 0)
1540 *end += len;
1541 if (*end < 0)
1542 *end = 0;
1543 if (*start < 0)
1544 *start += len;
1545 if (*start < 0)
1546 *start = 0;
1547}
1548
1549Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001550bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551{
1552 PyObject *subobj;
1553 const char *sub;
1554 Py_ssize_t sub_len;
1555 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1556 PyObject *obj_start=Py_None, *obj_end=Py_None;
1557
1558 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1559 &obj_start, &obj_end))
1560 return -2;
1561 /* To support None in "start" and "end" arguments, meaning
1562 the same as if they were not passed.
1563 */
1564 if (obj_start != Py_None)
1565 if (!_PyEval_SliceIndex(obj_start, &start))
1566 return -2;
1567 if (obj_end != Py_None)
1568 if (!_PyEval_SliceIndex(obj_end, &end))
1569 return -2;
1570
1571 if (PyBytes_Check(subobj)) {
1572 sub = PyBytes_AS_STRING(subobj);
1573 sub_len = PyBytes_GET_SIZE(subobj);
1574 }
1575 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1576 /* XXX - the "expected a character buffer object" is pretty
1577 confusing for a non-expert. remap to something else ? */
1578 return -2;
1579
1580 if (dir > 0)
1581 return stringlib_find_slice(
1582 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1583 sub, sub_len, start, end);
1584 else
1585 return stringlib_rfind_slice(
1586 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1587 sub, sub_len, start, end);
1588}
1589
1590
1591PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001592"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001593\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001594Return the lowest index in S where substring sub is found,\n\
1595such that sub is contained within s[start:end]. Optional\n\
1596arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001597\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598Return -1 on failure.");
1599
Neal Norwitz6968b052007-02-27 19:02:19 +00001600static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001601bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001602{
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001603 Py_ssize_t result = bytes_find_internal(self, args, +1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001604 if (result == -2)
1605 return NULL;
1606 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001607}
1608
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001609
1610PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001611"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001612\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613Like B.find() but raise ValueError when the substring is not found.");
1614
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001615static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001616bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001617{
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001618 Py_ssize_t result = bytes_find_internal(self, args, +1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001619 if (result == -2)
1620 return NULL;
1621 if (result == -1) {
1622 PyErr_SetString(PyExc_ValueError,
1623 "substring not found");
1624 return NULL;
1625 }
1626 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001627}
1628
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001629
1630PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001631"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001632\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633Return the highest index in B where substring sub is found,\n\
1634such that sub is contained within s[start:end]. Optional\n\
1635arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001636\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637Return -1 on failure.");
1638
Neal Norwitz6968b052007-02-27 19:02:19 +00001639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001640bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001641{
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001642 Py_ssize_t result = bytes_find_internal(self, args, -1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643 if (result == -2)
1644 return NULL;
1645 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001646}
1647
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001648
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001649PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001650"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651\n\
1652Like B.rfind() but raise ValueError when the substring is not found.");
1653
1654static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001655bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001656{
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001657 Py_ssize_t result = bytes_find_internal(self, args, -1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658 if (result == -2)
1659 return NULL;
1660 if (result == -1) {
1661 PyErr_SetString(PyExc_ValueError,
1662 "substring not found");
1663 return NULL;
1664 }
1665 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001666}
1667
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668
1669Py_LOCAL_INLINE(PyObject *)
1670do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001671{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672 Py_buffer vsep;
1673 char *s = PyBytes_AS_STRING(self);
1674 Py_ssize_t len = PyBytes_GET_SIZE(self);
1675 char *sep;
1676 Py_ssize_t seplen;
1677 Py_ssize_t i, j;
1678
1679 if (_getbuffer(sepobj, &vsep) < 0)
1680 return NULL;
1681 sep = vsep.buf;
1682 seplen = vsep.len;
1683
1684 i = 0;
1685 if (striptype != RIGHTSTRIP) {
1686 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1687 i++;
1688 }
1689 }
1690
1691 j = len;
1692 if (striptype != LEFTSTRIP) {
1693 do {
1694 j--;
1695 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1696 j++;
1697 }
1698
Martin v. Löwis423be952008-08-13 15:53:07 +00001699 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001700
1701 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1702 Py_INCREF(self);
1703 return (PyObject*)self;
1704 }
1705 else
1706 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001707}
1708
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709
1710Py_LOCAL_INLINE(PyObject *)
1711do_strip(PyBytesObject *self, int striptype)
1712{
1713 char *s = PyBytes_AS_STRING(self);
1714 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1715
1716 i = 0;
1717 if (striptype != RIGHTSTRIP) {
1718 while (i < len && ISSPACE(s[i])) {
1719 i++;
1720 }
1721 }
1722
1723 j = len;
1724 if (striptype != LEFTSTRIP) {
1725 do {
1726 j--;
1727 } while (j >= i && ISSPACE(s[j]));
1728 j++;
1729 }
1730
1731 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1732 Py_INCREF(self);
1733 return (PyObject*)self;
1734 }
1735 else
1736 return PyBytes_FromStringAndSize(s+i, j-i);
1737}
1738
1739
1740Py_LOCAL_INLINE(PyObject *)
1741do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1742{
1743 PyObject *sep = NULL;
1744
1745 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1746 return NULL;
1747
1748 if (sep != NULL && sep != Py_None) {
1749 return do_xstrip(self, striptype, sep);
1750 }
1751 return do_strip(self, striptype);
1752}
1753
1754
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001755PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001757\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001758Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001760static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001761bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001762{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763 if (PyTuple_GET_SIZE(args) == 0)
1764 return do_strip(self, BOTHSTRIP); /* Common case */
1765 else
1766 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001767}
1768
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001770PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001772\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001773Strip leading bytes contained in the argument.\n\
1774If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001775static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001776bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001777{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778 if (PyTuple_GET_SIZE(args) == 0)
1779 return do_strip(self, LEFTSTRIP); /* Common case */
1780 else
1781 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001782}
1783
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001785PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001787\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001788Strip trailing bytes contained in the argument.\n\
1789If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001790static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001791bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001792{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793 if (PyTuple_GET_SIZE(args) == 0)
1794 return do_strip(self, RIGHTSTRIP); /* Common case */
1795 else
1796 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001797}
Neal Norwitz6968b052007-02-27 19:02:19 +00001798
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799
1800PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001801"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001802\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803Return the number of non-overlapping occurrences of substring sub in\n\
1804string S[start:end]. Optional arguments start and end are interpreted\n\
1805as in slice notation.");
1806
1807static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001808bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809{
1810 PyObject *sub_obj;
1811 const char *str = PyBytes_AS_STRING(self), *sub;
1812 Py_ssize_t sub_len;
1813 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1814
1815 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1816 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1817 return NULL;
1818
1819 if (PyBytes_Check(sub_obj)) {
1820 sub = PyBytes_AS_STRING(sub_obj);
1821 sub_len = PyBytes_GET_SIZE(sub_obj);
1822 }
1823 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1824 return NULL;
1825
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001826 bytes_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
1828 return PyLong_FromSsize_t(
1829 stringlib_count(str + start, end - start, sub, sub_len)
1830 );
1831}
1832
1833
1834PyDoc_STRVAR(translate__doc__,
1835"B.translate(table[, deletechars]) -> bytes\n\
1836\n\
1837Return a copy of B, where all characters occurring in the\n\
1838optional argument deletechars are removed, and the remaining\n\
1839characters have been mapped through the given translation\n\
1840table, which must be a bytes object of length 256.");
1841
1842static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001843bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844{
1845 register char *input, *output;
1846 const char *table;
1847 register Py_ssize_t i, c, changed = 0;
1848 PyObject *input_obj = (PyObject*)self;
1849 const char *output_start, *del_table=NULL;
1850 Py_ssize_t inlen, tablen, dellen = 0;
1851 PyObject *result;
1852 int trans_table[256];
1853 PyObject *tableobj, *delobj = NULL;
1854
1855 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1856 &tableobj, &delobj))
1857 return NULL;
1858
1859 if (PyBytes_Check(tableobj)) {
1860 table = PyBytes_AS_STRING(tableobj);
1861 tablen = PyBytes_GET_SIZE(tableobj);
1862 }
1863 else if (tableobj == Py_None) {
1864 table = NULL;
1865 tablen = 256;
1866 }
1867 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1868 return NULL;
1869
1870 if (tablen != 256) {
1871 PyErr_SetString(PyExc_ValueError,
1872 "translation table must be 256 characters long");
1873 return NULL;
1874 }
1875
1876 if (delobj != NULL) {
1877 if (PyBytes_Check(delobj)) {
1878 del_table = PyBytes_AS_STRING(delobj);
1879 dellen = PyBytes_GET_SIZE(delobj);
1880 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1882 return NULL;
1883 }
1884 else {
1885 del_table = NULL;
1886 dellen = 0;
1887 }
1888
1889 inlen = PyBytes_GET_SIZE(input_obj);
1890 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1891 if (result == NULL)
1892 return NULL;
1893 output_start = output = PyBytes_AsString(result);
1894 input = PyBytes_AS_STRING(input_obj);
1895
1896 if (dellen == 0 && table != NULL) {
1897 /* If no deletions are required, use faster code */
1898 for (i = inlen; --i >= 0; ) {
1899 c = Py_CHARMASK(*input++);
1900 if (Py_CHARMASK((*output++ = table[c])) != c)
1901 changed = 1;
1902 }
1903 if (changed || !PyBytes_CheckExact(input_obj))
1904 return result;
1905 Py_DECREF(result);
1906 Py_INCREF(input_obj);
1907 return input_obj;
1908 }
1909
1910 if (table == NULL) {
1911 for (i = 0; i < 256; i++)
1912 trans_table[i] = Py_CHARMASK(i);
1913 } else {
1914 for (i = 0; i < 256; i++)
1915 trans_table[i] = Py_CHARMASK(table[i]);
1916 }
1917
1918 for (i = 0; i < dellen; i++)
1919 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1920
1921 for (i = inlen; --i >= 0; ) {
1922 c = Py_CHARMASK(*input++);
1923 if (trans_table[c] != -1)
1924 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1925 continue;
1926 changed = 1;
1927 }
1928 if (!changed && PyBytes_CheckExact(input_obj)) {
1929 Py_DECREF(result);
1930 Py_INCREF(input_obj);
1931 return input_obj;
1932 }
1933 /* Fix the size of the resulting string */
1934 if (inlen > 0)
1935 _PyBytes_Resize(&result, output - output_start);
1936 return result;
1937}
1938
1939
Georg Brandlabc38772009-04-12 15:51:51 +00001940static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001941bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001942{
1943 return _Py_bytes_maketrans(args);
1944}
1945
/* Search directions passed as the `direction` argument of the helpers
 * below. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c within the first target_len
 * bytes of target, or NULL when there is none (thin wrapper over memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1953
1954/* String ops must return a string. */
1955/* If the object is subclass of string, create a copy */
1956Py_LOCAL(PyBytesObject *)
1957return_self(PyBytesObject *self)
1958{
1959 if (PyBytes_CheckExact(self)) {
1960 Py_INCREF(self);
1961 return self;
1962 }
1963 return (PyBytesObject *)PyBytes_FromStringAndSize(
1964 PyBytes_AS_STRING(self),
1965 PyBytes_GET_SIZE(self));
1966}
1967
1968Py_LOCAL_INLINE(Py_ssize_t)
1969countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1970{
1971 Py_ssize_t count=0;
1972 const char *start=target;
1973 const char *end=target+target_len;
1974
1975 while ( (start=findchar(start, end-start, c)) != NULL ) {
1976 count++;
1977 if (count >= maxcount)
1978 break;
1979 start += 1;
1980 }
1981 return count;
1982}
1983
1984Py_LOCAL(Py_ssize_t)
1985findstring(const char *target, Py_ssize_t target_len,
1986 const char *pattern, Py_ssize_t pattern_len,
1987 Py_ssize_t start,
1988 Py_ssize_t end,
1989 int direction)
1990{
1991 if (start < 0) {
1992 start += target_len;
1993 if (start < 0)
1994 start = 0;
1995 }
1996 if (end > target_len) {
1997 end = target_len;
1998 } else if (end < 0) {
1999 end += target_len;
2000 if (end < 0)
2001 end = 0;
2002 }
2003
2004 /* zero-length substrings always match at the first attempt */
2005 if (pattern_len == 0)
2006 return (direction > 0) ? start : end;
2007
2008 end -= pattern_len;
2009
2010 if (direction < 0) {
2011 for (; end >= start; end--)
2012 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2013 return end;
2014 } else {
2015 for (; start <= end; start++)
2016 if (Py_STRING_MATCH(target, start,pattern,pattern_len))
2017 return start;
2018 }
2019 return -1;
2020}
2021
2022Py_LOCAL_INLINE(Py_ssize_t)
2023countstring(const char *target, Py_ssize_t target_len,
2024 const char *pattern, Py_ssize_t pattern_len,
2025 Py_ssize_t start,
2026 Py_ssize_t end,
2027 int direction, Py_ssize_t maxcount)
2028{
2029 Py_ssize_t count=0;
2030
2031 if (start < 0) {
2032 start += target_len;
2033 if (start < 0)
2034 start = 0;
2035 }
2036 if (end > target_len) {
2037 end = target_len;
2038 } else if (end < 0) {
2039 end += target_len;
2040 if (end < 0)
2041 end = 0;
2042 }
2043
2044 /* zero-length substrings match everywhere */
2045 if (pattern_len == 0 || maxcount == 0) {
2046 if (target_len+1 < maxcount)
2047 return target_len+1;
2048 return maxcount;
2049 }
2050
2051 end -= pattern_len;
2052 if (direction < 0) {
2053 for (; (end >= start); end--)
2054 if (Py_STRING_MATCH(target, end,pattern,pattern_len)) {
2055 count++;
2056 if (--maxcount <= 0) break;
2057 end -= pattern_len-1;
2058 }
2059 } else {
2060 for (; (start <= end); start++)
2061 if (Py_STRING_MATCH(target, start,
2062 pattern, pattern_len)) {
2063 count++;
2064 if (--maxcount <= 0)
2065 break;
2066 start += pattern_len-1;
2067 }
2068 }
2069 return count;
2070}
2071
2072
2073/* Algorithms for different cases of string replacement */
2074
2075/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2076Py_LOCAL(PyBytesObject *)
2077replace_interleave(PyBytesObject *self,
2078 const char *to_s, Py_ssize_t to_len,
2079 Py_ssize_t maxcount)
2080{
2081 char *self_s, *result_s;
2082 Py_ssize_t self_len, result_len;
2083 Py_ssize_t count, i, product;
2084 PyBytesObject *result;
2085
2086 self_len = PyBytes_GET_SIZE(self);
2087
2088 /* 1 at the end plus 1 after every character */
2089 count = self_len+1;
2090 if (maxcount < count)
2091 count = maxcount;
2092
2093 /* Check for overflow */
2094 /* result_len = count * to_len + self_len; */
2095 product = count * to_len;
2096 if (product / to_len != count) {
2097 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002098 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099 return NULL;
2100 }
2101 result_len = product + self_len;
2102 if (result_len < 0) {
2103 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002104 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105 return NULL;
2106 }
2107
2108 if (! (result = (PyBytesObject *)
2109 PyBytes_FromStringAndSize(NULL, result_len)) )
2110 return NULL;
2111
2112 self_s = PyBytes_AS_STRING(self);
2113 result_s = PyBytes_AS_STRING(result);
2114
2115 /* TODO: special case single character, which doesn't need memcpy */
2116
2117 /* Lay the first one down (guaranteed this will occur) */
2118 Py_MEMCPY(result_s, to_s, to_len);
2119 result_s += to_len;
2120 count -= 1;
2121
2122 for (i=0; i<count; i++) {
2123 *result_s++ = *self_s++;
2124 Py_MEMCPY(result_s, to_s, to_len);
2125 result_s += to_len;
2126 }
2127
2128 /* Copy the rest of the original string */
2129 Py_MEMCPY(result_s, self_s, self_len-i);
2130
2131 return result;
2132}
2133
2134/* Special case for deleting a single character */
2135/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2136Py_LOCAL(PyBytesObject *)
2137replace_delete_single_character(PyBytesObject *self,
2138 char from_c, Py_ssize_t maxcount)
2139{
2140 char *self_s, *result_s;
2141 char *start, *next, *end;
2142 Py_ssize_t self_len, result_len;
2143 Py_ssize_t count;
2144 PyBytesObject *result;
2145
2146 self_len = PyBytes_GET_SIZE(self);
2147 self_s = PyBytes_AS_STRING(self);
2148
2149 count = countchar(self_s, self_len, from_c, maxcount);
2150 if (count == 0) {
2151 return return_self(self);
2152 }
2153
2154 result_len = self_len - count; /* from_len == 1 */
2155 assert(result_len>=0);
2156
2157 if ( (result = (PyBytesObject *)
2158 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2159 return NULL;
2160 result_s = PyBytes_AS_STRING(result);
2161
2162 start = self_s;
2163 end = self_s + self_len;
2164 while (count-- > 0) {
2165 next = findchar(start, end-start, from_c);
2166 if (next == NULL)
2167 break;
2168 Py_MEMCPY(result_s, start, next-start);
2169 result_s += (next-start);
2170 start = next+1;
2171 }
2172 Py_MEMCPY(result_s, start, end-start);
2173
2174 return result;
2175}
2176
2177/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2178
2179Py_LOCAL(PyBytesObject *)
2180replace_delete_substring(PyBytesObject *self,
2181 const char *from_s, Py_ssize_t from_len,
2182 Py_ssize_t maxcount) {
2183 char *self_s, *result_s;
2184 char *start, *next, *end;
2185 Py_ssize_t self_len, result_len;
2186 Py_ssize_t count, offset;
2187 PyBytesObject *result;
2188
2189 self_len = PyBytes_GET_SIZE(self);
2190 self_s = PyBytes_AS_STRING(self);
2191
2192 count = countstring(self_s, self_len,
2193 from_s, from_len,
2194 0, self_len, 1,
2195 maxcount);
2196
2197 if (count == 0) {
2198 /* no matches */
2199 return return_self(self);
2200 }
2201
2202 result_len = self_len - (count * from_len);
2203 assert (result_len>=0);
2204
2205 if ( (result = (PyBytesObject *)
2206 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2207 return NULL;
2208
2209 result_s = PyBytes_AS_STRING(result);
2210
2211 start = self_s;
2212 end = self_s + self_len;
2213 while (count-- > 0) {
2214 offset = findstring(start, end-start,
2215 from_s, from_len,
2216 0, end-start, FORWARD);
2217 if (offset == -1)
2218 break;
2219 next = start + offset;
2220
2221 Py_MEMCPY(result_s, start, next-start);
2222
2223 result_s += (next-start);
2224 start = next+from_len;
2225 }
2226 Py_MEMCPY(result_s, start, end-start);
2227 return result;
2228}
2229
2230/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2231Py_LOCAL(PyBytesObject *)
2232replace_single_character_in_place(PyBytesObject *self,
2233 char from_c, char to_c,
2234 Py_ssize_t maxcount)
2235{
2236 char *self_s, *result_s, *start, *end, *next;
2237 Py_ssize_t self_len;
2238 PyBytesObject *result;
2239
2240 /* The result string will be the same size */
2241 self_s = PyBytes_AS_STRING(self);
2242 self_len = PyBytes_GET_SIZE(self);
2243
2244 next = findchar(self_s, self_len, from_c);
2245
2246 if (next == NULL) {
2247 /* No matches; return the original string */
2248 return return_self(self);
2249 }
2250
2251 /* Need to make a new string */
2252 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2253 if (result == NULL)
2254 return NULL;
2255 result_s = PyBytes_AS_STRING(result);
2256 Py_MEMCPY(result_s, self_s, self_len);
2257
2258 /* change everything in-place, starting with this one */
2259 start = result_s + (next-self_s);
2260 *start = to_c;
2261 start++;
2262 end = result_s + self_len;
2263
2264 while (--maxcount > 0) {
2265 next = findchar(start, end-start, from_c);
2266 if (next == NULL)
2267 break;
2268 *next = to_c;
2269 start = next+1;
2270 }
2271
2272 return result;
2273}
2274
2275/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2276Py_LOCAL(PyBytesObject *)
2277replace_substring_in_place(PyBytesObject *self,
2278 const char *from_s, Py_ssize_t from_len,
2279 const char *to_s, Py_ssize_t to_len,
2280 Py_ssize_t maxcount)
2281{
2282 char *result_s, *start, *end;
2283 char *self_s;
2284 Py_ssize_t self_len, offset;
2285 PyBytesObject *result;
2286
2287 /* The result string will be the same size */
2288
2289 self_s = PyBytes_AS_STRING(self);
2290 self_len = PyBytes_GET_SIZE(self);
2291
2292 offset = findstring(self_s, self_len,
2293 from_s, from_len,
2294 0, self_len, FORWARD);
2295 if (offset == -1) {
2296 /* No matches; return the original string */
2297 return return_self(self);
2298 }
2299
2300 /* Need to make a new string */
2301 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2302 if (result == NULL)
2303 return NULL;
2304 result_s = PyBytes_AS_STRING(result);
2305 Py_MEMCPY(result_s, self_s, self_len);
2306
2307 /* change everything in-place, starting with this one */
2308 start = result_s + offset;
2309 Py_MEMCPY(start, to_s, from_len);
2310 start += from_len;
2311 end = result_s + self_len;
2312
2313 while ( --maxcount > 0) {
2314 offset = findstring(start, end-start,
2315 from_s, from_len,
2316 0, end-start, FORWARD);
2317 if (offset==-1)
2318 break;
2319 Py_MEMCPY(start+offset, to_s, from_len);
2320 start += offset+from_len;
2321 }
2322
2323 return result;
2324}
2325
2326/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2327Py_LOCAL(PyBytesObject *)
2328replace_single_character(PyBytesObject *self,
2329 char from_c,
2330 const char *to_s, Py_ssize_t to_len,
2331 Py_ssize_t maxcount)
2332{
2333 char *self_s, *result_s;
2334 char *start, *next, *end;
2335 Py_ssize_t self_len, result_len;
2336 Py_ssize_t count, product;
2337 PyBytesObject *result;
2338
2339 self_s = PyBytes_AS_STRING(self);
2340 self_len = PyBytes_GET_SIZE(self);
2341
2342 count = countchar(self_s, self_len, from_c, maxcount);
2343 if (count == 0) {
2344 /* no matches, return unchanged */
2345 return return_self(self);
2346 }
2347
2348 /* use the difference between current and new, hence the "-1" */
2349 /* result_len = self_len + count * (to_len-1) */
2350 product = count * (to_len-1);
2351 if (product / (to_len-1) != count) {
2352 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002353 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354 return NULL;
2355 }
2356 result_len = self_len + product;
2357 if (result_len < 0) {
2358 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002359 "replacment bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360 return NULL;
2361 }
2362
2363 if ( (result = (PyBytesObject *)
2364 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2365 return NULL;
2366 result_s = PyBytes_AS_STRING(result);
2367
2368 start = self_s;
2369 end = self_s + self_len;
2370 while (count-- > 0) {
2371 next = findchar(start, end-start, from_c);
2372 if (next == NULL)
2373 break;
2374
2375 if (next == start) {
2376 /* replace with the 'to' */
2377 Py_MEMCPY(result_s, to_s, to_len);
2378 result_s += to_len;
2379 start += 1;
2380 } else {
2381 /* copy the unchanged old then the 'to' */
2382 Py_MEMCPY(result_s, start, next-start);
2383 result_s += (next-start);
2384 Py_MEMCPY(result_s, to_s, to_len);
2385 result_s += to_len;
2386 start = next+1;
2387 }
2388 }
2389 /* Copy the remainder of the remaining string */
2390 Py_MEMCPY(result_s, start, end-start);
2391
2392 return result;
2393}
2394
2395/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2396Py_LOCAL(PyBytesObject *)
2397replace_substring(PyBytesObject *self,
2398 const char *from_s, Py_ssize_t from_len,
2399 const char *to_s, Py_ssize_t to_len,
2400 Py_ssize_t maxcount) {
2401 char *self_s, *result_s;
2402 char *start, *next, *end;
2403 Py_ssize_t self_len, result_len;
2404 Py_ssize_t count, offset, product;
2405 PyBytesObject *result;
2406
2407 self_s = PyBytes_AS_STRING(self);
2408 self_len = PyBytes_GET_SIZE(self);
2409
2410 count = countstring(self_s, self_len,
2411 from_s, from_len,
2412 0, self_len, FORWARD, maxcount);
2413 if (count == 0) {
2414 /* no matches, return unchanged */
2415 return return_self(self);
2416 }
2417
2418 /* Check for overflow */
2419 /* result_len = self_len + count * (to_len-from_len) */
2420 product = count * (to_len-from_len);
2421 if (product / (to_len-from_len) != count) {
2422 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002423 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002424 return NULL;
2425 }
2426 result_len = self_len + product;
2427 if (result_len < 0) {
2428 PyErr_SetString(PyExc_OverflowError,
Benjamin Peterson4116f362008-05-27 00:36:20 +00002429 "replacement bytes are too long");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002430 return NULL;
2431 }
2432
2433 if ( (result = (PyBytesObject *)
2434 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2435 return NULL;
2436 result_s = PyBytes_AS_STRING(result);
2437
2438 start = self_s;
2439 end = self_s + self_len;
2440 while (count-- > 0) {
2441 offset = findstring(start, end-start,
2442 from_s, from_len,
2443 0, end-start, FORWARD);
2444 if (offset == -1)
2445 break;
2446 next = start+offset;
2447 if (next == start) {
2448 /* replace with the 'to' */
2449 Py_MEMCPY(result_s, to_s, to_len);
2450 result_s += to_len;
2451 start += from_len;
2452 } else {
2453 /* copy the unchanged old then the 'to' */
2454 Py_MEMCPY(result_s, start, next-start);
2455 result_s += (next-start);
2456 Py_MEMCPY(result_s, to_s, to_len);
2457 result_s += to_len;
2458 start = next+from_len;
2459 }
2460 }
2461 /* Copy the remainder of the remaining string */
2462 Py_MEMCPY(result_s, start, end-start);
2463
2464 return result;
2465}
2466
2467
2468Py_LOCAL(PyBytesObject *)
2469replace(PyBytesObject *self,
2470 const char *from_s, Py_ssize_t from_len,
2471 const char *to_s, Py_ssize_t to_len,
2472 Py_ssize_t maxcount)
2473{
2474 if (maxcount < 0) {
2475 maxcount = PY_SSIZE_T_MAX;
2476 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2477 /* nothing to do; return the original string */
2478 return return_self(self);
2479 }
2480
2481 if (maxcount == 0 ||
2482 (from_len == 0 && to_len == 0)) {
2483 /* nothing to do; return the original string */
2484 return return_self(self);
2485 }
2486
2487 /* Handle zero-length special cases */
2488
2489 if (from_len == 0) {
2490 /* insert the 'to' string everywhere. */
2491 /* >>> "Python".replace("", ".") */
2492 /* '.P.y.t.h.o.n.' */
2493 return replace_interleave(self, to_s, to_len, maxcount);
2494 }
2495
2496 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2497 /* point for an empty self string to generate a non-empty string */
2498 /* Special case so the remaining code always gets a non-empty string */
2499 if (PyBytes_GET_SIZE(self) == 0) {
2500 return return_self(self);
2501 }
2502
2503 if (to_len == 0) {
Georg Brandl17cb8a82008-05-30 08:20:09 +00002504 /* delete all occurrences of 'from' string */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505 if (from_len == 1) {
2506 return replace_delete_single_character(
2507 self, from_s[0], maxcount);
2508 } else {
2509 return replace_delete_substring(self, from_s,
2510 from_len, maxcount);
2511 }
2512 }
2513
2514 /* Handle special case where both strings have the same length */
2515
2516 if (from_len == to_len) {
2517 if (from_len == 1) {
2518 return replace_single_character_in_place(
2519 self,
2520 from_s[0],
2521 to_s[0],
2522 maxcount);
2523 } else {
2524 return replace_substring_in_place(
2525 self, from_s, from_len, to_s, to_len,
2526 maxcount);
2527 }
2528 }
2529
2530 /* Otherwise use the more generic algorithms */
2531 if (from_len == 1) {
2532 return replace_single_character(self, from_s[0],
2533 to_s, to_len, maxcount);
2534 } else {
2535 /* len('from')>=2, len('to')>=1 */
2536 return replace_substring(self, from_s, from_len, to_s, to_len,
2537 maxcount);
2538 }
2539}
2540
2541PyDoc_STRVAR(replace__doc__,
2542"B.replace(old, new[, count]) -> bytes\n\
2543\n\
2544Return a copy of B with all occurrences of subsection\n\
2545old replaced by new. If the optional argument count is\n\
2546given, only the first count occurrences are replaced.");
2547
2548static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002549bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002550{
2551 Py_ssize_t count = -1;
2552 PyObject *from, *to;
2553 const char *from_s, *to_s;
2554 Py_ssize_t from_len, to_len;
2555
2556 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2557 return NULL;
2558
2559 if (PyBytes_Check(from)) {
2560 from_s = PyBytes_AS_STRING(from);
2561 from_len = PyBytes_GET_SIZE(from);
2562 }
2563 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2564 return NULL;
2565
2566 if (PyBytes_Check(to)) {
2567 to_s = PyBytes_AS_STRING(to);
2568 to_len = PyBytes_GET_SIZE(to);
2569 }
2570 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2571 return NULL;
2572
2573 return (PyObject *)replace((PyBytesObject *) self,
2574 from_s, from_len,
2575 to_s, to_len, count);
2576}
2577
2578/** End DALKE **/
2579
2580/* Matches the end (direction >= 0) or start (direction < 0) of self
2581 * against substr, using the start and end arguments. Returns
2582 * -1 on error, 0 if not found and 1 if found.
2583 */
2584Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002585_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002586 Py_ssize_t end, int direction)
2587{
2588 Py_ssize_t len = PyBytes_GET_SIZE(self);
2589 Py_ssize_t slen;
2590 const char* sub;
2591 const char* str;
2592
2593 if (PyBytes_Check(substr)) {
2594 sub = PyBytes_AS_STRING(substr);
2595 slen = PyBytes_GET_SIZE(substr);
2596 }
2597 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2598 return -1;
2599 str = PyBytes_AS_STRING(self);
2600
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002601 bytes_adjust_indices(&start, &end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002602
2603 if (direction < 0) {
2604 /* startswith */
2605 if (start+slen > len)
2606 return 0;
2607 } else {
2608 /* endswith */
2609 if (end-start < slen || start > len)
2610 return 0;
2611
2612 if (end-slen > start)
2613 start = end - slen;
2614 }
2615 if (end-start >= slen)
2616 return ! memcmp(str+start, sub, slen);
2617 return 0;
2618}
2619
2620
2621PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002622"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002623\n\
2624Return True if B starts with the specified prefix, False otherwise.\n\
2625With optional start, test B beginning at that position.\n\
2626With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002627prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628
2629static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002630bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631{
2632 Py_ssize_t start = 0;
2633 Py_ssize_t end = PY_SSIZE_T_MAX;
2634 PyObject *subobj;
2635 int result;
2636
2637 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2638 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2639 return NULL;
2640 if (PyTuple_Check(subobj)) {
2641 Py_ssize_t i;
2642 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002643 result = _bytes_tailmatch(self,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644 PyTuple_GET_ITEM(subobj, i),
2645 start, end, -1);
2646 if (result == -1)
2647 return NULL;
2648 else if (result) {
2649 Py_RETURN_TRUE;
2650 }
2651 }
2652 Py_RETURN_FALSE;
2653 }
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002654 result = _bytes_tailmatch(self, subobj, start, end, -1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655 if (result == -1)
2656 return NULL;
2657 else
2658 return PyBool_FromLong(result);
2659}
2660
2661
2662PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002663"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664\n\
2665Return True if B ends with the specified suffix, False otherwise.\n\
2666With optional start, test B beginning at that position.\n\
2667With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002668suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002669
2670static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002671bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672{
2673 Py_ssize_t start = 0;
2674 Py_ssize_t end = PY_SSIZE_T_MAX;
2675 PyObject *subobj;
2676 int result;
2677
2678 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2679 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2680 return NULL;
2681 if (PyTuple_Check(subobj)) {
2682 Py_ssize_t i;
2683 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002684 result = _bytes_tailmatch(self,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685 PyTuple_GET_ITEM(subobj, i),
2686 start, end, +1);
2687 if (result == -1)
2688 return NULL;
2689 else if (result) {
2690 Py_RETURN_TRUE;
2691 }
2692 }
2693 Py_RETURN_FALSE;
2694 }
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002695 result = _bytes_tailmatch(self, subobj, start, end, +1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002696 if (result == -1)
2697 return NULL;
2698 else
2699 return PyBool_FromLong(result);
2700}
2701
2702
2703PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002704"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002705\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002706Decode S using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002707to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002708handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2709a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002711able to handle UnicodeDecodeErrors.");
2712
2713static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002714bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002715{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716 const char *encoding = NULL;
2717 const char *errors = NULL;
Benjamin Peterson308d6372009-09-18 21:42:35 +00002718 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002719
Benjamin Peterson308d6372009-09-18 21:42:35 +00002720 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721 return NULL;
2722 if (encoding == NULL)
2723 encoding = PyUnicode_GetDefaultEncoding();
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002724 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002725}
2726
Guido van Rossum20188312006-05-05 15:15:40 +00002727
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002728PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002729"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002730\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002731Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002732Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002734
2735static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002736hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002737{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002738 if (c >= 128)
2739 return -1;
2740 if (ISDIGIT(c))
2741 return c - '0';
2742 else {
2743 if (ISUPPER(c))
2744 c = TOLOWER(c);
2745 if (c >= 'a' && c <= 'f')
2746 return c - 'a' + 10;
2747 }
2748 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002749}
2750
2751static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002752bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002753{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754 PyObject *newstring, *hexobj;
2755 char *buf;
2756 Py_UNICODE *hex;
2757 Py_ssize_t hexlen, byteslen, i, j;
2758 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002759
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2761 return NULL;
2762 assert(PyUnicode_Check(hexobj));
2763 hexlen = PyUnicode_GET_SIZE(hexobj);
2764 hex = PyUnicode_AS_UNICODE(hexobj);
2765 byteslen = hexlen/2; /* This overestimates if there are spaces */
2766 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2767 if (!newstring)
2768 return NULL;
2769 buf = PyBytes_AS_STRING(newstring);
2770 for (i = j = 0; i < hexlen; i += 2) {
2771 /* skip over spaces in the input */
2772 while (hex[i] == ' ')
2773 i++;
2774 if (i >= hexlen)
2775 break;
2776 top = hex_digit_to_int(hex[i]);
2777 bot = hex_digit_to_int(hex[i+1]);
2778 if (top == -1 || bot == -1) {
2779 PyErr_Format(PyExc_ValueError,
2780 "non-hexadecimal number found in "
2781 "fromhex() arg at position %zd", i);
2782 goto error;
2783 }
2784 buf[j++] = (top << 4) + bot;
2785 }
2786 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2787 goto error;
2788 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002789
2790 error:
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791 Py_XDECREF(newstring);
2792 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002793}
2794
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002795PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002796"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002797
2798static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002799bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002800{
2801 Py_ssize_t res;
Mark Dickinsonfd24b322008-12-06 15:33:31 +00002802 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002803 return PyLong_FromSsize_t(res);
2804}
2805
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002806
2807static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002808bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002809{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002811}
2812
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002813
2814static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002815bytes_methods[] = {
2816 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2818 _Py_capitalize__doc__},
2819 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002820 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Benjamin Peterson308d6372009-09-18 21:42:35 +00002821 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002822 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823 endswith__doc__},
2824 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2825 expandtabs__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002826 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2827 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002828 fromhex_doc},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002829 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2831 _Py_isalnum__doc__},
2832 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2833 _Py_isalpha__doc__},
2834 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2835 _Py_isdigit__doc__},
2836 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2837 _Py_islower__doc__},
2838 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2839 _Py_isspace__doc__},
2840 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2841 _Py_istitle__doc__},
2842 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2843 _Py_isupper__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002844 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002845 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2846 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002847 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2848 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
Georg Brandlabc38772009-04-12 15:51:51 +00002849 _Py_maketrans__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002850 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2851 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2852 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2853 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002855 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856 rpartition__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002857 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2858 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2859 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002860 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
2861 splitlines__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002862 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002863 startswith__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002864 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2866 _Py_swapcase__doc__},
2867 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002868 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869 translate__doc__},
2870 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2871 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002872 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002873 sizeof__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002874 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002875};
2876
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877static PyObject *
2878str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2879
2880static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002881bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882{
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002883 PyObject *x = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884 const char *encoding = NULL;
2885 const char *errors = NULL;
2886 PyObject *new = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887 static char *kwlist[] = {"source", "encoding", "errors", 0};
2888
2889 if (type != &PyBytes_Type)
2890 return str_subtype_new(type, args, kwds);
2891 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2892 &encoding, &errors))
2893 return NULL;
2894 if (x == NULL) {
2895 if (encoding != NULL || errors != NULL) {
2896 PyErr_SetString(PyExc_TypeError,
2897 "encoding or errors without sequence "
2898 "argument");
2899 return NULL;
2900 }
2901 return PyBytes_FromString("");
2902 }
2903
2904 if (PyUnicode_Check(x)) {
2905 /* Encode via the codec registry */
2906 if (encoding == NULL) {
2907 PyErr_SetString(PyExc_TypeError,
2908 "string argument without an encoding");
2909 return NULL;
2910 }
Marc-André Lemburgb2750b52008-06-06 12:18:17 +00002911 new = PyUnicode_AsEncodedString(x, encoding, errors);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912 if (new == NULL)
2913 return NULL;
2914 assert(PyBytes_Check(new));
2915 return new;
2916 }
2917
2918 /* If it's not unicode, there can't be encoding or errors */
2919 if (encoding != NULL || errors != NULL) {
2920 PyErr_SetString(PyExc_TypeError,
2921 "encoding or errors without a string argument");
2922 return NULL;
2923 }
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002924 return PyObject_Bytes(x);
2925}
2926
2927PyObject *
2928PyBytes_FromObject(PyObject *x)
2929{
2930 PyObject *new, *it;
2931 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002932
Benjamin Peterson4b24a422008-08-27 00:28:34 +00002933 if (x == NULL) {
2934 PyErr_BadInternalCall();
2935 return NULL;
2936 }
2937
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002938 /* Is it an int? */
2939 size = PyNumber_AsSsize_t(x, PyExc_ValueError);
2940 if (size == -1 && PyErr_Occurred()) {
2941 PyErr_Clear();
2942 }
2943 else {
2944 if (size < 0) {
2945 PyErr_SetString(PyExc_ValueError, "negative count");
2946 return NULL;
2947 }
2948 new = PyBytes_FromStringAndSize(NULL, size);
2949 if (new == NULL) {
2950 return NULL;
2951 }
2952 if (size > 0) {
2953 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2954 }
2955 return new;
2956 }
2957
2958 /* Use the modern buffer interface */
2959 if (PyObject_CheckBuffer(x)) {
2960 Py_buffer view;
2961 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2962 return NULL;
2963 new = PyBytes_FromStringAndSize(NULL, view.len);
2964 if (!new)
2965 goto fail;
Christian Heimes1a8501c2008-10-02 19:56:01 +00002966 /* XXX(brett.cannon): Better way to get to internal buffer? */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002967 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2968 &view, view.len, 'C') < 0)
2969 goto fail;
Martin v. Löwis423be952008-08-13 15:53:07 +00002970 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002971 return new;
2972 fail:
2973 Py_XDECREF(new);
Martin v. Löwis423be952008-08-13 15:53:07 +00002974 PyBuffer_Release(&view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002975 return NULL;
2976 }
2977
2978 /* For iterator version, create a string object and resize as needed */
2979 /* XXX(gb): is 64 a good value? also, optimize if length is known */
2980 /* XXX(guido): perhaps use Pysequence_Fast() -- I can't imagine the
2981 input being a truly long iterator. */
2982 size = 64;
2983 new = PyBytes_FromStringAndSize(NULL, size);
2984 if (new == NULL)
2985 return NULL;
2986
2987 /* XXX Optimize this if the arguments is a list, tuple */
2988
2989 /* Get the iterator */
2990 it = PyObject_GetIter(x);
2991 if (it == NULL)
2992 goto error;
2993
2994 /* Run the iterator to exhaustion */
2995 for (i = 0; ; i++) {
2996 PyObject *item;
2997 Py_ssize_t value;
2998
2999 /* Get the next item */
3000 item = PyIter_Next(it);
3001 if (item == NULL) {
3002 if (PyErr_Occurred())
3003 goto error;
3004 break;
3005 }
3006
3007 /* Interpret it as an int (__index__) */
3008 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3009 Py_DECREF(item);
3010 if (value == -1 && PyErr_Occurred())
3011 goto error;
3012
3013 /* Range check */
3014 if (value < 0 || value >= 256) {
3015 PyErr_SetString(PyExc_ValueError,
3016 "bytes must be in range(0, 256)");
3017 goto error;
3018 }
3019
3020 /* Append the byte */
3021 if (i >= size) {
3022 size *= 2;
3023 if (_PyBytes_Resize(&new, size) < 0)
3024 goto error;
3025 }
3026 ((PyBytesObject *)new)->ob_sval[i] = value;
3027 }
3028 _PyBytes_Resize(&new, i);
3029
3030 /* Clean up and return success */
3031 Py_DECREF(it);
3032 return new;
3033
3034 error:
3035 /* Error handling when new != NULL */
3036 Py_XDECREF(it);
3037 Py_DECREF(new);
3038 return NULL;
3039}
3040
3041static PyObject *
3042str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3043{
3044 PyObject *tmp, *pnew;
3045 Py_ssize_t n;
3046
3047 assert(PyType_IsSubtype(type, &PyBytes_Type));
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003048 tmp = bytes_new(&PyBytes_Type, args, kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003049 if (tmp == NULL)
3050 return NULL;
3051 assert(PyBytes_CheckExact(tmp));
3052 n = PyBytes_GET_SIZE(tmp);
3053 pnew = type->tp_alloc(type, n);
3054 if (pnew != NULL) {
3055 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3056 PyBytes_AS_STRING(tmp), n+1);
3057 ((PyBytesObject *)pnew)->ob_shash =
3058 ((PyBytesObject *)tmp)->ob_shash;
3059 }
3060 Py_DECREF(tmp);
3061 return pnew;
3062}
3063
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003064PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003065"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003066bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003067bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
3068bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003069\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003070Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003071 - an iterable yielding integers in range(256)\n\
3072 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003073 - a bytes or a buffer object\n\
3074 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003075
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003076static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003077
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078PyTypeObject PyBytes_Type = {
3079 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3080 "bytes",
Mark Dickinsonfd24b322008-12-06 15:33:31 +00003081 PyBytesObject_SIZE,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003082 sizeof(char),
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003083 bytes_dealloc, /* tp_dealloc */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003084 0, /* tp_print */
3085 0, /* tp_getattr */
3086 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00003087 0, /* tp_reserved */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003088 (reprfunc)bytes_repr, /* tp_repr */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003089 0, /* tp_as_number */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003090 &bytes_as_sequence, /* tp_as_sequence */
3091 &bytes_as_mapping, /* tp_as_mapping */
3092 (hashfunc)bytes_hash, /* tp_hash */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003093 0, /* tp_call */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003094 bytes_str, /* tp_str */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003095 PyObject_GenericGetAttr, /* tp_getattro */
3096 0, /* tp_setattro */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003097 &bytes_as_buffer, /* tp_as_buffer */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003098 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3099 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003100 bytes_doc, /* tp_doc */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003101 0, /* tp_traverse */
3102 0, /* tp_clear */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003103 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003104 0, /* tp_weaklistoffset */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003105 bytes_iter, /* tp_iter */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003106 0, /* tp_iternext */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003107 bytes_methods, /* tp_methods */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003108 0, /* tp_members */
3109 0, /* tp_getset */
3110 &PyBaseObject_Type, /* tp_base */
3111 0, /* tp_dict */
3112 0, /* tp_descr_get */
3113 0, /* tp_descr_set */
3114 0, /* tp_dictoffset */
3115 0, /* tp_init */
3116 0, /* tp_alloc */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003117 bytes_new, /* tp_new */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003118 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003119};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003120
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003121void
3122PyBytes_Concat(register PyObject **pv, register PyObject *w)
3123{
3124 register PyObject *v;
3125 assert(pv != NULL);
3126 if (*pv == NULL)
3127 return;
3128 if (w == NULL) {
3129 Py_DECREF(*pv);
3130 *pv = NULL;
3131 return;
3132 }
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003133 v = bytes_concat(*pv, w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003134 Py_DECREF(*pv);
3135 *pv = v;
3136}
3137
3138void
3139PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
3140{
3141 PyBytes_Concat(pv, w);
3142 Py_XDECREF(w);
3143}
3144
3145
3146/* The following function breaks the notion that strings are immutable:
3147 it changes the size of a string. We get away with this only if there
3148 is only one module referencing the object. You can also think of it
3149 as creating a new string object and destroying the old one, only
3150 more efficiently. In any case, don't use this if the string may
3151 already be known to some other part of the code...
3152 Note that if there's not enough memory to resize the string, the original
3153 string object at *pv is deallocated, *pv is set to NULL, an "out of
3154 memory" exception is set, and -1 is returned. Else (on success) 0 is
3155 returned, and the value in *pv may or may not be the same as on input.
3156 As always, an extra byte is allocated for a trailing \0 byte (newsize
3157 does *not* include that), and a trailing \0 byte is stored.
3158*/
3159
3160int
3161_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3162{
3163 register PyObject *v;
3164 register PyBytesObject *sv;
3165 v = *pv;
3166 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3167 *pv = 0;
3168 Py_DECREF(v);
3169 PyErr_BadInternalCall();
3170 return -1;
3171 }
3172 /* XXX UNREF/NEWREF interface should be more symmetrical */
3173 _Py_DEC_REFTOTAL;
3174 _Py_ForgetReference(v);
3175 *pv = (PyObject *)
Mark Dickinsonfd24b322008-12-06 15:33:31 +00003176 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003177 if (*pv == NULL) {
3178 PyObject_Del(v);
3179 PyErr_NoMemory();
3180 return -1;
3181 }
3182 _Py_NewReference(*pv);
3183 sv = (PyBytesObject *) *pv;
3184 Py_SIZE(sv) = newsize;
3185 sv->ob_sval[newsize] = '\0';
3186 sv->ob_shash = -1; /* invalidate cached hash value */
3187 return 0;
3188}
3189
3190/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
3191 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3192 * Python's regular ints.
3193 * Return value: a new PyString*, or NULL if error.
3194 * . *pbuf is set to point into it,
3195 * *plen set to the # of chars following that.
3196 * Caller must decref it when done using pbuf.
3197 * The string starting at *pbuf is of the form
3198 * "-"? ("0x" | "0X")? digit+
3199 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3200 * set in flags. The case of hex digits will be correct,
3201 * There will be at least prec digits, zero-filled on the left if
3202 * necessary to get that many.
3203 * val object to be converted
3204 * flags bitmask of format flags; only F_ALT is looked at
3205 * prec minimum number of digits; 0-fill on left if needed
3206 * type a character in [duoxX]; u acts the same as d
3207 *
3208 * CAUTION: o, x and X conversions on regular ints can never
3209 * produce a '-' sign, but can for Python's unbounded ints.
3210 */
3211PyObject*
3212_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
3213 char **pbuf, int *plen)
3214{
3215 PyObject *result = NULL;
3216 char *buf;
3217 Py_ssize_t i;
3218 int sign; /* 1 if '-', else 0 */
3219 int len; /* number of characters */
3220 Py_ssize_t llen;
3221 int numdigits; /* len == numnondigits + numdigits */
3222 int numnondigits = 0;
3223
3224 /* Avoid exceeding SSIZE_T_MAX */
Christian Heimesce694b72008-08-24 16:15:19 +00003225 if (prec > INT_MAX-3) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003226 PyErr_SetString(PyExc_OverflowError,
3227 "precision too large");
3228 return NULL;
3229 }
3230
3231 switch (type) {
3232 case 'd':
3233 case 'u':
3234 /* Special-case boolean: we want 0/1 */
3235 if (PyBool_Check(val))
3236 result = PyNumber_ToBase(val, 10);
3237 else
3238 result = Py_TYPE(val)->tp_str(val);
3239 break;
3240 case 'o':
3241 numnondigits = 2;
3242 result = PyNumber_ToBase(val, 8);
3243 break;
3244 case 'x':
3245 case 'X':
3246 numnondigits = 2;
3247 result = PyNumber_ToBase(val, 16);
3248 break;
3249 default:
3250 assert(!"'type' not in [duoxX]");
3251 }
3252 if (!result)
3253 return NULL;
3254
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003255 buf = _PyUnicode_AsString(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003256 if (!buf) {
3257 Py_DECREF(result);
3258 return NULL;
3259 }
3260
3261 /* To modify the string in-place, there can only be one reference. */
3262 if (Py_REFCNT(result) != 1) {
3263 PyErr_BadInternalCall();
3264 return NULL;
3265 }
3266 llen = PyUnicode_GetSize(result);
3267 if (llen > INT_MAX) {
3268 PyErr_SetString(PyExc_ValueError,
3269 "string too large in _PyBytes_FormatLong");
3270 return NULL;
3271 }
3272 len = (int)llen;
3273 if (buf[len-1] == 'L') {
3274 --len;
3275 buf[len] = '\0';
3276 }
3277 sign = buf[0] == '-';
3278 numnondigits += sign;
3279 numdigits = len - numnondigits;
3280 assert(numdigits > 0);
3281
3282 /* Get rid of base marker unless F_ALT */
3283 if (((flags & F_ALT) == 0 &&
3284 (type == 'o' || type == 'x' || type == 'X'))) {
3285 assert(buf[sign] == '0');
3286 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3287 buf[sign+1] == 'o');
3288 numnondigits -= 2;
3289 buf += 2;
3290 len -= 2;
3291 if (sign)
3292 buf[0] = '-';
3293 assert(len == numnondigits + numdigits);
3294 assert(numdigits > 0);
3295 }
3296
3297 /* Fill with leading zeroes to meet minimum width. */
3298 if (prec > numdigits) {
3299 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3300 numnondigits + prec);
3301 char *b1;
3302 if (!r1) {
3303 Py_DECREF(result);
3304 return NULL;
3305 }
3306 b1 = PyBytes_AS_STRING(r1);
3307 for (i = 0; i < numnondigits; ++i)
3308 *b1++ = *buf++;
3309 for (i = 0; i < prec - numdigits; i++)
3310 *b1++ = '0';
3311 for (i = 0; i < numdigits; i++)
3312 *b1++ = *buf++;
3313 *b1 = '\0';
3314 Py_DECREF(result);
3315 result = r1;
3316 buf = PyBytes_AS_STRING(result);
3317 len = numnondigits + prec;
3318 }
3319
3320 /* Fix up case for hex conversions. */
3321 if (type == 'X') {
3322 /* Need to convert all lower case letters to upper case.
3323 and need to convert 0x to 0X (and -0x to -0X). */
3324 for (i = 0; i < len; i++)
3325 if (buf[i] >= 'a' && buf[i] <= 'x')
3326 buf[i] -= 'a'-'A';
3327 }
3328 *pbuf = buf;
3329 *plen = len;
3330 return result;
3331}
3332
3333void
3334PyBytes_Fini(void)
3335{
3336 int i;
3337 for (i = 0; i < UCHAR_MAX + 1; i++) {
3338 Py_XDECREF(characters[i]);
3339 characters[i] = NULL;
3340 }
3341 Py_XDECREF(nullstring);
3342 nullstring = NULL;
3343}
3344
Benjamin Peterson4116f362008-05-27 00:36:20 +00003345/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003346
3347typedef struct {
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003348 PyObject_HEAD
3349 Py_ssize_t it_index;
3350 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3351} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003352
3353static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003354striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003355{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003356 _PyObject_GC_UNTRACK(it);
3357 Py_XDECREF(it->it_seq);
3358 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003359}
3360
3361static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003362striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003363{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003364 Py_VISIT(it->it_seq);
3365 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003366}
3367
3368static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003369striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003370{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003371 PyBytesObject *seq;
3372 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003373
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003374 assert(it != NULL);
3375 seq = it->it_seq;
3376 if (seq == NULL)
3377 return NULL;
3378 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003380 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3381 item = PyLong_FromLong(
3382 (unsigned char)seq->ob_sval[it->it_index]);
3383 if (item != NULL)
3384 ++it->it_index;
3385 return item;
3386 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003387
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003388 Py_DECREF(seq);
3389 it->it_seq = NULL;
3390 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003391}
3392
3393static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003394striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003395{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003396 Py_ssize_t len = 0;
3397 if (it->it_seq)
3398 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3399 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003400}
3401
3402PyDoc_STRVAR(length_hint_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003403 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003404
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003405static PyMethodDef striter_methods[] = {
3406 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3407 length_hint_doc},
3408 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003409};
3410
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003411PyTypeObject PyBytesIter_Type = {
3412 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3413 "bytes_iterator", /* tp_name */
3414 sizeof(striterobject), /* tp_basicsize */
3415 0, /* tp_itemsize */
3416 /* methods */
3417 (destructor)striter_dealloc, /* tp_dealloc */
3418 0, /* tp_print */
3419 0, /* tp_getattr */
3420 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00003421 0, /* tp_reserved */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003422 0, /* tp_repr */
3423 0, /* tp_as_number */
3424 0, /* tp_as_sequence */
3425 0, /* tp_as_mapping */
3426 0, /* tp_hash */
3427 0, /* tp_call */
3428 0, /* tp_str */
3429 PyObject_GenericGetAttr, /* tp_getattro */
3430 0, /* tp_setattro */
3431 0, /* tp_as_buffer */
3432 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3433 0, /* tp_doc */
3434 (traverseproc)striter_traverse, /* tp_traverse */
3435 0, /* tp_clear */
3436 0, /* tp_richcompare */
3437 0, /* tp_weaklistoffset */
3438 PyObject_SelfIter, /* tp_iter */
3439 (iternextfunc)striter_next, /* tp_iternext */
3440 striter_methods, /* tp_methods */
3441 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003442};
3443
3444static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003445bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003446{
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003447 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003448
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003449 if (!PyBytes_Check(seq)) {
3450 PyErr_BadInternalCall();
3451 return NULL;
3452 }
3453 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3454 if (it == NULL)
3455 return NULL;
3456 it->it_index = 0;
3457 Py_INCREF(seq);
3458 it->it_seq = (PyBytesObject *)seq;
3459 _PyObject_GC_TRACK(it);
3460 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003461}