blob: c0c82f73a66f7014d5df885ae064d698484bfe05 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
   For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178#else
179#ifdef __va_copy
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 __va_copy(count, vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 count = vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183#endif
184#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000239 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000345
346 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 PyObject* ret;
355 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000368bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
498 goto non_esc; /* an arbitry number of unescaped
499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 Py_DECREF(v);
507 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 register char **s,
538 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
Eric Smith0923d1d2009-04-16 20:16:10 +0000565#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566
567#include "stringlib/fastsearch.h"
568#include "stringlib/count.h"
569#include "stringlib/find.h"
570#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000571#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000572#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000573
Eric Smith0f78bff2009-11-30 01:01:42 +0000574#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000575
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576PyObject *
577PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000578{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 static const char *hexdigits = "0123456789abcdef";
580 register PyBytesObject* op = (PyBytesObject*) obj;
581 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000582 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000583 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000584 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 PyErr_SetString(PyExc_OverflowError,
586 "bytes object is too large to make repr");
587 return NULL;
588 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000589 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 v = PyUnicode_FromUnicode(NULL, newsize);
591 if (v == NULL) {
592 return NULL;
593 }
594 else {
595 register Py_ssize_t i;
596 register Py_UNICODE c;
597 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
598 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000600 /* Figure out which quote to use; single is preferred */
601 quote = '\'';
602 if (smartquotes) {
603 char *test, *start;
604 start = PyBytes_AS_STRING(op);
605 for (test = start; test < start+length; ++test) {
606 if (*test == '"') {
607 quote = '\''; /* back to single */
608 goto decided;
609 }
610 else if (*test == '\'')
611 quote = '"';
612 }
613 decided:
614 ;
615 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 *p++ = 'b', *p++ = quote;
618 for (i = 0; i < length; i++) {
619 /* There's at least enough room for a hex escape
620 and a closing quote. */
621 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
622 c = op->ob_sval[i];
623 if (c == quote || c == '\\')
624 *p++ = '\\', *p++ = c;
625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
631 else if (c < ' ' || c >= 0x7f) {
632 *p++ = '\\';
633 *p++ = 'x';
634 *p++ = hexdigits[(c & 0xf0) >> 4];
635 *p++ = hexdigits[c & 0xf];
636 }
637 else
638 *p++ = c;
639 }
640 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
641 *p++ = quote;
642 *p = '\0';
643 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
644 Py_DECREF(v);
645 return NULL;
646 }
647 return v;
648 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000649}
650
Neal Norwitz6968b052007-02-27 19:02:19 +0000651static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000652bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000653{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000655}
656
Neal Norwitz6968b052007-02-27 19:02:19 +0000657static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000658bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000659{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 if (Py_BytesWarningFlag) {
661 if (PyErr_WarnEx(PyExc_BytesWarning,
662 "str() on a bytes instance", 1))
663 return NULL;
664 }
665 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000666}
667
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000668static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000669bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000671 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000672}
Neal Norwitz6968b052007-02-27 19:02:19 +0000673
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000674/* This is also used by PyBytes_Concat() */
675static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000676bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000677{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 Py_ssize_t size;
679 Py_buffer va, vb;
680 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 va.len = -1;
683 vb.len = -1;
684 if (_getbuffer(a, &va) < 0 ||
685 _getbuffer(b, &vb) < 0) {
686 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
687 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
688 goto done;
689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 /* Optimize end cases */
692 if (va.len == 0 && PyBytes_CheckExact(b)) {
693 result = b;
694 Py_INCREF(result);
695 goto done;
696 }
697 if (vb.len == 0 && PyBytes_CheckExact(a)) {
698 result = a;
699 Py_INCREF(result);
700 goto done;
701 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 size = va.len + vb.len;
704 if (size < 0) {
705 PyErr_NoMemory();
706 goto done;
707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 result = PyBytes_FromStringAndSize(NULL, size);
710 if (result != NULL) {
711 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
712 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000714
715 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 if (va.len != -1)
717 PyBuffer_Release(&va);
718 if (vb.len != -1)
719 PyBuffer_Release(&vb);
720 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000721}
Neal Norwitz6968b052007-02-27 19:02:19 +0000722
723static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000724bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000725{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 register Py_ssize_t i;
727 register Py_ssize_t j;
728 register Py_ssize_t size;
729 register PyBytesObject *op;
730 size_t nbytes;
731 if (n < 0)
732 n = 0;
733 /* watch out for overflows: the size can overflow int,
734 * and the # of bytes needed can overflow size_t
735 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000736 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 PyErr_SetString(PyExc_OverflowError,
738 "repeated bytes are too long");
739 return NULL;
740 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000741 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000742 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
743 Py_INCREF(a);
744 return (PyObject *)a;
745 }
746 nbytes = (size_t)size;
747 if (nbytes + PyBytesObject_SIZE <= nbytes) {
748 PyErr_SetString(PyExc_OverflowError,
749 "repeated bytes are too long");
750 return NULL;
751 }
752 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
753 if (op == NULL)
754 return PyErr_NoMemory();
755 PyObject_INIT_VAR(op, &PyBytes_Type, size);
756 op->ob_shash = -1;
757 op->ob_sval[size] = '\0';
758 if (Py_SIZE(a) == 1 && n > 0) {
759 memset(op->ob_sval, a->ob_sval[0] , n);
760 return (PyObject *) op;
761 }
762 i = 0;
763 if (i < size) {
764 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
765 i = Py_SIZE(a);
766 }
767 while (i < size) {
768 j = (i <= size-i) ? i : size-i;
769 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
770 i += j;
771 }
772 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000773}
774
Guido van Rossum98297ee2007-11-06 21:34:58 +0000775static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000776bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777{
778 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
779 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000780 Py_buffer varg;
781 int pos;
782 PyErr_Clear();
783 if (_getbuffer(arg, &varg) < 0)
784 return -1;
785 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
786 varg.buf, varg.len, 0);
787 PyBuffer_Release(&varg);
788 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000789 }
790 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000791 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
792 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793 }
794
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000795 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000796}
797
Neal Norwitz6968b052007-02-27 19:02:19 +0000798static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000799bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000800{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 if (i < 0 || i >= Py_SIZE(a)) {
802 PyErr_SetString(PyExc_IndexError, "index out of range");
803 return NULL;
804 }
805 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000806}
807
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000808static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000809bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 int c;
812 Py_ssize_t len_a, len_b;
813 Py_ssize_t min_len;
814 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 /* Make sure both arguments are strings. */
817 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
818 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
819 (PyObject_IsInstance((PyObject*)a,
820 (PyObject*)&PyUnicode_Type) ||
821 PyObject_IsInstance((PyObject*)b,
822 (PyObject*)&PyUnicode_Type))) {
823 if (PyErr_WarnEx(PyExc_BytesWarning,
824 "Comparison between bytes and string", 1))
825 return NULL;
826 }
827 result = Py_NotImplemented;
828 goto out;
829 }
830 if (a == b) {
831 switch (op) {
832 case Py_EQ:case Py_LE:case Py_GE:
833 result = Py_True;
834 goto out;
835 case Py_NE:case Py_LT:case Py_GT:
836 result = Py_False;
837 goto out;
838 }
839 }
840 if (op == Py_EQ) {
841 /* Supporting Py_NE here as well does not save
842 much time, since Py_NE is rarely used. */
843 if (Py_SIZE(a) == Py_SIZE(b)
844 && (a->ob_sval[0] == b->ob_sval[0]
845 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
846 result = Py_True;
847 } else {
848 result = Py_False;
849 }
850 goto out;
851 }
852 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
853 min_len = (len_a < len_b) ? len_a : len_b;
854 if (min_len > 0) {
855 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
856 if (c==0)
857 c = memcmp(a->ob_sval, b->ob_sval, min_len);
858 } else
859 c = 0;
860 if (c == 0)
861 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
862 switch (op) {
863 case Py_LT: c = c < 0; break;
864 case Py_LE: c = c <= 0; break;
865 case Py_EQ: assert(0); break; /* unreachable */
866 case Py_NE: c = c != 0; break;
867 case Py_GT: c = c > 0; break;
868 case Py_GE: c = c >= 0; break;
869 default:
870 result = Py_NotImplemented;
871 goto out;
872 }
873 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000874 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 Py_INCREF(result);
876 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000877}
878
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000879static long
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000880bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 register Py_ssize_t len;
883 register unsigned char *p;
884 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000886 if (a->ob_shash != -1)
887 return a->ob_shash;
888 len = Py_SIZE(a);
889 p = (unsigned char *) a->ob_sval;
890 x = *p << 7;
891 while (--len >= 0)
892 x = (1000003*x) ^ *p++;
893 x ^= Py_SIZE(a);
894 if (x == -1)
895 x = -2;
896 a->ob_shash = x;
897 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000898}
899
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000900static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000901bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000902{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 if (PyIndex_Check(item)) {
904 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
905 if (i == -1 && PyErr_Occurred())
906 return NULL;
907 if (i < 0)
908 i += PyBytes_GET_SIZE(self);
909 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
910 PyErr_SetString(PyExc_IndexError,
911 "index out of range");
912 return NULL;
913 }
914 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
915 }
916 else if (PySlice_Check(item)) {
917 Py_ssize_t start, stop, step, slicelength, cur, i;
918 char* source_buf;
919 char* result_buf;
920 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 if (PySlice_GetIndicesEx((PySliceObject*)item,
923 PyBytes_GET_SIZE(self),
924 &start, &stop, &step, &slicelength) < 0) {
925 return NULL;
926 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 if (slicelength <= 0) {
929 return PyBytes_FromStringAndSize("", 0);
930 }
931 else if (start == 0 && step == 1 &&
932 slicelength == PyBytes_GET_SIZE(self) &&
933 PyBytes_CheckExact(self)) {
934 Py_INCREF(self);
935 return (PyObject *)self;
936 }
937 else if (step == 1) {
938 return PyBytes_FromStringAndSize(
939 PyBytes_AS_STRING(self) + start,
940 slicelength);
941 }
942 else {
943 source_buf = PyBytes_AS_STRING(self);
944 result = PyBytes_FromStringAndSize(NULL, slicelength);
945 if (result == NULL)
946 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 result_buf = PyBytes_AS_STRING(result);
949 for (cur = start, i = 0; i < slicelength;
950 cur += step, i++) {
951 result_buf[i] = source_buf[cur];
952 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 return result;
955 }
956 }
957 else {
958 PyErr_Format(PyExc_TypeError,
959 "byte indices must be integers, not %.200s",
960 Py_TYPE(item)->tp_name);
961 return NULL;
962 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000963}
964
965static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000966bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000967{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000968 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
969 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000970}
971
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000972static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 (lenfunc)bytes_length, /*sq_length*/
974 (binaryfunc)bytes_concat, /*sq_concat*/
975 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
976 (ssizeargfunc)bytes_item, /*sq_item*/
977 0, /*sq_slice*/
978 0, /*sq_ass_item*/
979 0, /*sq_ass_slice*/
980 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000981};
982
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000983static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000984 (lenfunc)bytes_length,
985 (binaryfunc)bytes_subscript,
986 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000987};
988
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000989static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 (getbufferproc)bytes_buffer_getbuffer,
991 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000992};
993
994
/* Strip-direction selectors.  Their values double as indices into
   stripformat[] below, so the two must be kept in step. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its leading "|O:" */
#define STRIPNAME(i) (stripformat[i]+3)
1003
Neal Norwitz6968b052007-02-27 19:02:19 +00001004PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001005"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001006\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001007Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001008If sep is not specified or is None, B is split on ASCII whitespace\n\
1009characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001010If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001011
1012static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001013bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001014{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1016 Py_ssize_t maxsplit = -1;
1017 const char *s = PyBytes_AS_STRING(self), *sub;
1018 Py_buffer vsub;
1019 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1022 return NULL;
1023 if (maxsplit < 0)
1024 maxsplit = PY_SSIZE_T_MAX;
1025 if (subobj == Py_None)
1026 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1027 if (_getbuffer(subobj, &vsub) < 0)
1028 return NULL;
1029 sub = vsub.buf;
1030 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1033 PyBuffer_Release(&vsub);
1034 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001035}
1036
Neal Norwitz6968b052007-02-27 19:02:19 +00001037PyDoc_STRVAR(partition__doc__,
1038"B.partition(sep) -> (head, sep, tail)\n\
1039\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001040Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001041the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001042found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
1044static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001045bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001046{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 const char *sep;
1048 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 if (PyBytes_Check(sep_obj)) {
1051 sep = PyBytes_AS_STRING(sep_obj);
1052 sep_len = PyBytes_GET_SIZE(sep_obj);
1053 }
1054 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1055 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 return stringlib_partition(
1058 (PyObject*) self,
1059 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1060 sep_obj, sep, sep_len
1061 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001062}
1063
1064PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001065"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001066\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001067Search for the separator sep in B, starting at the end of B,\n\
1068and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001069part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001070bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001071
1072static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001073bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001074{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 const char *sep;
1076 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 if (PyBytes_Check(sep_obj)) {
1079 sep = PyBytes_AS_STRING(sep_obj);
1080 sep_len = PyBytes_GET_SIZE(sep_obj);
1081 }
1082 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1083 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 return stringlib_rpartition(
1086 (PyObject*) self,
1087 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1088 sep_obj, sep, sep_len
1089 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001090}
1091
Neal Norwitz6968b052007-02-27 19:02:19 +00001092PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001093"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001094\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001095Return a list of the sections in B, using sep as the delimiter,\n\
1096starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001097If sep is not given, B is split on ASCII whitespace characters\n\
1098(space, tab, return, newline, formfeed, vertical tab).\n\
1099If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001100
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001101
Neal Norwitz6968b052007-02-27 19:02:19 +00001102static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001103bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001104{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1106 Py_ssize_t maxsplit = -1;
1107 const char *s = PyBytes_AS_STRING(self), *sub;
1108 Py_buffer vsub;
1109 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1112 return NULL;
1113 if (maxsplit < 0)
1114 maxsplit = PY_SSIZE_T_MAX;
1115 if (subobj == Py_None)
1116 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1117 if (_getbuffer(subobj, &vsub) < 0)
1118 return NULL;
1119 sub = vsub.buf;
1120 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1123 PyBuffer_Release(&vsub);
1124 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001125}
1126
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001127
1128PyDoc_STRVAR(join__doc__,
1129"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001130\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001131Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001132Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1133
Neal Norwitz6968b052007-02-27 19:02:19 +00001134static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001135bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 char *sep = PyBytes_AS_STRING(self);
1138 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1139 PyObject *res = NULL;
1140 char *p;
1141 Py_ssize_t seqlen = 0;
1142 size_t sz = 0;
1143 Py_ssize_t i;
1144 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 seq = PySequence_Fast(orig, "");
1147 if (seq == NULL) {
1148 return NULL;
1149 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 seqlen = PySequence_Size(seq);
1152 if (seqlen == 0) {
1153 Py_DECREF(seq);
1154 return PyBytes_FromString("");
1155 }
1156 if (seqlen == 1) {
1157 item = PySequence_Fast_GET_ITEM(seq, 0);
1158 if (PyBytes_CheckExact(item)) {
1159 Py_INCREF(item);
1160 Py_DECREF(seq);
1161 return item;
1162 }
1163 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 /* There are at least two things to join, or else we have a subclass
1166 * of the builtin types in the sequence.
1167 * Do a pre-pass to figure out the total amount of space we'll
1168 * need (sz), and see whether all argument are bytes.
1169 */
1170 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1171 for (i = 0; i < seqlen; i++) {
1172 const size_t old_sz = sz;
1173 item = PySequence_Fast_GET_ITEM(seq, i);
1174 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1175 PyErr_Format(PyExc_TypeError,
1176 "sequence item %zd: expected bytes,"
1177 " %.80s found",
1178 i, Py_TYPE(item)->tp_name);
1179 Py_DECREF(seq);
1180 return NULL;
1181 }
1182 sz += Py_SIZE(item);
1183 if (i != 0)
1184 sz += seplen;
1185 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1186 PyErr_SetString(PyExc_OverflowError,
1187 "join() result is too long for bytes");
1188 Py_DECREF(seq);
1189 return NULL;
1190 }
1191 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 /* Allocate result space. */
1194 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1195 if (res == NULL) {
1196 Py_DECREF(seq);
1197 return NULL;
1198 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 /* Catenate everything. */
1201 /* I'm not worried about a PyByteArray item growing because there's
1202 nowhere in this function where we release the GIL. */
1203 p = PyBytes_AS_STRING(res);
1204 for (i = 0; i < seqlen; ++i) {
1205 size_t n;
1206 char *q;
1207 if (i) {
1208 Py_MEMCPY(p, sep, seplen);
1209 p += seplen;
1210 }
1211 item = PySequence_Fast_GET_ITEM(seq, i);
1212 n = Py_SIZE(item);
1213 if (PyBytes_Check(item))
1214 q = PyBytes_AS_STRING(item);
1215 else
1216 q = PyByteArray_AS_STRING(item);
1217 Py_MEMCPY(p, q, n);
1218 p += n;
1219 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 Py_DECREF(seq);
1222 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001223}
1224
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225PyObject *
1226_PyBytes_Join(PyObject *sep, PyObject *x)
1227{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 assert(sep != NULL && PyBytes_Check(sep));
1229 assert(x != NULL);
1230 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231}
1232
/* Helper macro to fix up start/end slice values against a length.
 *
 * `end` is clamped to `len` when larger; a negative `end` or `start` has
 * `len` added and is then clamped to 0 if still negative.  `start` and
 * `end` must be modifiable lvalues; all three arguments are evaluated more
 * than once, so they must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement (safe in an unbraced if/else); invoke it with a
 * trailing semicolon.  Arguments are parenthesized so that expressions
 * containing low-precedence operators expand correctly.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247
1248Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001249bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 PyObject *subobj;
1252 const char *sub;
1253 Py_ssize_t sub_len;
1254 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1255 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1258 &obj_start, &obj_end))
1259 return -2;
1260 /* To support None in "start" and "end" arguments, meaning
1261 the same as if they were not passed.
1262 */
1263 if (obj_start != Py_None)
1264 if (!_PyEval_SliceIndex(obj_start, &start))
1265 return -2;
1266 if (obj_end != Py_None)
1267 if (!_PyEval_SliceIndex(obj_end, &end))
1268 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 if (PyBytes_Check(subobj)) {
1271 sub = PyBytes_AS_STRING(subobj);
1272 sub_len = PyBytes_GET_SIZE(subobj);
1273 }
1274 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1275 /* XXX - the "expected a character buffer object" is pretty
1276 confusing for a non-expert. remap to something else ? */
1277 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 if (dir > 0)
1280 return stringlib_find_slice(
1281 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1282 sub, sub_len, start, end);
1283 else
1284 return stringlib_rfind_slice(
1285 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1286 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287}
1288
1289
1290PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001291"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001292\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001293Return the lowest index in B where substring sub is found,\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294such that sub is contained within s[start:end]. Optional\n\
1295arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001296\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297Return -1 on failure.");
1298
Neal Norwitz6968b052007-02-27 19:02:19 +00001299static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001300bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001301{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 Py_ssize_t result = bytes_find_internal(self, args, +1);
1303 if (result == -2)
1304 return NULL;
1305 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001306}
1307
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308
1309PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001310"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001311\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312Like B.find() but raise ValueError when the substring is not found.");
1313
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001314static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001315bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 Py_ssize_t result = bytes_find_internal(self, args, +1);
1318 if (result == -2)
1319 return NULL;
1320 if (result == -1) {
1321 PyErr_SetString(PyExc_ValueError,
1322 "substring not found");
1323 return NULL;
1324 }
1325 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001326}
1327
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328
1329PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001330"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001331\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332Return the highest index in B where substring sub is found,\n\
1333such that sub is contained within s[start:end]. Optional\n\
1334arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001335\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001336Return -1 on failure.");
1337
Neal Norwitz6968b052007-02-27 19:02:19 +00001338static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001339bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 Py_ssize_t result = bytes_find_internal(self, args, -1);
1342 if (result == -2)
1343 return NULL;
1344 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001345}
1346
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001347
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001348PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001349"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001350\n\
1351Like B.rfind() but raise ValueError when the substring is not found.");
1352
1353static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001354bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 Py_ssize_t result = bytes_find_internal(self, args, -1);
1357 if (result == -2)
1358 return NULL;
1359 if (result == -1) {
1360 PyErr_SetString(PyExc_ValueError,
1361 "substring not found");
1362 return NULL;
1363 }
1364 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001365}
1366
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
1368Py_LOCAL_INLINE(PyObject *)
1369do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 Py_buffer vsep;
1372 char *s = PyBytes_AS_STRING(self);
1373 Py_ssize_t len = PyBytes_GET_SIZE(self);
1374 char *sep;
1375 Py_ssize_t seplen;
1376 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 if (_getbuffer(sepobj, &vsep) < 0)
1379 return NULL;
1380 sep = vsep.buf;
1381 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 i = 0;
1384 if (striptype != RIGHTSTRIP) {
1385 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1386 i++;
1387 }
1388 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 j = len;
1391 if (striptype != LEFTSTRIP) {
1392 do {
1393 j--;
1394 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1395 j++;
1396 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1401 Py_INCREF(self);
1402 return (PyObject*)self;
1403 }
1404 else
1405 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001406}
1407
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408
1409Py_LOCAL_INLINE(PyObject *)
1410do_strip(PyBytesObject *self, int striptype)
1411{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 char *s = PyBytes_AS_STRING(self);
1413 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 i = 0;
1416 if (striptype != RIGHTSTRIP) {
1417 while (i < len && ISSPACE(s[i])) {
1418 i++;
1419 }
1420 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 j = len;
1423 if (striptype != LEFTSTRIP) {
1424 do {
1425 j--;
1426 } while (j >= i && ISSPACE(s[j]));
1427 j++;
1428 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1431 Py_INCREF(self);
1432 return (PyObject*)self;
1433 }
1434 else
1435 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436}
1437
1438
1439Py_LOCAL_INLINE(PyObject *)
1440do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1441{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1445 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 if (sep != NULL && sep != Py_None) {
1448 return do_xstrip(self, striptype, sep);
1449 }
1450 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001451}
1452
1453
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001455"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001457Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001459static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001460bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 if (PyTuple_GET_SIZE(args) == 0)
1463 return do_strip(self, BOTHSTRIP); /* Common case */
1464 else
1465 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466}
1467
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001468
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001470"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001471\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001472Strip leading bytes contained in the argument.\n\
1473If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001474static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001475bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001476{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 if (PyTuple_GET_SIZE(args) == 0)
1478 return do_strip(self, LEFTSTRIP); /* Common case */
1479 else
1480 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001481}
1482
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001484PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001485"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001486\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001487Strip trailing bytes contained in the argument.\n\
1488If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001489static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001490bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001491{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 if (PyTuple_GET_SIZE(args) == 0)
1493 return do_strip(self, RIGHTSTRIP); /* Common case */
1494 else
1495 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001496}
Neal Norwitz6968b052007-02-27 19:02:19 +00001497
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001498
1499PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001500"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001501\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001503string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001504as in slice notation.");
1505
1506static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001507bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 PyObject *sub_obj;
1510 const char *str = PyBytes_AS_STRING(self), *sub;
1511 Py_ssize_t sub_len;
1512 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1515 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1516 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 if (PyBytes_Check(sub_obj)) {
1519 sub = PyBytes_AS_STRING(sub_obj);
1520 sub_len = PyBytes_GET_SIZE(sub_obj);
1521 }
1522 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1523 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 return PyLong_FromSsize_t(
1528 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1529 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001530}
1531
1532
1533PyDoc_STRVAR(translate__doc__,
1534"B.translate(table[, deletechars]) -> bytes\n\
1535\n\
1536Return a copy of B, where all characters occurring in the\n\
1537optional argument deletechars are removed, and the remaining\n\
1538characters have been mapped through the given translation\n\
1539table, which must be a bytes object of length 256.");
1540
1541static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001542bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001543{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 register char *input, *output;
1545 const char *table;
1546 register Py_ssize_t i, c, changed = 0;
1547 PyObject *input_obj = (PyObject*)self;
1548 const char *output_start, *del_table=NULL;
1549 Py_ssize_t inlen, tablen, dellen = 0;
1550 PyObject *result;
1551 int trans_table[256];
1552 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1555 &tableobj, &delobj))
1556 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001558 if (PyBytes_Check(tableobj)) {
1559 table = PyBytes_AS_STRING(tableobj);
1560 tablen = PyBytes_GET_SIZE(tableobj);
1561 }
1562 else if (tableobj == Py_None) {
1563 table = NULL;
1564 tablen = 256;
1565 }
1566 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1567 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 if (tablen != 256) {
1570 PyErr_SetString(PyExc_ValueError,
1571 "translation table must be 256 characters long");
1572 return NULL;
1573 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 if (delobj != NULL) {
1576 if (PyBytes_Check(delobj)) {
1577 del_table = PyBytes_AS_STRING(delobj);
1578 dellen = PyBytes_GET_SIZE(delobj);
1579 }
1580 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1581 return NULL;
1582 }
1583 else {
1584 del_table = NULL;
1585 dellen = 0;
1586 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 inlen = PyBytes_GET_SIZE(input_obj);
1589 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1590 if (result == NULL)
1591 return NULL;
1592 output_start = output = PyBytes_AsString(result);
1593 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001594
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001595 if (dellen == 0 && table != NULL) {
1596 /* If no deletions are required, use faster code */
1597 for (i = inlen; --i >= 0; ) {
1598 c = Py_CHARMASK(*input++);
1599 if (Py_CHARMASK((*output++ = table[c])) != c)
1600 changed = 1;
1601 }
1602 if (changed || !PyBytes_CheckExact(input_obj))
1603 return result;
1604 Py_DECREF(result);
1605 Py_INCREF(input_obj);
1606 return input_obj;
1607 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 if (table == NULL) {
1610 for (i = 0; i < 256; i++)
1611 trans_table[i] = Py_CHARMASK(i);
1612 } else {
1613 for (i = 0; i < 256; i++)
1614 trans_table[i] = Py_CHARMASK(table[i]);
1615 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 for (i = 0; i < dellen; i++)
1618 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001620 for (i = inlen; --i >= 0; ) {
1621 c = Py_CHARMASK(*input++);
1622 if (trans_table[c] != -1)
1623 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1624 continue;
1625 changed = 1;
1626 }
1627 if (!changed && PyBytes_CheckExact(input_obj)) {
1628 Py_DECREF(result);
1629 Py_INCREF(input_obj);
1630 return input_obj;
1631 }
1632 /* Fix the size of the resulting string */
1633 if (inlen > 0)
1634 _PyBytes_Resize(&result, output - output_start);
1635 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636}
1637
1638
Georg Brandlabc38772009-04-12 15:51:51 +00001639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001640bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001643}
1644
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001645/* find and count characters and substrings */
1646
/* Locate the first occurrence of byte c in the first target_len bytes of
 * target.  Evaluates to a char * pointing at the match, or NULL when the
 * byte is absent.  Every macro parameter is parenthesized so that any
 * argument expression expands safely. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1649
1650/* String ops must return a string. */
1651/* If the object is subclass of string, create a copy */
1652Py_LOCAL(PyBytesObject *)
1653return_self(PyBytesObject *self)
1654{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 if (PyBytes_CheckExact(self)) {
1656 Py_INCREF(self);
1657 return self;
1658 }
1659 return (PyBytesObject *)PyBytes_FromStringAndSize(
1660 PyBytes_AS_STRING(self),
1661 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662}
1663
1664Py_LOCAL_INLINE(Py_ssize_t)
1665countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1666{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 Py_ssize_t count=0;
1668 const char *start=target;
1669 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 while ( (start=findchar(start, end-start, c)) != NULL ) {
1672 count++;
1673 if (count >= maxcount)
1674 break;
1675 start += 1;
1676 }
1677 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678}
1679
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001680
1681/* Algorithms for different cases of string replacement */
1682
1683/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1684Py_LOCAL(PyBytesObject *)
1685replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 const char *to_s, Py_ssize_t to_len,
1687 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 char *self_s, *result_s;
1690 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001691 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001695
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001696 /* 1 at the end plus 1 after every character;
1697 count = min(maxcount, self_len + 1) */
1698 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001700 else
1701 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1702 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 /* Check for overflow */
1705 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001706 assert(count > 0);
1707 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 PyErr_SetString(PyExc_OverflowError,
1709 "replacement bytes are too long");
1710 return NULL;
1711 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001712 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 if (! (result = (PyBytesObject *)
1715 PyBytes_FromStringAndSize(NULL, result_len)) )
1716 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 self_s = PyBytes_AS_STRING(self);
1719 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 /* Lay the first one down (guaranteed this will occur) */
1724 Py_MEMCPY(result_s, to_s, to_len);
1725 result_s += to_len;
1726 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 for (i=0; i<count; i++) {
1729 *result_s++ = *self_s++;
1730 Py_MEMCPY(result_s, to_s, to_len);
1731 result_s += to_len;
1732 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 /* Copy the rest of the original string */
1735 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001738}
1739
1740/* Special case for deleting a single character */
1741/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1742Py_LOCAL(PyBytesObject *)
1743replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 char *self_s, *result_s;
1747 char *start, *next, *end;
1748 Py_ssize_t self_len, result_len;
1749 Py_ssize_t count;
1750 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 self_len = PyBytes_GET_SIZE(self);
1753 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 count = countchar(self_s, self_len, from_c, maxcount);
1756 if (count == 0) {
1757 return return_self(self);
1758 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001760 result_len = self_len - count; /* from_len == 1 */
1761 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 if ( (result = (PyBytesObject *)
1764 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1765 return NULL;
1766 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 start = self_s;
1769 end = self_s + self_len;
1770 while (count-- > 0) {
1771 next = findchar(start, end-start, from_c);
1772 if (next == NULL)
1773 break;
1774 Py_MEMCPY(result_s, start, next-start);
1775 result_s += (next-start);
1776 start = next+1;
1777 }
1778 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001781}
1782
1783/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1784
1785Py_LOCAL(PyBytesObject *)
1786replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 const char *from_s, Py_ssize_t from_len,
1788 Py_ssize_t maxcount) {
1789 char *self_s, *result_s;
1790 char *start, *next, *end;
1791 Py_ssize_t self_len, result_len;
1792 Py_ssize_t count, offset;
1793 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 self_len = PyBytes_GET_SIZE(self);
1796 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 count = stringlib_count(self_s, self_len,
1799 from_s, from_len,
1800 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 if (count == 0) {
1803 /* no matches */
1804 return return_self(self);
1805 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 result_len = self_len - (count * from_len);
1808 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 if ( (result = (PyBytesObject *)
1811 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1812 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 start = self_s;
1817 end = self_s + self_len;
1818 while (count-- > 0) {
1819 offset = stringlib_find(start, end-start,
1820 from_s, from_len,
1821 0);
1822 if (offset == -1)
1823 break;
1824 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 result_s += (next-start);
1829 start = next+from_len;
1830 }
1831 Py_MEMCPY(result_s, start, end-start);
1832 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833}
1834
1835/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1836Py_LOCAL(PyBytesObject *)
1837replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 char from_c, char to_c,
1839 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 char *self_s, *result_s, *start, *end, *next;
1842 Py_ssize_t self_len;
1843 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 /* The result string will be the same size */
1846 self_s = PyBytes_AS_STRING(self);
1847 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 if (next == NULL) {
1852 /* No matches; return the original string */
1853 return return_self(self);
1854 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001855
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 /* Need to make a new string */
1857 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1858 if (result == NULL)
1859 return NULL;
1860 result_s = PyBytes_AS_STRING(result);
1861 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 /* change everything in-place, starting with this one */
1864 start = result_s + (next-self_s);
1865 *start = to_c;
1866 start++;
1867 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 while (--maxcount > 0) {
1870 next = findchar(start, end-start, from_c);
1871 if (next == NULL)
1872 break;
1873 *next = to_c;
1874 start = next+1;
1875 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878}
1879
1880/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1881Py_LOCAL(PyBytesObject *)
1882replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 const char *from_s, Py_ssize_t from_len,
1884 const char *to_s, Py_ssize_t to_len,
1885 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001887 char *result_s, *start, *end;
1888 char *self_s;
1889 Py_ssize_t self_len, offset;
1890 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 self_s = PyBytes_AS_STRING(self);
1895 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 offset = stringlib_find(self_s, self_len,
1898 from_s, from_len,
1899 0);
1900 if (offset == -1) {
1901 /* No matches; return the original string */
1902 return return_self(self);
1903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 /* Need to make a new string */
1906 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1907 if (result == NULL)
1908 return NULL;
1909 result_s = PyBytes_AS_STRING(result);
1910 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 /* change everything in-place, starting with this one */
1913 start = result_s + offset;
1914 Py_MEMCPY(start, to_s, from_len);
1915 start += from_len;
1916 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 while ( --maxcount > 0) {
1919 offset = stringlib_find(start, end-start,
1920 from_s, from_len,
1921 0);
1922 if (offset==-1)
1923 break;
1924 Py_MEMCPY(start+offset, to_s, from_len);
1925 start += offset+from_len;
1926 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929}
1930
1931/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1932Py_LOCAL(PyBytesObject *)
1933replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 char from_c,
1935 const char *to_s, Py_ssize_t to_len,
1936 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 char *self_s, *result_s;
1939 char *start, *next, *end;
1940 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001941 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 self_s = PyBytes_AS_STRING(self);
1945 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 count = countchar(self_s, self_len, from_c, maxcount);
1948 if (count == 0) {
1949 /* no matches, return unchanged */
1950 return return_self(self);
1951 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 /* use the difference between current and new, hence the "-1" */
1954 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001955 assert(count > 0);
1956 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 PyErr_SetString(PyExc_OverflowError,
1958 "replacement bytes are too long");
1959 return NULL;
1960 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001961 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 if ( (result = (PyBytesObject *)
1964 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1965 return NULL;
1966 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001968 start = self_s;
1969 end = self_s + self_len;
1970 while (count-- > 0) {
1971 next = findchar(start, end-start, from_c);
1972 if (next == NULL)
1973 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 if (next == start) {
1976 /* replace with the 'to' */
1977 Py_MEMCPY(result_s, to_s, to_len);
1978 result_s += to_len;
1979 start += 1;
1980 } else {
1981 /* copy the unchanged old then the 'to' */
1982 Py_MEMCPY(result_s, start, next-start);
1983 result_s += (next-start);
1984 Py_MEMCPY(result_s, to_s, to_len);
1985 result_s += to_len;
1986 start = next+1;
1987 }
1988 }
1989 /* Copy the remainder of the remaining string */
1990 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993}
1994
1995/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1996Py_LOCAL(PyBytesObject *)
1997replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001998 const char *from_s, Py_ssize_t from_len,
1999 const char *to_s, Py_ssize_t to_len,
2000 Py_ssize_t maxcount) {
2001 char *self_s, *result_s;
2002 char *start, *next, *end;
2003 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002004 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002007 self_s = PyBytes_AS_STRING(self);
2008 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 count = stringlib_count(self_s, self_len,
2011 from_s, from_len,
2012 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002014 if (count == 0) {
2015 /* no matches, return unchanged */
2016 return return_self(self);
2017 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 /* Check for overflow */
2020 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002021 assert(count > 0);
2022 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002023 PyErr_SetString(PyExc_OverflowError,
2024 "replacement bytes are too long");
2025 return NULL;
2026 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002027 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 if ( (result = (PyBytesObject *)
2030 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2031 return NULL;
2032 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002034 start = self_s;
2035 end = self_s + self_len;
2036 while (count-- > 0) {
2037 offset = stringlib_find(start, end-start,
2038 from_s, from_len,
2039 0);
2040 if (offset == -1)
2041 break;
2042 next = start+offset;
2043 if (next == start) {
2044 /* replace with the 'to' */
2045 Py_MEMCPY(result_s, to_s, to_len);
2046 result_s += to_len;
2047 start += from_len;
2048 } else {
2049 /* copy the unchanged old then the 'to' */
2050 Py_MEMCPY(result_s, start, next-start);
2051 result_s += (next-start);
2052 Py_MEMCPY(result_s, to_s, to_len);
2053 result_s += to_len;
2054 start = next+from_len;
2055 }
2056 }
2057 /* Copy the remainder of the remaining string */
2058 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061}
2062
2063
2064Py_LOCAL(PyBytesObject *)
2065replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 const char *from_s, Py_ssize_t from_len,
2067 const char *to_s, Py_ssize_t to_len,
2068 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 if (maxcount < 0) {
2071 maxcount = PY_SSIZE_T_MAX;
2072 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2073 /* nothing to do; return the original string */
2074 return return_self(self);
2075 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 if (maxcount == 0 ||
2078 (from_len == 0 && to_len == 0)) {
2079 /* nothing to do; return the original string */
2080 return return_self(self);
2081 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 if (from_len == 0) {
2086 /* insert the 'to' string everywhere. */
2087 /* >>> "Python".replace("", ".") */
2088 /* '.P.y.t.h.o.n.' */
2089 return replace_interleave(self, to_s, to_len, maxcount);
2090 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2093 /* point for an empty self string to generate a non-empty string */
2094 /* Special case so the remaining code always gets a non-empty string */
2095 if (PyBytes_GET_SIZE(self) == 0) {
2096 return return_self(self);
2097 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 if (to_len == 0) {
2100 /* delete all occurrences of 'from' string */
2101 if (from_len == 1) {
2102 return replace_delete_single_character(
2103 self, from_s[0], maxcount);
2104 } else {
2105 return replace_delete_substring(self, from_s,
2106 from_len, maxcount);
2107 }
2108 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 if (from_len == to_len) {
2113 if (from_len == 1) {
2114 return replace_single_character_in_place(
2115 self,
2116 from_s[0],
2117 to_s[0],
2118 maxcount);
2119 } else {
2120 return replace_substring_in_place(
2121 self, from_s, from_len, to_s, to_len,
2122 maxcount);
2123 }
2124 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 /* Otherwise use the more generic algorithms */
2127 if (from_len == 1) {
2128 return replace_single_character(self, from_s[0],
2129 to_s, to_len, maxcount);
2130 } else {
2131 /* len('from')>=2, len('to')>=1 */
2132 return replace_substring(self, from_s, from_len, to_s, to_len,
2133 maxcount);
2134 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135}
2136
2137PyDoc_STRVAR(replace__doc__,
2138"B.replace(old, new[, count]) -> bytes\n\
2139\n\
2140Return a copy of B with all occurrences of subsection\n\
2141old replaced by new. If the optional argument count is\n\
Senthil Kumaran77210b42010-08-09 08:56:25 +00002142positive, only the first count occurrences are replaced. A\n\
Senthil Kumaranf2de1ff2010-08-09 09:03:57 +00002143negative value of count replaces all occurrences");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
2145static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002146bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 Py_ssize_t count = -1;
2149 PyObject *from, *to;
2150 const char *from_s, *to_s;
2151 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2154 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 if (PyBytes_Check(from)) {
2157 from_s = PyBytes_AS_STRING(from);
2158 from_len = PyBytes_GET_SIZE(from);
2159 }
2160 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2161 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 if (PyBytes_Check(to)) {
2164 to_s = PyBytes_AS_STRING(to);
2165 to_len = PyBytes_GET_SIZE(to);
2166 }
2167 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2168 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 return (PyObject *)replace((PyBytesObject *) self,
2171 from_s, from_len,
2172 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173}
2174
2175/** End DALKE **/
2176
2177/* Matches the end (direction >= 0) or start (direction < 0) of self
2178 * against substr, using the start and end arguments. Returns
2179 * -1 on error, 0 if not found and 1 if found.
2180 */
2181Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002182_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 Py_ssize_t len = PyBytes_GET_SIZE(self);
2186 Py_ssize_t slen;
2187 const char* sub;
2188 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 if (PyBytes_Check(substr)) {
2191 sub = PyBytes_AS_STRING(substr);
2192 slen = PyBytes_GET_SIZE(substr);
2193 }
2194 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2195 return -1;
2196 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 if (direction < 0) {
2201 /* startswith */
2202 if (start+slen > len)
2203 return 0;
2204 } else {
2205 /* endswith */
2206 if (end-start < slen || start > len)
2207 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 if (end-slen > start)
2210 start = end - slen;
2211 }
2212 if (end-start >= slen)
2213 return ! memcmp(str+start, sub, slen);
2214 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215}
2216
2217
2218PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002219"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220\n\
2221Return True if B starts with the specified prefix, False otherwise.\n\
2222With optional start, test B beginning at that position.\n\
2223With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002224prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002225
2226static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002227bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002229 Py_ssize_t start = 0;
2230 Py_ssize_t end = PY_SSIZE_T_MAX;
2231 PyObject *subobj;
2232 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002233
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002234 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2235 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2236 return NULL;
2237 if (PyTuple_Check(subobj)) {
2238 Py_ssize_t i;
2239 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2240 result = _bytes_tailmatch(self,
2241 PyTuple_GET_ITEM(subobj, i),
2242 start, end, -1);
2243 if (result == -1)
2244 return NULL;
2245 else if (result) {
2246 Py_RETURN_TRUE;
2247 }
2248 }
2249 Py_RETURN_FALSE;
2250 }
2251 result = _bytes_tailmatch(self, subobj, start, end, -1);
2252 if (result == -1)
2253 return NULL;
2254 else
2255 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002256}
2257
2258
2259PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002260"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002261\n\
2262Return True if B ends with the specified suffix, False otherwise.\n\
2263With optional start, test B beginning at that position.\n\
2264With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002265suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
2267static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002268bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 Py_ssize_t start = 0;
2271 Py_ssize_t end = PY_SSIZE_T_MAX;
2272 PyObject *subobj;
2273 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002275 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2276 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2277 return NULL;
2278 if (PyTuple_Check(subobj)) {
2279 Py_ssize_t i;
2280 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2281 result = _bytes_tailmatch(self,
2282 PyTuple_GET_ITEM(subobj, i),
2283 start, end, +1);
2284 if (result == -1)
2285 return NULL;
2286 else if (result) {
2287 Py_RETURN_TRUE;
2288 }
2289 }
2290 Py_RETURN_FALSE;
2291 }
2292 result = _bytes_tailmatch(self, subobj, start, end, +1);
2293 if (result == -1)
2294 return NULL;
2295 else
2296 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297}
2298
2299
2300PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002301"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002303Decode B using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002304to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002305handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2306a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002308able to handle UnicodeDecodeErrors.");
2309
2310static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002311bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 const char *encoding = NULL;
2314 const char *errors = NULL;
2315 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2318 return NULL;
2319 if (encoding == NULL)
2320 encoding = PyUnicode_GetDefaultEncoding();
2321 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002322}
2323
Guido van Rossum20188312006-05-05 15:15:40 +00002324
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002325PyDoc_STRVAR(splitlines__doc__,
2326"B.splitlines([keepends]) -> list of lines\n\
2327\n\
2328Return a list of the lines in B, breaking at line boundaries.\n\
2329Line breaks are not included in the resulting list unless keepends\n\
2330is given and true.");
2331
2332static PyObject*
2333bytes_splitlines(PyObject *self, PyObject *args)
2334{
2335 int keepends = 0;
2336
2337 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002338 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002339
2340 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002341 (PyObject*) self, PyBytes_AS_STRING(self),
2342 PyBytes_GET_SIZE(self), keepends
2343 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002344}
2345
2346
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002347PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002349\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002350Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002351Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002352Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002353
2354static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002355hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 if (c >= 128)
2358 return -1;
2359 if (ISDIGIT(c))
2360 return c - '0';
2361 else {
2362 if (ISUPPER(c))
2363 c = TOLOWER(c);
2364 if (c >= 'a' && c <= 'f')
2365 return c - 'a' + 10;
2366 }
2367 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002368}
2369
2370static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002371bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002372{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002373 PyObject *newstring, *hexobj;
2374 char *buf;
2375 Py_UNICODE *hex;
2376 Py_ssize_t hexlen, byteslen, i, j;
2377 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002379 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2380 return NULL;
2381 assert(PyUnicode_Check(hexobj));
2382 hexlen = PyUnicode_GET_SIZE(hexobj);
2383 hex = PyUnicode_AS_UNICODE(hexobj);
2384 byteslen = hexlen/2; /* This overestimates if there are spaces */
2385 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2386 if (!newstring)
2387 return NULL;
2388 buf = PyBytes_AS_STRING(newstring);
2389 for (i = j = 0; i < hexlen; i += 2) {
2390 /* skip over spaces in the input */
2391 while (hex[i] == ' ')
2392 i++;
2393 if (i >= hexlen)
2394 break;
2395 top = hex_digit_to_int(hex[i]);
2396 bot = hex_digit_to_int(hex[i+1]);
2397 if (top == -1 || bot == -1) {
2398 PyErr_Format(PyExc_ValueError,
2399 "non-hexadecimal number found in "
2400 "fromhex() arg at position %zd", i);
2401 goto error;
2402 }
2403 buf[j++] = (top << 4) + bot;
2404 }
2405 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2406 goto error;
2407 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002408
2409 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 Py_XDECREF(newstring);
2411 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002412}
2413
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002414PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002415"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002416
2417static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002418bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 Py_ssize_t res;
2421 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2422 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002423}
2424
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002425
2426static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002427bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002428{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002430}
2431
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002432
2433static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002434bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2436 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2437 _Py_capitalize__doc__},
2438 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2439 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2440 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2441 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2442 endswith__doc__},
2443 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2444 expandtabs__doc__},
2445 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2446 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2447 fromhex_doc},
2448 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2449 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2450 _Py_isalnum__doc__},
2451 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2452 _Py_isalpha__doc__},
2453 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2454 _Py_isdigit__doc__},
2455 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2456 _Py_islower__doc__},
2457 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2458 _Py_isspace__doc__},
2459 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2460 _Py_istitle__doc__},
2461 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2462 _Py_isupper__doc__},
2463 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2464 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2465 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2466 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2467 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2468 _Py_maketrans__doc__},
2469 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2470 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2471 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2472 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2473 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2474 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2475 rpartition__doc__},
2476 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2477 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2478 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2479 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2480 splitlines__doc__},
2481 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2482 startswith__doc__},
2483 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2484 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2485 _Py_swapcase__doc__},
2486 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2487 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2488 translate__doc__},
2489 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2490 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2491 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2492 sizeof__doc__},
2493 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002494};
2495
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002496static PyObject *
2497str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2498
2499static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002500bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 PyObject *x = NULL;
2503 const char *encoding = NULL;
2504 const char *errors = NULL;
2505 PyObject *new = NULL;
2506 Py_ssize_t size;
2507 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 if (type != &PyBytes_Type)
2510 return str_subtype_new(type, args, kwds);
2511 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2512 &encoding, &errors))
2513 return NULL;
2514 if (x == NULL) {
2515 if (encoding != NULL || errors != NULL) {
2516 PyErr_SetString(PyExc_TypeError,
2517 "encoding or errors without sequence "
2518 "argument");
2519 return NULL;
2520 }
2521 return PyBytes_FromString("");
2522 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 if (PyUnicode_Check(x)) {
2525 /* Encode via the codec registry */
2526 if (encoding == NULL) {
2527 PyErr_SetString(PyExc_TypeError,
2528 "string argument without an encoding");
2529 return NULL;
2530 }
2531 new = PyUnicode_AsEncodedString(x, encoding, errors);
2532 if (new == NULL)
2533 return NULL;
2534 assert(PyBytes_Check(new));
2535 return new;
2536 }
2537 /* Is it an integer? */
2538 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2539 if (size == -1 && PyErr_Occurred()) {
2540 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2541 return NULL;
2542 PyErr_Clear();
2543 }
2544 else if (size < 0) {
2545 PyErr_SetString(PyExc_ValueError, "negative count");
2546 return NULL;
2547 }
2548 else {
2549 new = PyBytes_FromStringAndSize(NULL, size);
2550 if (new == NULL) {
2551 return NULL;
2552 }
2553 if (size > 0) {
2554 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2555 }
2556 return new;
2557 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 /* If it's not unicode, there can't be encoding or errors */
2560 if (encoding != NULL || errors != NULL) {
2561 PyErr_SetString(PyExc_TypeError,
2562 "encoding or errors without a string argument");
2563 return NULL;
2564 }
2565 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002566}
2567
2568PyObject *
2569PyBytes_FromObject(PyObject *x)
2570{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 PyObject *new, *it;
2572 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 if (x == NULL) {
2575 PyErr_BadInternalCall();
2576 return NULL;
2577 }
2578 /* Use the modern buffer interface */
2579 if (PyObject_CheckBuffer(x)) {
2580 Py_buffer view;
2581 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2582 return NULL;
2583 new = PyBytes_FromStringAndSize(NULL, view.len);
2584 if (!new)
2585 goto fail;
2586 /* XXX(brett.cannon): Better way to get to internal buffer? */
2587 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2588 &view, view.len, 'C') < 0)
2589 goto fail;
2590 PyBuffer_Release(&view);
2591 return new;
2592 fail:
2593 Py_XDECREF(new);
2594 PyBuffer_Release(&view);
2595 return NULL;
2596 }
2597 if (PyUnicode_Check(x)) {
2598 PyErr_SetString(PyExc_TypeError,
2599 "cannot convert unicode object to bytes");
2600 return NULL;
2601 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 if (PyList_CheckExact(x)) {
2604 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2605 if (new == NULL)
2606 return NULL;
2607 for (i = 0; i < Py_SIZE(x); i++) {
2608 Py_ssize_t value = PyNumber_AsSsize_t(
2609 PyList_GET_ITEM(x, i), PyExc_ValueError);
2610 if (value == -1 && PyErr_Occurred()) {
2611 Py_DECREF(new);
2612 return NULL;
2613 }
2614 if (value < 0 || value >= 256) {
2615 PyErr_SetString(PyExc_ValueError,
2616 "bytes must be in range(0, 256)");
2617 Py_DECREF(new);
2618 return NULL;
2619 }
2620 ((PyBytesObject *)new)->ob_sval[i] = value;
2621 }
2622 return new;
2623 }
2624 if (PyTuple_CheckExact(x)) {
2625 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2626 if (new == NULL)
2627 return NULL;
2628 for (i = 0; i < Py_SIZE(x); i++) {
2629 Py_ssize_t value = PyNumber_AsSsize_t(
2630 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2631 if (value == -1 && PyErr_Occurred()) {
2632 Py_DECREF(new);
2633 return NULL;
2634 }
2635 if (value < 0 || value >= 256) {
2636 PyErr_SetString(PyExc_ValueError,
2637 "bytes must be in range(0, 256)");
2638 Py_DECREF(new);
2639 return NULL;
2640 }
2641 ((PyBytesObject *)new)->ob_sval[i] = value;
2642 }
2643 return new;
2644 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 /* For iterator version, create a string object and resize as needed */
2647 size = _PyObject_LengthHint(x, 64);
2648 if (size == -1 && PyErr_Occurred())
2649 return NULL;
2650 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2651 returning a shared empty bytes string. This required because we
2652 want to call _PyBytes_Resize() the returned object, which we can
2653 only do on bytes objects with refcount == 1. */
2654 size += 1;
2655 new = PyBytes_FromStringAndSize(NULL, size);
2656 if (new == NULL)
2657 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 /* Get the iterator */
2660 it = PyObject_GetIter(x);
2661 if (it == NULL)
2662 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002664 /* Run the iterator to exhaustion */
2665 for (i = 0; ; i++) {
2666 PyObject *item;
2667 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 /* Get the next item */
2670 item = PyIter_Next(it);
2671 if (item == NULL) {
2672 if (PyErr_Occurred())
2673 goto error;
2674 break;
2675 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Interpret it as an int (__index__) */
2678 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2679 Py_DECREF(item);
2680 if (value == -1 && PyErr_Occurred())
2681 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002683 /* Range check */
2684 if (value < 0 || value >= 256) {
2685 PyErr_SetString(PyExc_ValueError,
2686 "bytes must be in range(0, 256)");
2687 goto error;
2688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 /* Append the byte */
2691 if (i >= size) {
2692 size = 2 * size + 1;
2693 if (_PyBytes_Resize(&new, size) < 0)
2694 goto error;
2695 }
2696 ((PyBytesObject *)new)->ob_sval[i] = value;
2697 }
2698 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 /* Clean up and return success */
2701 Py_DECREF(it);
2702 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
2704 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 /* Error handling when new != NULL */
2706 Py_XDECREF(it);
2707 Py_DECREF(new);
2708 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709}
2710
2711static PyObject *
2712str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2713{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 PyObject *tmp, *pnew;
2715 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 assert(PyType_IsSubtype(type, &PyBytes_Type));
2718 tmp = bytes_new(&PyBytes_Type, args, kwds);
2719 if (tmp == NULL)
2720 return NULL;
2721 assert(PyBytes_CheckExact(tmp));
2722 n = PyBytes_GET_SIZE(tmp);
2723 pnew = type->tp_alloc(type, n);
2724 if (pnew != NULL) {
2725 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2726 PyBytes_AS_STRING(tmp), n+1);
2727 ((PyBytesObject *)pnew)->ob_shash =
2728 ((PyBytesObject *)tmp)->ob_shash;
2729 }
2730 Py_DECREF(tmp);
2731 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732}
2733
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002734PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002735"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002737bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2738bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002739\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002741 - an iterable yielding integers in range(256)\n\
2742 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743 - a bytes or a buffer object\n\
2744 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002745
/* Forward declaration: bytes_iter is defined after the iterator type. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  The initializer is positional, so the order
 * of the entries below must follow the PyTypeObject slot order.
 */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002790
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791void
2792PyBytes_Concat(register PyObject **pv, register PyObject *w)
2793{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 register PyObject *v;
2795 assert(pv != NULL);
2796 if (*pv == NULL)
2797 return;
2798 if (w == NULL) {
2799 Py_DECREF(*pv);
2800 *pv = NULL;
2801 return;
2802 }
2803 v = bytes_concat(*pv, w);
2804 Py_DECREF(*pv);
2805 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806}
2807
2808void
2809PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 PyBytes_Concat(pv, w);
2812 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002813}
2814
2815
2816/* The following function breaks the notion that strings are immutable:
2817 it changes the size of a string. We get away with this only if there
2818 is only one module referencing the object. You can also think of it
2819 as creating a new string object and destroying the old one, only
2820 more efficiently. In any case, don't use this if the string may
2821 already be known to some other part of the code...
2822 Note that if there's not enough memory to resize the string, the original
2823 string object at *pv is deallocated, *pv is set to NULL, an "out of
2824 memory" exception is set, and -1 is returned. Else (on success) 0 is
2825 returned, and the value in *pv may or may not be the same as on input.
2826 As always, an extra byte is allocated for a trailing \0 byte (newsize
2827 does *not* include that), and a trailing \0 byte is stored.
2828*/
2829
2830int
2831_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2832{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 register PyObject *v;
2834 register PyBytesObject *sv;
2835 v = *pv;
2836 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2837 *pv = 0;
2838 Py_DECREF(v);
2839 PyErr_BadInternalCall();
2840 return -1;
2841 }
2842 /* XXX UNREF/NEWREF interface should be more symmetrical */
2843 _Py_DEC_REFTOTAL;
2844 _Py_ForgetReference(v);
2845 *pv = (PyObject *)
2846 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2847 if (*pv == NULL) {
2848 PyObject_Del(v);
2849 PyErr_NoMemory();
2850 return -1;
2851 }
2852 _Py_NewReference(*pv);
2853 sv = (PyBytesObject *) *pv;
2854 Py_SIZE(sv) = newsize;
2855 sv->ob_sval[newsize] = '\0';
2856 sv->ob_shash = -1; /* invalidate cached hash value */
2857 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858}
2859
2860/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2861 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2862 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002863 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002864 * . *pbuf is set to point into it,
2865 * *plen set to the # of chars following that.
2866 * Caller must decref it when done using pbuf.
2867 * The string starting at *pbuf is of the form
2868 * "-"? ("0x" | "0X")? digit+
2869 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2870 * set in flags. The case of hex digits will be correct,
2871 * There will be at least prec digits, zero-filled on the left if
2872 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002873 * val object to be converted
2874 * flags bitmask of format flags; only F_ALT is looked at
2875 * prec minimum number of digits; 0-fill on left if needed
2876 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877 *
2878 * CAUTION: o, x and X conversions on regular ints can never
2879 * produce a '-' sign, but can for Python's unbounded ints.
2880 */
2881PyObject*
2882_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 PyObject *result = NULL;
2886 char *buf;
2887 Py_ssize_t i;
2888 int sign; /* 1 if '-', else 0 */
2889 int len; /* number of characters */
2890 Py_ssize_t llen;
2891 int numdigits; /* len == numnondigits + numdigits */
2892 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 /* Avoid exceeding SSIZE_T_MAX */
2895 if (prec > INT_MAX-3) {
2896 PyErr_SetString(PyExc_OverflowError,
2897 "precision too large");
2898 return NULL;
2899 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002901 switch (type) {
2902 case 'd':
2903 case 'u':
2904 /* Special-case boolean: we want 0/1 */
2905 if (PyBool_Check(val))
2906 result = PyNumber_ToBase(val, 10);
2907 else
2908 result = Py_TYPE(val)->tp_str(val);
2909 break;
2910 case 'o':
2911 numnondigits = 2;
2912 result = PyNumber_ToBase(val, 8);
2913 break;
2914 case 'x':
2915 case 'X':
2916 numnondigits = 2;
2917 result = PyNumber_ToBase(val, 16);
2918 break;
2919 default:
2920 assert(!"'type' not in [duoxX]");
2921 }
2922 if (!result)
2923 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 buf = _PyUnicode_AsString(result);
2926 if (!buf) {
2927 Py_DECREF(result);
2928 return NULL;
2929 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 /* To modify the string in-place, there can only be one reference. */
2932 if (Py_REFCNT(result) != 1) {
2933 PyErr_BadInternalCall();
2934 return NULL;
2935 }
2936 llen = PyUnicode_GetSize(result);
2937 if (llen > INT_MAX) {
2938 PyErr_SetString(PyExc_ValueError,
2939 "string too large in _PyBytes_FormatLong");
2940 return NULL;
2941 }
2942 len = (int)llen;
2943 if (buf[len-1] == 'L') {
2944 --len;
2945 buf[len] = '\0';
2946 }
2947 sign = buf[0] == '-';
2948 numnondigits += sign;
2949 numdigits = len - numnondigits;
2950 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002952 /* Get rid of base marker unless F_ALT */
2953 if (((flags & F_ALT) == 0 &&
2954 (type == 'o' || type == 'x' || type == 'X'))) {
2955 assert(buf[sign] == '0');
2956 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2957 buf[sign+1] == 'o');
2958 numnondigits -= 2;
2959 buf += 2;
2960 len -= 2;
2961 if (sign)
2962 buf[0] = '-';
2963 assert(len == numnondigits + numdigits);
2964 assert(numdigits > 0);
2965 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 /* Fill with leading zeroes to meet minimum width. */
2968 if (prec > numdigits) {
2969 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2970 numnondigits + prec);
2971 char *b1;
2972 if (!r1) {
2973 Py_DECREF(result);
2974 return NULL;
2975 }
2976 b1 = PyBytes_AS_STRING(r1);
2977 for (i = 0; i < numnondigits; ++i)
2978 *b1++ = *buf++;
2979 for (i = 0; i < prec - numdigits; i++)
2980 *b1++ = '0';
2981 for (i = 0; i < numdigits; i++)
2982 *b1++ = *buf++;
2983 *b1 = '\0';
2984 Py_DECREF(result);
2985 result = r1;
2986 buf = PyBytes_AS_STRING(result);
2987 len = numnondigits + prec;
2988 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 /* Fix up case for hex conversions. */
2991 if (type == 'X') {
2992 /* Need to convert all lower case letters to upper case.
2993 and need to convert 0x to 0X (and -0x to -0X). */
2994 for (i = 0; i < len; i++)
2995 if (buf[i] >= 'a' && buf[i] <= 'x')
2996 buf[i] -= 'a'-'A';
2997 }
2998 *pbuf = buf;
2999 *plen = len;
3000 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003001}
3002
3003void
3004PyBytes_Fini(void)
3005{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 int i;
3007 for (i = 0; i < UCHAR_MAX + 1; i++) {
3008 Py_XDECREF(characters[i]);
3009 characters[i] = NULL;
3010 }
3011 Py_XDECREF(nullstring);
3012 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003013}
3014
Benjamin Peterson4116f362008-05-27 00:36:20 +00003015/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to return */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022
3023static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003024striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003025{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 _PyObject_GC_UNTRACK(it);
3027 Py_XDECREF(it->it_seq);
3028 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029}
3030
3031static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 Py_VISIT(it->it_seq);
3035 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036}
3037
3038static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 PyBytesObject *seq;
3042 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 assert(it != NULL);
3045 seq = it->it_seq;
3046 if (seq == NULL)
3047 return NULL;
3048 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003050 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3051 item = PyLong_FromLong(
3052 (unsigned char)seq->ob_sval[it->it_index]);
3053 if (item != NULL)
3054 ++it->it_index;
3055 return item;
3056 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 Py_DECREF(seq);
3059 it->it_seq = NULL;
3060 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061}
3062
3063static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003066 Py_ssize_t len = 0;
3067 if (it->it_seq)
3068 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3069 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003070}
3071
3072PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003074
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003076 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3077 length_hint_doc},
3078 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079};
3080
/* Type object for the iterator created by bytes_iter().  The initializer
 * is positional, so the entries must follow the PyTypeObject slot order.
 */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3113
3114static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003115bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003117 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003119 if (!PyBytes_Check(seq)) {
3120 PyErr_BadInternalCall();
3121 return NULL;
3122 }
3123 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3124 if (it == NULL)
3125 return NULL;
3126 it->it_index = 0;
3127 Py_INCREF(seq);
3128 it->it_seq = (PyBytesObject *)seq;
3129 _PyObject_GC_TRACK(it);
3130 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003131}