blob: eb8036065a1cb7e446b206402aca27b0484fdd76 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
   For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178#else
179#ifdef __va_copy
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 __va_copy(count, vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000181#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 count = vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000183#endif
184#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 const char* p = f;
189 while (*++f && *f != '%' && !ISALPHA(*f))
190 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
193 * they don't affect the amount of space we reserve.
194 */
195 if ((*f == 'l' || *f == 'z') &&
196 (f[1] == 'd' || f[1] == 'u'))
197 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 switch (*f) {
200 case 'c':
201 (void)va_arg(count, int);
202 /* fall through... */
203 case '%':
204 n++;
205 break;
206 case 'd': case 'u': case 'i': case 'x':
207 (void) va_arg(count, int);
208 /* 20 bytes is enough to hold a 64-bit
209 integer. Decimal takes the most space.
210 This isn't enough for octal. */
211 n += 20;
212 break;
213 case 's':
214 s = va_arg(count, char*);
215 n += strlen(s);
216 break;
217 case 'p':
218 (void) va_arg(count, int);
219 /* maximum 64-bit pointer representation:
220 * 0xffffffffffffffff
221 * so 19 characters is enough.
222 * XXX I count 18 -- what's the extra for?
223 */
224 n += 19;
225 break;
226 default:
227 /* if we stumble upon an unknown
228 formatting code, copy the rest of
229 the format string to the output
230 string. (we cannot just skip the
231 code, since there's no way to know
232 what's in the argument list) */
233 n += strlen(p);
234 goto expand;
235 }
236 } else
237 n++;
238 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000239 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000240 /* step 2: fill the buffer */
241 /* Since we've analyzed how much space we need for the worst case,
242 use sprintf directly instead of the slower PyOS_snprintf. */
243 string = PyBytes_FromStringAndSize(NULL, n);
244 if (!string)
245 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 for (f = format; *f; f++) {
250 if (*f == '%') {
251 const char* p = f++;
252 Py_ssize_t i;
253 int longflag = 0;
254 int size_tflag = 0;
255 /* parse the width.precision part (we're only
256 interested in the precision value, if any) */
257 n = 0;
258 while (ISDIGIT(*f))
259 n = (n*10) + *f++ - '0';
260 if (*f == '.') {
261 f++;
262 n = 0;
263 while (ISDIGIT(*f))
264 n = (n*10) + *f++ - '0';
265 }
266 while (*f && *f != '%' && !ISALPHA(*f))
267 f++;
268 /* handle the long flag, but only for %ld and %lu.
269 others can be added when necessary. */
270 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
271 longflag = 1;
272 ++f;
273 }
274 /* handle the size_t flag. */
275 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
276 size_tflag = 1;
277 ++f;
278 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 switch (*f) {
281 case 'c':
282 *s++ = va_arg(vargs, int);
283 break;
284 case 'd':
285 if (longflag)
286 sprintf(s, "%ld", va_arg(vargs, long));
287 else if (size_tflag)
288 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
289 va_arg(vargs, Py_ssize_t));
290 else
291 sprintf(s, "%d", va_arg(vargs, int));
292 s += strlen(s);
293 break;
294 case 'u':
295 if (longflag)
296 sprintf(s, "%lu",
297 va_arg(vargs, unsigned long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
300 va_arg(vargs, size_t));
301 else
302 sprintf(s, "%u",
303 va_arg(vargs, unsigned int));
304 s += strlen(s);
305 break;
306 case 'i':
307 sprintf(s, "%i", va_arg(vargs, int));
308 s += strlen(s);
309 break;
310 case 'x':
311 sprintf(s, "%x", va_arg(vargs, int));
312 s += strlen(s);
313 break;
314 case 's':
315 p = va_arg(vargs, char*);
316 i = strlen(p);
317 if (n > 0 && i > n)
318 i = n;
319 Py_MEMCPY(s, p, i);
320 s += i;
321 break;
322 case 'p':
323 sprintf(s, "%p", va_arg(vargs, void*));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (s[1] == 'X')
326 s[1] = 'x';
327 else if (s[1] != 'x') {
328 memmove(s+2, s, strlen(s)+1);
329 s[0] = '0';
330 s[1] = 'x';
331 }
332 s += strlen(s);
333 break;
334 case '%':
335 *s++ = '%';
336 break;
337 default:
338 strcpy(s, p);
339 s += strlen(s);
340 goto end;
341 }
342 } else
343 *s++ = *f;
344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000345
346 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
348 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349}
350
351PyObject *
352PyBytes_FromFormat(const char *format, ...)
353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 PyObject* ret;
355 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 ret = PyBytes_FromFormatV(format, vargs);
363 va_end(vargs);
364 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000365}
366
367static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000368bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000371}
372
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373/* Unescape a backslash-escaped string. If unicode is non-zero,
374 the string is a u-literal. If recode_encoding is non-zero,
375 the string is UTF-8 encoded and should be re-encoded in the
376 specified encoding. */
377
378PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 Py_ssize_t len,
380 const char *errors,
381 Py_ssize_t unicode,
382 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 int c;
385 char *p, *buf;
386 const char *end;
387 PyObject *v;
388 Py_ssize_t newlen = recode_encoding ? 4*len:len;
389 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
390 if (v == NULL)
391 return NULL;
392 p = buf = PyBytes_AsString(v);
393 end = s + len;
394 while (s < end) {
395 if (*s != '\\') {
396 non_esc:
397 if (recode_encoding && (*s & 0x80)) {
398 PyObject *u, *w;
399 char *r;
400 const char* t;
401 Py_ssize_t rn;
402 t = s;
403 /* Decode non-ASCII bytes as UTF-8. */
404 while (t < end && (*t & 0x80)) t++;
405 u = PyUnicode_DecodeUTF8(s, t - s, errors);
406 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 /* Recode them in target encoding. */
409 w = PyUnicode_AsEncodedString(
410 u, recode_encoding, errors);
411 Py_DECREF(u);
412 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 /* Append bytes to output buffer. */
415 assert(PyBytes_Check(w));
416 r = PyBytes_AS_STRING(w);
417 rn = PyBytes_GET_SIZE(w);
418 Py_MEMCPY(p, r, rn);
419 p += rn;
420 Py_DECREF(w);
421 s = t;
422 } else {
423 *p++ = *s++;
424 }
425 continue;
426 }
427 s++;
428 if (s==end) {
429 PyErr_SetString(PyExc_ValueError,
430 "Trailing \\ in string");
431 goto failed;
432 }
433 switch (*s++) {
434 /* XXX This assumes ASCII! */
435 case '\n': break;
436 case '\\': *p++ = '\\'; break;
437 case '\'': *p++ = '\''; break;
438 case '\"': *p++ = '\"'; break;
439 case 'b': *p++ = '\b'; break;
440 case 'f': *p++ = '\014'; break; /* FF */
441 case 't': *p++ = '\t'; break;
442 case 'n': *p++ = '\n'; break;
443 case 'r': *p++ = '\r'; break;
444 case 'v': *p++ = '\013'; break; /* VT */
445 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
446 case '0': case '1': case '2': case '3':
447 case '4': case '5': case '6': case '7':
448 c = s[-1] - '0';
449 if (s < end && '0' <= *s && *s <= '7') {
450 c = (c<<3) + *s++ - '0';
451 if (s < end && '0' <= *s && *s <= '7')
452 c = (c<<3) + *s++ - '0';
453 }
454 *p++ = c;
455 break;
456 case 'x':
457 if (s+1 < end && ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
458 unsigned int x = 0;
459 c = Py_CHARMASK(*s);
460 s++;
461 if (ISDIGIT(c))
462 x = c - '0';
463 else if (ISLOWER(c))
464 x = 10 + c - 'a';
465 else
466 x = 10 + c - 'A';
467 x = x << 4;
468 c = Py_CHARMASK(*s);
469 s++;
470 if (ISDIGIT(c))
471 x += c - '0';
472 else if (ISLOWER(c))
473 x += 10 + c - 'a';
474 else
475 x += 10 + c - 'A';
476 *p++ = x;
477 break;
478 }
479 if (!errors || strcmp(errors, "strict") == 0) {
480 PyErr_SetString(PyExc_ValueError,
481 "invalid \\x escape");
482 goto failed;
483 }
484 if (strcmp(errors, "replace") == 0) {
485 *p++ = '?';
486 } else if (strcmp(errors, "ignore") == 0)
487 /* do nothing */;
488 else {
489 PyErr_Format(PyExc_ValueError,
490 "decoding error; unknown "
491 "error handling code: %.400s",
492 errors);
493 goto failed;
494 }
495 default:
496 *p++ = '\\';
497 s--;
498 goto non_esc; /* an arbitry number of unescaped
499 UTF-8 bytes may follow. */
500 }
501 }
502 if (p-buf < newlen)
503 _PyBytes_Resize(&v, p - buf);
504 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 Py_DECREF(v);
507 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000508}
509
510/* -------------------------------------------------------------------- */
511/* object api */
512
513Py_ssize_t
514PyBytes_Size(register PyObject *op)
515{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000516 if (!PyBytes_Check(op)) {
517 PyErr_Format(PyExc_TypeError,
518 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
519 return -1;
520 }
521 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000522}
523
524char *
525PyBytes_AsString(register PyObject *op)
526{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 if (!PyBytes_Check(op)) {
528 PyErr_Format(PyExc_TypeError,
529 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
530 return NULL;
531 }
532 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000533}
534
535int
536PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 register char **s,
538 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 if (s == NULL) {
541 PyErr_BadInternalCall();
542 return -1;
543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 if (!PyBytes_Check(obj)) {
546 PyErr_Format(PyExc_TypeError,
547 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
548 return -1;
549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 *s = PyBytes_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyBytes_GET_SIZE(obj);
554 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected bytes with no null");
557 return -1;
558 }
559 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000560}
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
562/* -------------------------------------------------------------------- */
563/* Methods */
564
Eric Smith0923d1d2009-04-16 20:16:10 +0000565#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000566
567#include "stringlib/fastsearch.h"
568#include "stringlib/count.h"
569#include "stringlib/find.h"
570#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000571#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000572#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000573
Eric Smith0f78bff2009-11-30 01:01:42 +0000574#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000575
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000576PyObject *
577PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000578{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 static const char *hexdigits = "0123456789abcdef";
580 register PyBytesObject* op = (PyBytesObject*) obj;
581 Py_ssize_t length = Py_SIZE(op);
582 size_t newsize = 3 + 4 * length;
583 PyObject *v;
584 if (newsize > PY_SSIZE_T_MAX || (newsize-3) / 4 != length) {
585 PyErr_SetString(PyExc_OverflowError,
586 "bytes object is too large to make repr");
587 return NULL;
588 }
589 v = PyUnicode_FromUnicode(NULL, newsize);
590 if (v == NULL) {
591 return NULL;
592 }
593 else {
594 register Py_ssize_t i;
595 register Py_UNICODE c;
596 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
597 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 /* Figure out which quote to use; single is preferred */
600 quote = '\'';
601 if (smartquotes) {
602 char *test, *start;
603 start = PyBytes_AS_STRING(op);
604 for (test = start; test < start+length; ++test) {
605 if (*test == '"') {
606 quote = '\''; /* back to single */
607 goto decided;
608 }
609 else if (*test == '\'')
610 quote = '"';
611 }
612 decided:
613 ;
614 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 *p++ = 'b', *p++ = quote;
617 for (i = 0; i < length; i++) {
618 /* There's at least enough room for a hex escape
619 and a closing quote. */
620 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
621 c = op->ob_sval[i];
622 if (c == quote || c == '\\')
623 *p++ = '\\', *p++ = c;
624 else if (c == '\t')
625 *p++ = '\\', *p++ = 't';
626 else if (c == '\n')
627 *p++ = '\\', *p++ = 'n';
628 else if (c == '\r')
629 *p++ = '\\', *p++ = 'r';
630 else if (c < ' ' || c >= 0x7f) {
631 *p++ = '\\';
632 *p++ = 'x';
633 *p++ = hexdigits[(c & 0xf0) >> 4];
634 *p++ = hexdigits[c & 0xf];
635 }
636 else
637 *p++ = c;
638 }
639 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
640 *p++ = quote;
641 *p = '\0';
642 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
643 Py_DECREF(v);
644 return NULL;
645 }
646 return v;
647 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000648}
649
Neal Norwitz6968b052007-02-27 19:02:19 +0000650static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000651bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000654}
655
Neal Norwitz6968b052007-02-27 19:02:19 +0000656static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 if (Py_BytesWarningFlag) {
660 if (PyErr_WarnEx(PyExc_BytesWarning,
661 "str() on a bytes instance", 1))
662 return NULL;
663 }
664 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000665}
666
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000667static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671}
Neal Norwitz6968b052007-02-27 19:02:19 +0000672
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673/* This is also used by PyBytes_Concat() */
674static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000675bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000676{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 Py_ssize_t size;
678 Py_buffer va, vb;
679 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 va.len = -1;
682 vb.len = -1;
683 if (_getbuffer(a, &va) < 0 ||
684 _getbuffer(b, &vb) < 0) {
685 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
686 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
687 goto done;
688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 /* Optimize end cases */
691 if (va.len == 0 && PyBytes_CheckExact(b)) {
692 result = b;
693 Py_INCREF(result);
694 goto done;
695 }
696 if (vb.len == 0 && PyBytes_CheckExact(a)) {
697 result = a;
698 Py_INCREF(result);
699 goto done;
700 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000702 size = va.len + vb.len;
703 if (size < 0) {
704 PyErr_NoMemory();
705 goto done;
706 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 result = PyBytes_FromStringAndSize(NULL, size);
709 if (result != NULL) {
710 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
711 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
712 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000713
714 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 if (va.len != -1)
716 PyBuffer_Release(&va);
717 if (vb.len != -1)
718 PyBuffer_Release(&vb);
719 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000720}
Neal Norwitz6968b052007-02-27 19:02:19 +0000721
722static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000723bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000724{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 register Py_ssize_t i;
726 register Py_ssize_t j;
727 register Py_ssize_t size;
728 register PyBytesObject *op;
729 size_t nbytes;
730 if (n < 0)
731 n = 0;
732 /* watch out for overflows: the size can overflow int,
733 * and the # of bytes needed can overflow size_t
734 */
735 size = Py_SIZE(a) * n;
736 if (n && size / n != Py_SIZE(a)) {
737 PyErr_SetString(PyExc_OverflowError,
738 "repeated bytes are too long");
739 return NULL;
740 }
741 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
742 Py_INCREF(a);
743 return (PyObject *)a;
744 }
745 nbytes = (size_t)size;
746 if (nbytes + PyBytesObject_SIZE <= nbytes) {
747 PyErr_SetString(PyExc_OverflowError,
748 "repeated bytes are too long");
749 return NULL;
750 }
751 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
752 if (op == NULL)
753 return PyErr_NoMemory();
754 PyObject_INIT_VAR(op, &PyBytes_Type, size);
755 op->ob_shash = -1;
756 op->ob_sval[size] = '\0';
757 if (Py_SIZE(a) == 1 && n > 0) {
758 memset(op->ob_sval, a->ob_sval[0] , n);
759 return (PyObject *) op;
760 }
761 i = 0;
762 if (i < size) {
763 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
764 i = Py_SIZE(a);
765 }
766 while (i < size) {
767 j = (i <= size-i) ? i : size-i;
768 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
769 i += j;
770 }
771 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000772}
773
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000775bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000776{
777 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
778 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000779 Py_buffer varg;
780 int pos;
781 PyErr_Clear();
782 if (_getbuffer(arg, &varg) < 0)
783 return -1;
784 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
785 varg.buf, varg.len, 0);
786 PyBuffer_Release(&varg);
787 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788 }
789 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000790 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
791 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000792 }
793
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000794 return memchr(PyBytes_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000795}
796
Neal Norwitz6968b052007-02-27 19:02:19 +0000797static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000798bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000799{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 if (i < 0 || i >= Py_SIZE(a)) {
801 PyErr_SetString(PyExc_IndexError, "index out of range");
802 return NULL;
803 }
804 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000805}
806
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000807static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000808bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000809{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 int c;
811 Py_ssize_t len_a, len_b;
812 Py_ssize_t min_len;
813 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 /* Make sure both arguments are strings. */
816 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
817 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
818 (PyObject_IsInstance((PyObject*)a,
819 (PyObject*)&PyUnicode_Type) ||
820 PyObject_IsInstance((PyObject*)b,
821 (PyObject*)&PyUnicode_Type))) {
822 if (PyErr_WarnEx(PyExc_BytesWarning,
823 "Comparison between bytes and string", 1))
824 return NULL;
825 }
826 result = Py_NotImplemented;
827 goto out;
828 }
829 if (a == b) {
830 switch (op) {
831 case Py_EQ:case Py_LE:case Py_GE:
832 result = Py_True;
833 goto out;
834 case Py_NE:case Py_LT:case Py_GT:
835 result = Py_False;
836 goto out;
837 }
838 }
839 if (op == Py_EQ) {
840 /* Supporting Py_NE here as well does not save
841 much time, since Py_NE is rarely used. */
842 if (Py_SIZE(a) == Py_SIZE(b)
843 && (a->ob_sval[0] == b->ob_sval[0]
844 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
845 result = Py_True;
846 } else {
847 result = Py_False;
848 }
849 goto out;
850 }
851 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
852 min_len = (len_a < len_b) ? len_a : len_b;
853 if (min_len > 0) {
854 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
855 if (c==0)
856 c = memcmp(a->ob_sval, b->ob_sval, min_len);
857 } else
858 c = 0;
859 if (c == 0)
860 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
861 switch (op) {
862 case Py_LT: c = c < 0; break;
863 case Py_LE: c = c <= 0; break;
864 case Py_EQ: assert(0); break; /* unreachable */
865 case Py_NE: c = c != 0; break;
866 case Py_GT: c = c > 0; break;
867 case Py_GE: c = c >= 0; break;
868 default:
869 result = Py_NotImplemented;
870 goto out;
871 }
872 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000873 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 Py_INCREF(result);
875 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000876}
877
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000878static long
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000879bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000880{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 register Py_ssize_t len;
882 register unsigned char *p;
883 register long x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000884
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 if (a->ob_shash != -1)
886 return a->ob_shash;
887 len = Py_SIZE(a);
888 p = (unsigned char *) a->ob_sval;
889 x = *p << 7;
890 while (--len >= 0)
891 x = (1000003*x) ^ *p++;
892 x ^= Py_SIZE(a);
893 if (x == -1)
894 x = -2;
895 a->ob_shash = x;
896 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000897}
898
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000899static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000900bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000901{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 if (PyIndex_Check(item)) {
903 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
904 if (i == -1 && PyErr_Occurred())
905 return NULL;
906 if (i < 0)
907 i += PyBytes_GET_SIZE(self);
908 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
909 PyErr_SetString(PyExc_IndexError,
910 "index out of range");
911 return NULL;
912 }
913 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
914 }
915 else if (PySlice_Check(item)) {
916 Py_ssize_t start, stop, step, slicelength, cur, i;
917 char* source_buf;
918 char* result_buf;
919 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 if (PySlice_GetIndicesEx((PySliceObject*)item,
922 PyBytes_GET_SIZE(self),
923 &start, &stop, &step, &slicelength) < 0) {
924 return NULL;
925 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 if (slicelength <= 0) {
928 return PyBytes_FromStringAndSize("", 0);
929 }
930 else if (start == 0 && step == 1 &&
931 slicelength == PyBytes_GET_SIZE(self) &&
932 PyBytes_CheckExact(self)) {
933 Py_INCREF(self);
934 return (PyObject *)self;
935 }
936 else if (step == 1) {
937 return PyBytes_FromStringAndSize(
938 PyBytes_AS_STRING(self) + start,
939 slicelength);
940 }
941 else {
942 source_buf = PyBytes_AS_STRING(self);
943 result = PyBytes_FromStringAndSize(NULL, slicelength);
944 if (result == NULL)
945 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000947 result_buf = PyBytes_AS_STRING(result);
948 for (cur = start, i = 0; i < slicelength;
949 cur += step, i++) {
950 result_buf[i] = source_buf[cur];
951 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 return result;
954 }
955 }
956 else {
957 PyErr_Format(PyExc_TypeError,
958 "byte indices must be integers, not %.200s",
959 Py_TYPE(item)->tp_name);
960 return NULL;
961 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962}
963
964static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000965bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000966{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000967 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
968 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969}
970
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000971static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 (lenfunc)bytes_length, /*sq_length*/
973 (binaryfunc)bytes_concat, /*sq_concat*/
974 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
975 (ssizeargfunc)bytes_item, /*sq_item*/
976 0, /*sq_slice*/
977 0, /*sq_ass_item*/
978 0, /*sq_ass_slice*/
979 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000980};
981
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000982static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 (lenfunc)bytes_length,
984 (binaryfunc)bytes_subscript,
985 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000986};
987
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000988static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000989 (getbufferproc)bytes_buffer_getbuffer,
990 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000991};
992
993
/* Strip modes; they double as indexes into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by strip mode. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1002
Neal Norwitz6968b052007-02-27 19:02:19 +00001003PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001004"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001005\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001006Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001007If sep is not specified or is None, B is split on ASCII whitespace\n\
1008characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001009If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001010
1011static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001012bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001013{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1015 Py_ssize_t maxsplit = -1;
1016 const char *s = PyBytes_AS_STRING(self), *sub;
1017 Py_buffer vsub;
1018 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1021 return NULL;
1022 if (maxsplit < 0)
1023 maxsplit = PY_SSIZE_T_MAX;
1024 if (subobj == Py_None)
1025 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1026 if (_getbuffer(subobj, &vsub) < 0)
1027 return NULL;
1028 sub = vsub.buf;
1029 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1032 PyBuffer_Release(&vsub);
1033 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001034}
1035
Neal Norwitz6968b052007-02-27 19:02:19 +00001036PyDoc_STRVAR(partition__doc__,
1037"B.partition(sep) -> (head, sep, tail)\n\
1038\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001039Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001040the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001041found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001042
1043static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001044bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001045{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 const char *sep;
1047 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 if (PyBytes_Check(sep_obj)) {
1050 sep = PyBytes_AS_STRING(sep_obj);
1051 sep_len = PyBytes_GET_SIZE(sep_obj);
1052 }
1053 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1054 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 return stringlib_partition(
1057 (PyObject*) self,
1058 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1059 sep_obj, sep, sep_len
1060 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001061}
1062
1063PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001064"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001065\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001066Search for the separator sep in B, starting at the end of B,\n\
1067and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001068part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001069bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001070
1071static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001072bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 const char *sep;
1075 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 if (PyBytes_Check(sep_obj)) {
1078 sep = PyBytes_AS_STRING(sep_obj);
1079 sep_len = PyBytes_GET_SIZE(sep_obj);
1080 }
1081 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1082 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 return stringlib_rpartition(
1085 (PyObject*) self,
1086 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1087 sep_obj, sep, sep_len
1088 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
Neal Norwitz6968b052007-02-27 19:02:19 +00001091PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001092"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001093\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001094Return a list of the sections in B, using sep as the delimiter,\n\
1095starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001096If sep is not given, B is split on ASCII whitespace characters\n\
1097(space, tab, return, newline, formfeed, vertical tab).\n\
1098If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001099
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001100
Neal Norwitz6968b052007-02-27 19:02:19 +00001101static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001102bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1105 Py_ssize_t maxsplit = -1;
1106 const char *s = PyBytes_AS_STRING(self), *sub;
1107 Py_buffer vsub;
1108 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1111 return NULL;
1112 if (maxsplit < 0)
1113 maxsplit = PY_SSIZE_T_MAX;
1114 if (subobj == Py_None)
1115 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1116 if (_getbuffer(subobj, &vsub) < 0)
1117 return NULL;
1118 sub = vsub.buf;
1119 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1122 PyBuffer_Release(&vsub);
1123 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001124}
1125
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001126
1127PyDoc_STRVAR(join__doc__,
1128"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001129\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001130Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001131Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1132
Neal Norwitz6968b052007-02-27 19:02:19 +00001133static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001134bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 char *sep = PyBytes_AS_STRING(self);
1137 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1138 PyObject *res = NULL;
1139 char *p;
1140 Py_ssize_t seqlen = 0;
1141 size_t sz = 0;
1142 Py_ssize_t i;
1143 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 seq = PySequence_Fast(orig, "");
1146 if (seq == NULL) {
1147 return NULL;
1148 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 seqlen = PySequence_Size(seq);
1151 if (seqlen == 0) {
1152 Py_DECREF(seq);
1153 return PyBytes_FromString("");
1154 }
1155 if (seqlen == 1) {
1156 item = PySequence_Fast_GET_ITEM(seq, 0);
1157 if (PyBytes_CheckExact(item)) {
1158 Py_INCREF(item);
1159 Py_DECREF(seq);
1160 return item;
1161 }
1162 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 /* There are at least two things to join, or else we have a subclass
1165 * of the builtin types in the sequence.
1166 * Do a pre-pass to figure out the total amount of space we'll
1167 * need (sz), and see whether all argument are bytes.
1168 */
1169 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1170 for (i = 0; i < seqlen; i++) {
1171 const size_t old_sz = sz;
1172 item = PySequence_Fast_GET_ITEM(seq, i);
1173 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1174 PyErr_Format(PyExc_TypeError,
1175 "sequence item %zd: expected bytes,"
1176 " %.80s found",
1177 i, Py_TYPE(item)->tp_name);
1178 Py_DECREF(seq);
1179 return NULL;
1180 }
1181 sz += Py_SIZE(item);
1182 if (i != 0)
1183 sz += seplen;
1184 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1185 PyErr_SetString(PyExc_OverflowError,
1186 "join() result is too long for bytes");
1187 Py_DECREF(seq);
1188 return NULL;
1189 }
1190 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 /* Allocate result space. */
1193 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1194 if (res == NULL) {
1195 Py_DECREF(seq);
1196 return NULL;
1197 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 /* Catenate everything. */
1200 /* I'm not worried about a PyByteArray item growing because there's
1201 nowhere in this function where we release the GIL. */
1202 p = PyBytes_AS_STRING(res);
1203 for (i = 0; i < seqlen; ++i) {
1204 size_t n;
1205 char *q;
1206 if (i) {
1207 Py_MEMCPY(p, sep, seplen);
1208 p += seplen;
1209 }
1210 item = PySequence_Fast_GET_ITEM(seq, i);
1211 n = Py_SIZE(item);
1212 if (PyBytes_Check(item))
1213 q = PyBytes_AS_STRING(item);
1214 else
1215 q = PyByteArray_AS_STRING(item);
1216 Py_MEMCPY(p, q, n);
1217 p += n;
1218 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 Py_DECREF(seq);
1221 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001222}
1223
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001224PyObject *
1225_PyBytes_Join(PyObject *sep, PyObject *x)
1226{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 assert(sep != NULL && PyBytes_Check(sep));
1228 assert(x != NULL);
1229 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230}
1231
/* Helper macro to fix up start/end slice values in place.
 *
 *   end   > len  -> end = len
 *   end   < 0    -> end += len, then clamped to 0
 *   start < 0    -> start += len, then clamped to 0
 *
 * A start greater than len is left untouched.  `start` and `end` must be
 * modifiable lvalues; `len` is evaluated more than once, so it must not
 * have side effects.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe as the unbraced body of an if/else); invoke it with a
 * trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246
1247Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001248bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 PyObject *subobj;
1251 const char *sub;
1252 Py_ssize_t sub_len;
1253 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1254 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1257 &obj_start, &obj_end))
1258 return -2;
1259 /* To support None in "start" and "end" arguments, meaning
1260 the same as if they were not passed.
1261 */
1262 if (obj_start != Py_None)
1263 if (!_PyEval_SliceIndex(obj_start, &start))
1264 return -2;
1265 if (obj_end != Py_None)
1266 if (!_PyEval_SliceIndex(obj_end, &end))
1267 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 if (PyBytes_Check(subobj)) {
1270 sub = PyBytes_AS_STRING(subobj);
1271 sub_len = PyBytes_GET_SIZE(subobj);
1272 }
1273 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1274 /* XXX - the "expected a character buffer object" is pretty
1275 confusing for a non-expert. remap to something else ? */
1276 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 if (dir > 0)
1279 return stringlib_find_slice(
1280 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1281 sub, sub_len, start, end);
1282 else
1283 return stringlib_rfind_slice(
1284 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1285 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286}
1287
1288
1289PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001290"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001291\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001292Return the lowest index in B where substring sub is found,\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001293such that sub is contained within s[start:end]. Optional\n\
1294arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001295\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001296Return -1 on failure.");
1297
Neal Norwitz6968b052007-02-27 19:02:19 +00001298static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001299bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001300{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 Py_ssize_t result = bytes_find_internal(self, args, +1);
1302 if (result == -2)
1303 return NULL;
1304 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001305}
1306
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307
1308PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001309"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001310\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311Like B.find() but raise ValueError when the substring is not found.");
1312
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001313static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001314bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001315{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 Py_ssize_t result = bytes_find_internal(self, args, +1);
1317 if (result == -2)
1318 return NULL;
1319 if (result == -1) {
1320 PyErr_SetString(PyExc_ValueError,
1321 "substring not found");
1322 return NULL;
1323 }
1324 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001325}
1326
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327
1328PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001329"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001330\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331Return the highest index in B where substring sub is found,\n\
1332such that sub is contained within s[start:end]. Optional\n\
1333arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001334\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335Return -1 on failure.");
1336
Neal Norwitz6968b052007-02-27 19:02:19 +00001337static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001338bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 Py_ssize_t result = bytes_find_internal(self, args, -1);
1341 if (result == -2)
1342 return NULL;
1343 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001344}
1345
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001346
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001347PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001348"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349\n\
1350Like B.rfind() but raise ValueError when the substring is not found.");
1351
1352static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001353bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 Py_ssize_t result = bytes_find_internal(self, args, -1);
1356 if (result == -2)
1357 return NULL;
1358 if (result == -1) {
1359 PyErr_SetString(PyExc_ValueError,
1360 "substring not found");
1361 return NULL;
1362 }
1363 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001364}
1365
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001366
1367Py_LOCAL_INLINE(PyObject *)
1368do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 Py_buffer vsep;
1371 char *s = PyBytes_AS_STRING(self);
1372 Py_ssize_t len = PyBytes_GET_SIZE(self);
1373 char *sep;
1374 Py_ssize_t seplen;
1375 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 if (_getbuffer(sepobj, &vsep) < 0)
1378 return NULL;
1379 sep = vsep.buf;
1380 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 i = 0;
1383 if (striptype != RIGHTSTRIP) {
1384 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1385 i++;
1386 }
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 j = len;
1390 if (striptype != LEFTSTRIP) {
1391 do {
1392 j--;
1393 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1394 j++;
1395 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1400 Py_INCREF(self);
1401 return (PyObject*)self;
1402 }
1403 else
1404 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001405}
1406
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407
1408Py_LOCAL_INLINE(PyObject *)
1409do_strip(PyBytesObject *self, int striptype)
1410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 char *s = PyBytes_AS_STRING(self);
1412 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 i = 0;
1415 if (striptype != RIGHTSTRIP) {
1416 while (i < len && ISSPACE(s[i])) {
1417 i++;
1418 }
1419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 j = len;
1422 if (striptype != LEFTSTRIP) {
1423 do {
1424 j--;
1425 } while (j >= i && ISSPACE(s[j]));
1426 j++;
1427 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1430 Py_INCREF(self);
1431 return (PyObject*)self;
1432 }
1433 else
1434 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435}
1436
1437
1438Py_LOCAL_INLINE(PyObject *)
1439do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1440{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1444 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 if (sep != NULL && sep != Py_None) {
1447 return do_xstrip(self, striptype, sep);
1448 }
1449 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450}
1451
1452
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001454"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001455\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001456Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001457If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001459bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001460{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 if (PyTuple_GET_SIZE(args) == 0)
1462 return do_strip(self, BOTHSTRIP); /* Common case */
1463 else
1464 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001465}
1466
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001470\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001471Strip leading bytes contained in the argument.\n\
1472If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001473static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001474bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001475{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 if (PyTuple_GET_SIZE(args) == 0)
1477 return do_strip(self, LEFTSTRIP); /* Common case */
1478 else
1479 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001480}
1481
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001483PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001485\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001486Strip trailing bytes contained in the argument.\n\
1487If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001488static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001489bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001490{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 if (PyTuple_GET_SIZE(args) == 0)
1492 return do_strip(self, RIGHTSTRIP); /* Common case */
1493 else
1494 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001495}
Neal Norwitz6968b052007-02-27 19:02:19 +00001496
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001497
1498PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001499"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001500\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001502string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001503as in slice notation.");
1504
1505static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001506bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 PyObject *sub_obj;
1509 const char *str = PyBytes_AS_STRING(self), *sub;
1510 Py_ssize_t sub_len;
1511 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1514 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1515 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001517 if (PyBytes_Check(sub_obj)) {
1518 sub = PyBytes_AS_STRING(sub_obj);
1519 sub_len = PyBytes_GET_SIZE(sub_obj);
1520 }
1521 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1522 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 return PyLong_FromSsize_t(
1527 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1528 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001529}
1530
1531
1532PyDoc_STRVAR(translate__doc__,
1533"B.translate(table[, deletechars]) -> bytes\n\
1534\n\
1535Return a copy of B, where all characters occurring in the\n\
1536optional argument deletechars are removed, and the remaining\n\
1537characters have been mapped through the given translation\n\
1538table, which must be a bytes object of length 256.");
1539
1540static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001541bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001542{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 register char *input, *output;
1544 const char *table;
1545 register Py_ssize_t i, c, changed = 0;
1546 PyObject *input_obj = (PyObject*)self;
1547 const char *output_start, *del_table=NULL;
1548 Py_ssize_t inlen, tablen, dellen = 0;
1549 PyObject *result;
1550 int trans_table[256];
1551 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1554 &tableobj, &delobj))
1555 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 if (PyBytes_Check(tableobj)) {
1558 table = PyBytes_AS_STRING(tableobj);
1559 tablen = PyBytes_GET_SIZE(tableobj);
1560 }
1561 else if (tableobj == Py_None) {
1562 table = NULL;
1563 tablen = 256;
1564 }
1565 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1566 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 if (tablen != 256) {
1569 PyErr_SetString(PyExc_ValueError,
1570 "translation table must be 256 characters long");
1571 return NULL;
1572 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 if (delobj != NULL) {
1575 if (PyBytes_Check(delobj)) {
1576 del_table = PyBytes_AS_STRING(delobj);
1577 dellen = PyBytes_GET_SIZE(delobj);
1578 }
1579 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1580 return NULL;
1581 }
1582 else {
1583 del_table = NULL;
1584 dellen = 0;
1585 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 inlen = PyBytes_GET_SIZE(input_obj);
1588 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1589 if (result == NULL)
1590 return NULL;
1591 output_start = output = PyBytes_AsString(result);
1592 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 if (dellen == 0 && table != NULL) {
1595 /* If no deletions are required, use faster code */
1596 for (i = inlen; --i >= 0; ) {
1597 c = Py_CHARMASK(*input++);
1598 if (Py_CHARMASK((*output++ = table[c])) != c)
1599 changed = 1;
1600 }
1601 if (changed || !PyBytes_CheckExact(input_obj))
1602 return result;
1603 Py_DECREF(result);
1604 Py_INCREF(input_obj);
1605 return input_obj;
1606 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 if (table == NULL) {
1609 for (i = 0; i < 256; i++)
1610 trans_table[i] = Py_CHARMASK(i);
1611 } else {
1612 for (i = 0; i < 256; i++)
1613 trans_table[i] = Py_CHARMASK(table[i]);
1614 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 for (i = 0; i < dellen; i++)
1617 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 for (i = inlen; --i >= 0; ) {
1620 c = Py_CHARMASK(*input++);
1621 if (trans_table[c] != -1)
1622 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1623 continue;
1624 changed = 1;
1625 }
1626 if (!changed && PyBytes_CheckExact(input_obj)) {
1627 Py_DECREF(result);
1628 Py_INCREF(input_obj);
1629 return input_obj;
1630 }
1631 /* Fix the size of the resulting string */
1632 if (inlen > 0)
1633 _PyBytes_Resize(&result, output - output_start);
1634 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001635}
1636
1637
Georg Brandlabc38772009-04-12 15:51:51 +00001638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001639bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001642}
1643
/* find and count characters and substrings */

/* Locate the first byte equal to c in target[0:target_len].
   Evaluates to a char * pointing at the match, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1648
1649/* String ops must return a string. */
1650/* If the object is subclass of string, create a copy */
1651Py_LOCAL(PyBytesObject *)
1652return_self(PyBytesObject *self)
1653{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 if (PyBytes_CheckExact(self)) {
1655 Py_INCREF(self);
1656 return self;
1657 }
1658 return (PyBytesObject *)PyBytes_FromStringAndSize(
1659 PyBytes_AS_STRING(self),
1660 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661}
1662
1663Py_LOCAL_INLINE(Py_ssize_t)
1664countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 Py_ssize_t count=0;
1667 const char *start=target;
1668 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 while ( (start=findchar(start, end-start, c)) != NULL ) {
1671 count++;
1672 if (count >= maxcount)
1673 break;
1674 start += 1;
1675 }
1676 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677}
1678
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679
1680/* Algorithms for different cases of string replacement */
1681
1682/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1683Py_LOCAL(PyBytesObject *)
1684replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 const char *to_s, Py_ssize_t to_len,
1686 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 char *self_s, *result_s;
1689 Py_ssize_t self_len, result_len;
1690 Py_ssize_t count, i, product;
1691 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 /* 1 at the end plus 1 after every character */
1696 count = self_len+1;
1697 if (maxcount < count)
1698 count = maxcount;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 /* Check for overflow */
1701 /* result_len = count * to_len + self_len; */
1702 product = count * to_len;
1703 if (product / to_len != count) {
1704 PyErr_SetString(PyExc_OverflowError,
1705 "replacement bytes are too long");
1706 return NULL;
1707 }
1708 result_len = product + self_len;
1709 if (result_len < 0) {
1710 PyErr_SetString(PyExc_OverflowError,
1711 "replacement bytes are too long");
1712 return NULL;
1713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 if (! (result = (PyBytesObject *)
1716 PyBytes_FromStringAndSize(NULL, result_len)) )
1717 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 self_s = PyBytes_AS_STRING(self);
1720 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 /* Lay the first one down (guaranteed this will occur) */
1725 Py_MEMCPY(result_s, to_s, to_len);
1726 result_s += to_len;
1727 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 for (i=0; i<count; i++) {
1730 *result_s++ = *self_s++;
1731 Py_MEMCPY(result_s, to_s, to_len);
1732 result_s += to_len;
1733 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 /* Copy the rest of the original string */
1736 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001738 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739}
1740
1741/* Special case for deleting a single character */
1742/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1743Py_LOCAL(PyBytesObject *)
1744replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 char *self_s, *result_s;
1748 char *start, *next, *end;
1749 Py_ssize_t self_len, result_len;
1750 Py_ssize_t count;
1751 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 self_len = PyBytes_GET_SIZE(self);
1754 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001756 count = countchar(self_s, self_len, from_c, maxcount);
1757 if (count == 0) {
1758 return return_self(self);
1759 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001761 result_len = self_len - count; /* from_len == 1 */
1762 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 if ( (result = (PyBytesObject *)
1765 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1766 return NULL;
1767 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 start = self_s;
1770 end = self_s + self_len;
1771 while (count-- > 0) {
1772 next = findchar(start, end-start, from_c);
1773 if (next == NULL)
1774 break;
1775 Py_MEMCPY(result_s, start, next-start);
1776 result_s += (next-start);
1777 start = next+1;
1778 }
1779 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001782}
1783
1784/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1785
1786Py_LOCAL(PyBytesObject *)
1787replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 const char *from_s, Py_ssize_t from_len,
1789 Py_ssize_t maxcount) {
1790 char *self_s, *result_s;
1791 char *start, *next, *end;
1792 Py_ssize_t self_len, result_len;
1793 Py_ssize_t count, offset;
1794 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 self_len = PyBytes_GET_SIZE(self);
1797 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 count = stringlib_count(self_s, self_len,
1800 from_s, from_len,
1801 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 if (count == 0) {
1804 /* no matches */
1805 return return_self(self);
1806 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 result_len = self_len - (count * from_len);
1809 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 if ( (result = (PyBytesObject *)
1812 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1813 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 start = self_s;
1818 end = self_s + self_len;
1819 while (count-- > 0) {
1820 offset = stringlib_find(start, end-start,
1821 from_s, from_len,
1822 0);
1823 if (offset == -1)
1824 break;
1825 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 result_s += (next-start);
1830 start = next+from_len;
1831 }
1832 Py_MEMCPY(result_s, start, end-start);
1833 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834}
1835
1836/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1837Py_LOCAL(PyBytesObject *)
1838replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 char from_c, char to_c,
1840 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 char *self_s, *result_s, *start, *end, *next;
1843 Py_ssize_t self_len;
1844 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 /* The result string will be the same size */
1847 self_s = PyBytes_AS_STRING(self);
1848 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 if (next == NULL) {
1853 /* No matches; return the original string */
1854 return return_self(self);
1855 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 /* Need to make a new string */
1858 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1859 if (result == NULL)
1860 return NULL;
1861 result_s = PyBytes_AS_STRING(result);
1862 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 /* change everything in-place, starting with this one */
1865 start = result_s + (next-self_s);
1866 *start = to_c;
1867 start++;
1868 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 while (--maxcount > 0) {
1871 next = findchar(start, end-start, from_c);
1872 if (next == NULL)
1873 break;
1874 *next = to_c;
1875 start = next+1;
1876 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001879}
1880
1881/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1882Py_LOCAL(PyBytesObject *)
1883replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 const char *from_s, Py_ssize_t from_len,
1885 const char *to_s, Py_ssize_t to_len,
1886 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 char *result_s, *start, *end;
1889 char *self_s;
1890 Py_ssize_t self_len, offset;
1891 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 self_s = PyBytes_AS_STRING(self);
1896 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 offset = stringlib_find(self_s, self_len,
1899 from_s, from_len,
1900 0);
1901 if (offset == -1) {
1902 /* No matches; return the original string */
1903 return return_self(self);
1904 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 /* Need to make a new string */
1907 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1908 if (result == NULL)
1909 return NULL;
1910 result_s = PyBytes_AS_STRING(result);
1911 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 /* change everything in-place, starting with this one */
1914 start = result_s + offset;
1915 Py_MEMCPY(start, to_s, from_len);
1916 start += from_len;
1917 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001918
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 while ( --maxcount > 0) {
1920 offset = stringlib_find(start, end-start,
1921 from_s, from_len,
1922 0);
1923 if (offset==-1)
1924 break;
1925 Py_MEMCPY(start+offset, to_s, from_len);
1926 start += offset+from_len;
1927 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930}
1931
1932/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1933Py_LOCAL(PyBytesObject *)
1934replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 char from_c,
1936 const char *to_s, Py_ssize_t to_len,
1937 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 char *self_s, *result_s;
1940 char *start, *next, *end;
1941 Py_ssize_t self_len, result_len;
1942 Py_ssize_t count, product;
1943 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 self_s = PyBytes_AS_STRING(self);
1946 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 count = countchar(self_s, self_len, from_c, maxcount);
1949 if (count == 0) {
1950 /* no matches, return unchanged */
1951 return return_self(self);
1952 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 /* use the difference between current and new, hence the "-1" */
1955 /* result_len = self_len + count * (to_len-1) */
1956 product = count * (to_len-1);
1957 if (product / (to_len-1) != count) {
1958 PyErr_SetString(PyExc_OverflowError,
1959 "replacement bytes are too long");
1960 return NULL;
1961 }
1962 result_len = self_len + product;
1963 if (result_len < 0) {
1964 PyErr_SetString(PyExc_OverflowError,
1965 "replacment bytes are too long");
1966 return NULL;
1967 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 if ( (result = (PyBytesObject *)
1970 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1971 return NULL;
1972 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 start = self_s;
1975 end = self_s + self_len;
1976 while (count-- > 0) {
1977 next = findchar(start, end-start, from_c);
1978 if (next == NULL)
1979 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 if (next == start) {
1982 /* replace with the 'to' */
1983 Py_MEMCPY(result_s, to_s, to_len);
1984 result_s += to_len;
1985 start += 1;
1986 } else {
1987 /* copy the unchanged old then the 'to' */
1988 Py_MEMCPY(result_s, start, next-start);
1989 result_s += (next-start);
1990 Py_MEMCPY(result_s, to_s, to_len);
1991 result_s += to_len;
1992 start = next+1;
1993 }
1994 }
1995 /* Copy the remainder of the remaining string */
1996 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001997
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001998 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999}
2000
2001/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2002Py_LOCAL(PyBytesObject *)
2003replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 const char *from_s, Py_ssize_t from_len,
2005 const char *to_s, Py_ssize_t to_len,
2006 Py_ssize_t maxcount) {
2007 char *self_s, *result_s;
2008 char *start, *next, *end;
2009 Py_ssize_t self_len, result_len;
2010 Py_ssize_t count, offset, product;
2011 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 self_s = PyBytes_AS_STRING(self);
2014 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002016 count = stringlib_count(self_s, self_len,
2017 from_s, from_len,
2018 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002020 if (count == 0) {
2021 /* no matches, return unchanged */
2022 return return_self(self);
2023 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002025 /* Check for overflow */
2026 /* result_len = self_len + count * (to_len-from_len) */
2027 product = count * (to_len-from_len);
2028 if (product / (to_len-from_len) != count) {
2029 PyErr_SetString(PyExc_OverflowError,
2030 "replacement bytes are too long");
2031 return NULL;
2032 }
2033 result_len = self_len + product;
2034 if (result_len < 0) {
2035 PyErr_SetString(PyExc_OverflowError,
2036 "replacement bytes are too long");
2037 return NULL;
2038 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 if ( (result = (PyBytesObject *)
2041 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2042 return NULL;
2043 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 start = self_s;
2046 end = self_s + self_len;
2047 while (count-- > 0) {
2048 offset = stringlib_find(start, end-start,
2049 from_s, from_len,
2050 0);
2051 if (offset == -1)
2052 break;
2053 next = start+offset;
2054 if (next == start) {
2055 /* replace with the 'to' */
2056 Py_MEMCPY(result_s, to_s, to_len);
2057 result_s += to_len;
2058 start += from_len;
2059 } else {
2060 /* copy the unchanged old then the 'to' */
2061 Py_MEMCPY(result_s, start, next-start);
2062 result_s += (next-start);
2063 Py_MEMCPY(result_s, to_s, to_len);
2064 result_s += to_len;
2065 start = next+from_len;
2066 }
2067 }
2068 /* Copy the remainder of the remaining string */
2069 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072}
2073
2074
2075Py_LOCAL(PyBytesObject *)
2076replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 const char *from_s, Py_ssize_t from_len,
2078 const char *to_s, Py_ssize_t to_len,
2079 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 if (maxcount < 0) {
2082 maxcount = PY_SSIZE_T_MAX;
2083 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2084 /* nothing to do; return the original string */
2085 return return_self(self);
2086 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 if (maxcount == 0 ||
2089 (from_len == 0 && to_len == 0)) {
2090 /* nothing to do; return the original string */
2091 return return_self(self);
2092 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002094 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 if (from_len == 0) {
2097 /* insert the 'to' string everywhere. */
2098 /* >>> "Python".replace("", ".") */
2099 /* '.P.y.t.h.o.n.' */
2100 return replace_interleave(self, to_s, to_len, maxcount);
2101 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2104 /* point for an empty self string to generate a non-empty string */
2105 /* Special case so the remaining code always gets a non-empty string */
2106 if (PyBytes_GET_SIZE(self) == 0) {
2107 return return_self(self);
2108 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 if (to_len == 0) {
2111 /* delete all occurrences of 'from' string */
2112 if (from_len == 1) {
2113 return replace_delete_single_character(
2114 self, from_s[0], maxcount);
2115 } else {
2116 return replace_delete_substring(self, from_s,
2117 from_len, maxcount);
2118 }
2119 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 if (from_len == to_len) {
2124 if (from_len == 1) {
2125 return replace_single_character_in_place(
2126 self,
2127 from_s[0],
2128 to_s[0],
2129 maxcount);
2130 } else {
2131 return replace_substring_in_place(
2132 self, from_s, from_len, to_s, to_len,
2133 maxcount);
2134 }
2135 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 /* Otherwise use the more generic algorithms */
2138 if (from_len == 1) {
2139 return replace_single_character(self, from_s[0],
2140 to_s, to_len, maxcount);
2141 } else {
2142 /* len('from')>=2, len('to')>=1 */
2143 return replace_substring(self, from_s, from_len, to_s, to_len,
2144 maxcount);
2145 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146}
2147
2148PyDoc_STRVAR(replace__doc__,
2149"B.replace(old, new[, count]) -> bytes\n\
2150\n\
2151Return a copy of B with all occurrences of subsection\n\
2152old replaced by new. If the optional argument count is\n\
Senthil Kumaran77210b42010-08-09 08:56:25 +00002153positive, only the first count occurrences are replaced. A\n\
Senthil Kumaranf2de1ff2010-08-09 09:03:57 +00002154negative value of count replaces all occurrences");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
2156static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002157bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 Py_ssize_t count = -1;
2160 PyObject *from, *to;
2161 const char *from_s, *to_s;
2162 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2165 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 if (PyBytes_Check(from)) {
2168 from_s = PyBytes_AS_STRING(from);
2169 from_len = PyBytes_GET_SIZE(from);
2170 }
2171 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2172 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 if (PyBytes_Check(to)) {
2175 to_s = PyBytes_AS_STRING(to);
2176 to_len = PyBytes_GET_SIZE(to);
2177 }
2178 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2179 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 return (PyObject *)replace((PyBytesObject *) self,
2182 from_s, from_len,
2183 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184}
2185
2186/** End DALKE **/
2187
2188/* Matches the end (direction >= 0) or start (direction < 0) of self
2189 * against substr, using the start and end arguments. Returns
2190 * -1 on error, 0 if not found and 1 if found.
2191 */
2192Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002193_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002194 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 Py_ssize_t len = PyBytes_GET_SIZE(self);
2197 Py_ssize_t slen;
2198 const char* sub;
2199 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002201 if (PyBytes_Check(substr)) {
2202 sub = PyBytes_AS_STRING(substr);
2203 slen = PyBytes_GET_SIZE(substr);
2204 }
2205 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2206 return -1;
2207 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 if (direction < 0) {
2212 /* startswith */
2213 if (start+slen > len)
2214 return 0;
2215 } else {
2216 /* endswith */
2217 if (end-start < slen || start > len)
2218 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002220 if (end-slen > start)
2221 start = end - slen;
2222 }
2223 if (end-start >= slen)
2224 return ! memcmp(str+start, sub, slen);
2225 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226}
2227
2228
2229PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002230"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231\n\
2232Return True if B starts with the specified prefix, False otherwise.\n\
2233With optional start, test B beginning at that position.\n\
2234With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002235prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002236
2237static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002238bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002240 Py_ssize_t start = 0;
2241 Py_ssize_t end = PY_SSIZE_T_MAX;
2242 PyObject *subobj;
2243 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2246 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2247 return NULL;
2248 if (PyTuple_Check(subobj)) {
2249 Py_ssize_t i;
2250 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2251 result = _bytes_tailmatch(self,
2252 PyTuple_GET_ITEM(subobj, i),
2253 start, end, -1);
2254 if (result == -1)
2255 return NULL;
2256 else if (result) {
2257 Py_RETURN_TRUE;
2258 }
2259 }
2260 Py_RETURN_FALSE;
2261 }
2262 result = _bytes_tailmatch(self, subobj, start, end, -1);
2263 if (result == -1)
2264 return NULL;
2265 else
2266 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002267}
2268
2269
2270PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002271"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002272\n\
2273Return True if B ends with the specified suffix, False otherwise.\n\
2274With optional start, test B beginning at that position.\n\
2275With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002276suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002277
2278static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002279bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002280{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002281 Py_ssize_t start = 0;
2282 Py_ssize_t end = PY_SSIZE_T_MAX;
2283 PyObject *subobj;
2284 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2287 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2288 return NULL;
2289 if (PyTuple_Check(subobj)) {
2290 Py_ssize_t i;
2291 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2292 result = _bytes_tailmatch(self,
2293 PyTuple_GET_ITEM(subobj, i),
2294 start, end, +1);
2295 if (result == -1)
2296 return NULL;
2297 else if (result) {
2298 Py_RETURN_TRUE;
2299 }
2300 }
2301 Py_RETURN_FALSE;
2302 }
2303 result = _bytes_tailmatch(self, subobj, start, end, +1);
2304 if (result == -1)
2305 return NULL;
2306 else
2307 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002308}
2309
2310
2311PyDoc_STRVAR(decode__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002312"B.decode([encoding[, errors]]) -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002314Decode B using the codec registered for encoding. encoding defaults\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002315to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002316handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2317a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002318as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002319able to handle UnicodeDecodeErrors.");
2320
2321static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002322bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002323{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 const char *encoding = NULL;
2325 const char *errors = NULL;
2326 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002328 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2329 return NULL;
2330 if (encoding == NULL)
2331 encoding = PyUnicode_GetDefaultEncoding();
2332 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002333}
2334
Guido van Rossum20188312006-05-05 15:15:40 +00002335
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002336PyDoc_STRVAR(splitlines__doc__,
2337"B.splitlines([keepends]) -> list of lines\n\
2338\n\
2339Return a list of the lines in B, breaking at line boundaries.\n\
2340Line breaks are not included in the resulting list unless keepends\n\
2341is given and true.");
2342
2343static PyObject*
2344bytes_splitlines(PyObject *self, PyObject *args)
2345{
2346 int keepends = 0;
2347
2348 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002349 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002350
2351 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002352 (PyObject*) self, PyBytes_AS_STRING(self),
2353 PyBytes_GET_SIZE(self), keepends
2354 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002355}
2356
2357
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002358PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002359"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002360\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002362Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002363Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002364
2365static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002366hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002367{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002368 if (c >= 128)
2369 return -1;
2370 if (ISDIGIT(c))
2371 return c - '0';
2372 else {
2373 if (ISUPPER(c))
2374 c = TOLOWER(c);
2375 if (c >= 'a' && c <= 'f')
2376 return c - 'a' + 10;
2377 }
2378 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002379}
2380
2381static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002382bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002384 PyObject *newstring, *hexobj;
2385 char *buf;
2386 Py_UNICODE *hex;
2387 Py_ssize_t hexlen, byteslen, i, j;
2388 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2391 return NULL;
2392 assert(PyUnicode_Check(hexobj));
2393 hexlen = PyUnicode_GET_SIZE(hexobj);
2394 hex = PyUnicode_AS_UNICODE(hexobj);
2395 byteslen = hexlen/2; /* This overestimates if there are spaces */
2396 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2397 if (!newstring)
2398 return NULL;
2399 buf = PyBytes_AS_STRING(newstring);
2400 for (i = j = 0; i < hexlen; i += 2) {
2401 /* skip over spaces in the input */
2402 while (hex[i] == ' ')
2403 i++;
2404 if (i >= hexlen)
2405 break;
2406 top = hex_digit_to_int(hex[i]);
2407 bot = hex_digit_to_int(hex[i+1]);
2408 if (top == -1 || bot == -1) {
2409 PyErr_Format(PyExc_ValueError,
2410 "non-hexadecimal number found in "
2411 "fromhex() arg at position %zd", i);
2412 goto error;
2413 }
2414 buf[j++] = (top << 4) + bot;
2415 }
2416 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2417 goto error;
2418 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002419
2420 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002421 Py_XDECREF(newstring);
2422 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002423}
2424
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002425PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002426"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002427
2428static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002429bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002430{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 Py_ssize_t res;
2432 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2433 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002434}
2435
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002436
2437static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002438bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002439{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002441}
2442
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002443
2444static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002445bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2447 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2448 _Py_capitalize__doc__},
2449 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2450 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2451 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2452 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2453 endswith__doc__},
2454 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2455 expandtabs__doc__},
2456 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2457 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2458 fromhex_doc},
2459 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2460 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2461 _Py_isalnum__doc__},
2462 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2463 _Py_isalpha__doc__},
2464 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2465 _Py_isdigit__doc__},
2466 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2467 _Py_islower__doc__},
2468 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2469 _Py_isspace__doc__},
2470 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2471 _Py_istitle__doc__},
2472 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2473 _Py_isupper__doc__},
2474 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2475 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2476 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2477 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2478 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2479 _Py_maketrans__doc__},
2480 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2481 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2482 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2483 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2484 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2485 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2486 rpartition__doc__},
2487 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2488 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2489 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2490 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2491 splitlines__doc__},
2492 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2493 startswith__doc__},
2494 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2495 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2496 _Py_swapcase__doc__},
2497 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2498 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2499 translate__doc__},
2500 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2501 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2502 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2503 sizeof__doc__},
2504 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002505};
2506
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507static PyObject *
2508str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2509
2510static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002511bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 PyObject *x = NULL;
2514 const char *encoding = NULL;
2515 const char *errors = NULL;
2516 PyObject *new = NULL;
2517 Py_ssize_t size;
2518 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 if (type != &PyBytes_Type)
2521 return str_subtype_new(type, args, kwds);
2522 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2523 &encoding, &errors))
2524 return NULL;
2525 if (x == NULL) {
2526 if (encoding != NULL || errors != NULL) {
2527 PyErr_SetString(PyExc_TypeError,
2528 "encoding or errors without sequence "
2529 "argument");
2530 return NULL;
2531 }
2532 return PyBytes_FromString("");
2533 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 if (PyUnicode_Check(x)) {
2536 /* Encode via the codec registry */
2537 if (encoding == NULL) {
2538 PyErr_SetString(PyExc_TypeError,
2539 "string argument without an encoding");
2540 return NULL;
2541 }
2542 new = PyUnicode_AsEncodedString(x, encoding, errors);
2543 if (new == NULL)
2544 return NULL;
2545 assert(PyBytes_Check(new));
2546 return new;
2547 }
2548 /* Is it an integer? */
2549 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2550 if (size == -1 && PyErr_Occurred()) {
2551 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2552 return NULL;
2553 PyErr_Clear();
2554 }
2555 else if (size < 0) {
2556 PyErr_SetString(PyExc_ValueError, "negative count");
2557 return NULL;
2558 }
2559 else {
2560 new = PyBytes_FromStringAndSize(NULL, size);
2561 if (new == NULL) {
2562 return NULL;
2563 }
2564 if (size > 0) {
2565 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2566 }
2567 return new;
2568 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 /* If it's not unicode, there can't be encoding or errors */
2571 if (encoding != NULL || errors != NULL) {
2572 PyErr_SetString(PyExc_TypeError,
2573 "encoding or errors without a string argument");
2574 return NULL;
2575 }
2576 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002577}
2578
2579PyObject *
2580PyBytes_FromObject(PyObject *x)
2581{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 PyObject *new, *it;
2583 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 if (x == NULL) {
2586 PyErr_BadInternalCall();
2587 return NULL;
2588 }
2589 /* Use the modern buffer interface */
2590 if (PyObject_CheckBuffer(x)) {
2591 Py_buffer view;
2592 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2593 return NULL;
2594 new = PyBytes_FromStringAndSize(NULL, view.len);
2595 if (!new)
2596 goto fail;
2597 /* XXX(brett.cannon): Better way to get to internal buffer? */
2598 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2599 &view, view.len, 'C') < 0)
2600 goto fail;
2601 PyBuffer_Release(&view);
2602 return new;
2603 fail:
2604 Py_XDECREF(new);
2605 PyBuffer_Release(&view);
2606 return NULL;
2607 }
2608 if (PyUnicode_Check(x)) {
2609 PyErr_SetString(PyExc_TypeError,
2610 "cannot convert unicode object to bytes");
2611 return NULL;
2612 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 if (PyList_CheckExact(x)) {
2615 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2616 if (new == NULL)
2617 return NULL;
2618 for (i = 0; i < Py_SIZE(x); i++) {
2619 Py_ssize_t value = PyNumber_AsSsize_t(
2620 PyList_GET_ITEM(x, i), PyExc_ValueError);
2621 if (value == -1 && PyErr_Occurred()) {
2622 Py_DECREF(new);
2623 return NULL;
2624 }
2625 if (value < 0 || value >= 256) {
2626 PyErr_SetString(PyExc_ValueError,
2627 "bytes must be in range(0, 256)");
2628 Py_DECREF(new);
2629 return NULL;
2630 }
2631 ((PyBytesObject *)new)->ob_sval[i] = value;
2632 }
2633 return new;
2634 }
2635 if (PyTuple_CheckExact(x)) {
2636 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2637 if (new == NULL)
2638 return NULL;
2639 for (i = 0; i < Py_SIZE(x); i++) {
2640 Py_ssize_t value = PyNumber_AsSsize_t(
2641 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2642 if (value == -1 && PyErr_Occurred()) {
2643 Py_DECREF(new);
2644 return NULL;
2645 }
2646 if (value < 0 || value >= 256) {
2647 PyErr_SetString(PyExc_ValueError,
2648 "bytes must be in range(0, 256)");
2649 Py_DECREF(new);
2650 return NULL;
2651 }
2652 ((PyBytesObject *)new)->ob_sval[i] = value;
2653 }
2654 return new;
2655 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002657 /* For iterator version, create a string object and resize as needed */
2658 size = _PyObject_LengthHint(x, 64);
2659 if (size == -1 && PyErr_Occurred())
2660 return NULL;
2661 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2662 returning a shared empty bytes string. This required because we
2663 want to call _PyBytes_Resize() the returned object, which we can
2664 only do on bytes objects with refcount == 1. */
2665 size += 1;
2666 new = PyBytes_FromStringAndSize(NULL, size);
2667 if (new == NULL)
2668 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 /* Get the iterator */
2671 it = PyObject_GetIter(x);
2672 if (it == NULL)
2673 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 /* Run the iterator to exhaustion */
2676 for (i = 0; ; i++) {
2677 PyObject *item;
2678 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 /* Get the next item */
2681 item = PyIter_Next(it);
2682 if (item == NULL) {
2683 if (PyErr_Occurred())
2684 goto error;
2685 break;
2686 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002688 /* Interpret it as an int (__index__) */
2689 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2690 Py_DECREF(item);
2691 if (value == -1 && PyErr_Occurred())
2692 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 /* Range check */
2695 if (value < 0 || value >= 256) {
2696 PyErr_SetString(PyExc_ValueError,
2697 "bytes must be in range(0, 256)");
2698 goto error;
2699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 /* Append the byte */
2702 if (i >= size) {
2703 size = 2 * size + 1;
2704 if (_PyBytes_Resize(&new, size) < 0)
2705 goto error;
2706 }
2707 ((PyBytesObject *)new)->ob_sval[i] = value;
2708 }
2709 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 /* Clean up and return success */
2712 Py_DECREF(it);
2713 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714
2715 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 /* Error handling when new != NULL */
2717 Py_XDECREF(it);
2718 Py_DECREF(new);
2719 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720}
2721
2722static PyObject *
2723str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2724{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 PyObject *tmp, *pnew;
2726 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002728 assert(PyType_IsSubtype(type, &PyBytes_Type));
2729 tmp = bytes_new(&PyBytes_Type, args, kwds);
2730 if (tmp == NULL)
2731 return NULL;
2732 assert(PyBytes_CheckExact(tmp));
2733 n = PyBytes_GET_SIZE(tmp);
2734 pnew = type->tp_alloc(type, n);
2735 if (pnew != NULL) {
2736 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2737 PyBytes_AS_STRING(tmp), n+1);
2738 ((PyBytesObject *)pnew)->ob_shash =
2739 ((PyBytesObject *)tmp)->ob_shash;
2740 }
2741 Py_DECREF(tmp);
2742 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743}
2744
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002745PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002746"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002748bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2749bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002750\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002752 - an iterable yielding integers in range(256)\n\
2753 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754 - a bytes or a buffer object\n\
2755 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002756
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002757static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002758
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002759PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002760 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2761 "bytes",
2762 PyBytesObject_SIZE,
2763 sizeof(char),
2764 bytes_dealloc, /* tp_dealloc */
2765 0, /* tp_print */
2766 0, /* tp_getattr */
2767 0, /* tp_setattr */
2768 0, /* tp_reserved */
2769 (reprfunc)bytes_repr, /* tp_repr */
2770 0, /* tp_as_number */
2771 &bytes_as_sequence, /* tp_as_sequence */
2772 &bytes_as_mapping, /* tp_as_mapping */
2773 (hashfunc)bytes_hash, /* tp_hash */
2774 0, /* tp_call */
2775 bytes_str, /* tp_str */
2776 PyObject_GenericGetAttr, /* tp_getattro */
2777 0, /* tp_setattro */
2778 &bytes_as_buffer, /* tp_as_buffer */
2779 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2780 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2781 bytes_doc, /* tp_doc */
2782 0, /* tp_traverse */
2783 0, /* tp_clear */
2784 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2785 0, /* tp_weaklistoffset */
2786 bytes_iter, /* tp_iter */
2787 0, /* tp_iternext */
2788 bytes_methods, /* tp_methods */
2789 0, /* tp_members */
2790 0, /* tp_getset */
2791 &PyBaseObject_Type, /* tp_base */
2792 0, /* tp_dict */
2793 0, /* tp_descr_get */
2794 0, /* tp_descr_set */
2795 0, /* tp_dictoffset */
2796 0, /* tp_init */
2797 0, /* tp_alloc */
2798 bytes_new, /* tp_new */
2799 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002800};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002801
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802void
2803PyBytes_Concat(register PyObject **pv, register PyObject *w)
2804{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002805 register PyObject *v;
2806 assert(pv != NULL);
2807 if (*pv == NULL)
2808 return;
2809 if (w == NULL) {
2810 Py_DECREF(*pv);
2811 *pv = NULL;
2812 return;
2813 }
2814 v = bytes_concat(*pv, w);
2815 Py_DECREF(*pv);
2816 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817}
2818
2819void
2820PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2821{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002822 PyBytes_Concat(pv, w);
2823 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002824}
2825
2826
2827/* The following function breaks the notion that strings are immutable:
2828 it changes the size of a string. We get away with this only if there
2829 is only one module referencing the object. You can also think of it
2830 as creating a new string object and destroying the old one, only
2831 more efficiently. In any case, don't use this if the string may
2832 already be known to some other part of the code...
2833 Note that if there's not enough memory to resize the string, the original
2834 string object at *pv is deallocated, *pv is set to NULL, an "out of
2835 memory" exception is set, and -1 is returned. Else (on success) 0 is
2836 returned, and the value in *pv may or may not be the same as on input.
2837 As always, an extra byte is allocated for a trailing \0 byte (newsize
2838 does *not* include that), and a trailing \0 byte is stored.
2839*/
2840
2841int
2842_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2843{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002844 register PyObject *v;
2845 register PyBytesObject *sv;
2846 v = *pv;
2847 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2848 *pv = 0;
2849 Py_DECREF(v);
2850 PyErr_BadInternalCall();
2851 return -1;
2852 }
2853 /* XXX UNREF/NEWREF interface should be more symmetrical */
2854 _Py_DEC_REFTOTAL;
2855 _Py_ForgetReference(v);
2856 *pv = (PyObject *)
2857 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2858 if (*pv == NULL) {
2859 PyObject_Del(v);
2860 PyErr_NoMemory();
2861 return -1;
2862 }
2863 _Py_NewReference(*pv);
2864 sv = (PyBytesObject *) *pv;
2865 Py_SIZE(sv) = newsize;
2866 sv->ob_sval[newsize] = '\0';
2867 sv->ob_shash = -1; /* invalidate cached hash value */
2868 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869}
2870
2871/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2872 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2873 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002874 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875 * . *pbuf is set to point into it,
2876 * *plen set to the # of chars following that.
2877 * Caller must decref it when done using pbuf.
2878 * The string starting at *pbuf is of the form
2879 * "-"? ("0x" | "0X")? digit+
2880 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2881 * set in flags. The case of hex digits will be correct,
2882 * There will be at least prec digits, zero-filled on the left if
2883 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 * val object to be converted
2885 * flags bitmask of format flags; only F_ALT is looked at
2886 * prec minimum number of digits; 0-fill on left if needed
2887 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002888 *
2889 * CAUTION: o, x and X conversions on regular ints can never
2890 * produce a '-' sign, but can for Python's unbounded ints.
2891 */
2892PyObject*
2893_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002896 PyObject *result = NULL;
2897 char *buf;
2898 Py_ssize_t i;
2899 int sign; /* 1 if '-', else 0 */
2900 int len; /* number of characters */
2901 Py_ssize_t llen;
2902 int numdigits; /* len == numnondigits + numdigits */
2903 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 /* Avoid exceeding SSIZE_T_MAX */
2906 if (prec > INT_MAX-3) {
2907 PyErr_SetString(PyExc_OverflowError,
2908 "precision too large");
2909 return NULL;
2910 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002912 switch (type) {
2913 case 'd':
2914 case 'u':
2915 /* Special-case boolean: we want 0/1 */
2916 if (PyBool_Check(val))
2917 result = PyNumber_ToBase(val, 10);
2918 else
2919 result = Py_TYPE(val)->tp_str(val);
2920 break;
2921 case 'o':
2922 numnondigits = 2;
2923 result = PyNumber_ToBase(val, 8);
2924 break;
2925 case 'x':
2926 case 'X':
2927 numnondigits = 2;
2928 result = PyNumber_ToBase(val, 16);
2929 break;
2930 default:
2931 assert(!"'type' not in [duoxX]");
2932 }
2933 if (!result)
2934 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 buf = _PyUnicode_AsString(result);
2937 if (!buf) {
2938 Py_DECREF(result);
2939 return NULL;
2940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002942 /* To modify the string in-place, there can only be one reference. */
2943 if (Py_REFCNT(result) != 1) {
2944 PyErr_BadInternalCall();
2945 return NULL;
2946 }
2947 llen = PyUnicode_GetSize(result);
2948 if (llen > INT_MAX) {
2949 PyErr_SetString(PyExc_ValueError,
2950 "string too large in _PyBytes_FormatLong");
2951 return NULL;
2952 }
2953 len = (int)llen;
2954 if (buf[len-1] == 'L') {
2955 --len;
2956 buf[len] = '\0';
2957 }
2958 sign = buf[0] == '-';
2959 numnondigits += sign;
2960 numdigits = len - numnondigits;
2961 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 /* Get rid of base marker unless F_ALT */
2964 if (((flags & F_ALT) == 0 &&
2965 (type == 'o' || type == 'x' || type == 'X'))) {
2966 assert(buf[sign] == '0');
2967 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2968 buf[sign+1] == 'o');
2969 numnondigits -= 2;
2970 buf += 2;
2971 len -= 2;
2972 if (sign)
2973 buf[0] = '-';
2974 assert(len == numnondigits + numdigits);
2975 assert(numdigits > 0);
2976 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002977
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002978 /* Fill with leading zeroes to meet minimum width. */
2979 if (prec > numdigits) {
2980 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2981 numnondigits + prec);
2982 char *b1;
2983 if (!r1) {
2984 Py_DECREF(result);
2985 return NULL;
2986 }
2987 b1 = PyBytes_AS_STRING(r1);
2988 for (i = 0; i < numnondigits; ++i)
2989 *b1++ = *buf++;
2990 for (i = 0; i < prec - numdigits; i++)
2991 *b1++ = '0';
2992 for (i = 0; i < numdigits; i++)
2993 *b1++ = *buf++;
2994 *b1 = '\0';
2995 Py_DECREF(result);
2996 result = r1;
2997 buf = PyBytes_AS_STRING(result);
2998 len = numnondigits + prec;
2999 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 /* Fix up case for hex conversions. */
3002 if (type == 'X') {
3003 /* Need to convert all lower case letters to upper case.
3004 and need to convert 0x to 0X (and -0x to -0X). */
3005 for (i = 0; i < len; i++)
3006 if (buf[i] >= 'a' && buf[i] <= 'x')
3007 buf[i] -= 'a'-'A';
3008 }
3009 *pbuf = buf;
3010 *plen = len;
3011 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003012}
3013
3014void
3015PyBytes_Fini(void)
3016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 int i;
3018 for (i = 0; i < UCHAR_MAX + 1; i++) {
3019 Py_XDECREF(characters[i]);
3020 characters[i] = NULL;
3021 }
3022 Py_XDECREF(nullstring);
3023 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003024}
3025
Benjamin Peterson4116f362008-05-27 00:36:20 +00003026/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003027
3028typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 PyObject_HEAD
3030 Py_ssize_t it_index;
3031 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033
3034static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003035striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 _PyObject_GC_UNTRACK(it);
3038 Py_XDECREF(it->it_seq);
3039 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040}
3041
3042static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003043striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 Py_VISIT(it->it_seq);
3046 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003047}
3048
3049static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003050striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 PyBytesObject *seq;
3053 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 assert(it != NULL);
3056 seq = it->it_seq;
3057 if (seq == NULL)
3058 return NULL;
3059 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003061 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3062 item = PyLong_FromLong(
3063 (unsigned char)seq->ob_sval[it->it_index]);
3064 if (item != NULL)
3065 ++it->it_index;
3066 return item;
3067 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003069 Py_DECREF(seq);
3070 it->it_seq = NULL;
3071 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003072}
3073
3074static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 Py_ssize_t len = 0;
3078 if (it->it_seq)
3079 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3080 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003081}
3082
3083PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003085
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003086static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3088 length_hint_doc},
3089 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003090};
3091
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003092PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003093 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3094 "bytes_iterator", /* tp_name */
3095 sizeof(striterobject), /* tp_basicsize */
3096 0, /* tp_itemsize */
3097 /* methods */
3098 (destructor)striter_dealloc, /* tp_dealloc */
3099 0, /* tp_print */
3100 0, /* tp_getattr */
3101 0, /* tp_setattr */
3102 0, /* tp_reserved */
3103 0, /* tp_repr */
3104 0, /* tp_as_number */
3105 0, /* tp_as_sequence */
3106 0, /* tp_as_mapping */
3107 0, /* tp_hash */
3108 0, /* tp_call */
3109 0, /* tp_str */
3110 PyObject_GenericGetAttr, /* tp_getattro */
3111 0, /* tp_setattro */
3112 0, /* tp_as_buffer */
3113 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3114 0, /* tp_doc */
3115 (traverseproc)striter_traverse, /* tp_traverse */
3116 0, /* tp_clear */
3117 0, /* tp_richcompare */
3118 0, /* tp_weaklistoffset */
3119 PyObject_SelfIter, /* tp_iter */
3120 (iternextfunc)striter_next, /* tp_iternext */
3121 striter_methods, /* tp_methods */
3122 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003123};
3124
3125static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003126bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003130 if (!PyBytes_Check(seq)) {
3131 PyErr_BadInternalCall();
3132 return NULL;
3133 }
3134 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3135 if (it == NULL)
3136 return NULL;
3137 it->it_index = 0;
3138 Py_INCREF(seq);
3139 it->it_seq = (PyBytesObject *)seq;
3140 _PyObject_GC_TRACK(it);
3141 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003142}