blob: a1db7789f15a2cc01d8309dc501c69a71edafa69 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020013 PyBufferProcs *bufferprocs;
14 if (PyBytes_CheckExact(obj)) {
15 /* Fast path, e.g. for .join() of many bytes objects */
16 Py_INCREF(obj);
17 view->obj = obj;
18 view->buf = PyBytes_AS_STRING(obj);
19 view->len = PyBytes_GET_SIZE(obj);
20 return view->len;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Antoine Pitroucfc22b42012-10-16 21:07:23 +020023 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
24 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000025 {
Antoine Pitroud1188562010-06-09 16:38:55 +000026 PyErr_Format(PyExc_TypeError,
27 "Type %.100s doesn't support the buffer API",
28 Py_TYPE(obj)->tp_name);
29 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000031
Antoine Pitroucfc22b42012-10-16 21:07:23 +020032 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000034 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035}
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000038Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000040
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041static PyBytesObject *characters[UCHAR_MAX + 1];
42static PyBytesObject *nullstring;
43
Mark Dickinsonfd24b322008-12-06 15:33:31 +000044/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
45 for a string of length n should request PyBytesObject_SIZE + n bytes.
46
47 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
48 3 bytes per string allocation on a typical system.
49*/
50#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
51
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 For PyBytes_FromString(), the parameter `str' points to a null-terminated
54 string containing exactly `size' bytes.
55
   For PyBytes_FromStringAndSize(), the parameter `str' is
57 either NULL or else points to a string containing at least `size' bytes.
58 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
59 not have to be null-terminated. (Therefore it is safe to construct a
60 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
61 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
62 bytes (setting the last byte to the null terminating character) and you can
63 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000064 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065 alter the data yourself, since the strings may be shared.
66
67 The PyObject member `op->ob_size', which denotes the number of "extra
68 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020069 allocated for string data, not counting the null terminating character.
70 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071 PyBytes_FromStringAndSize()) or the length of the string in the `str'
72 parameter (for PyBytes_FromString()).
73*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000074PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 register PyBytesObject *op;
78 if (size < 0) {
79 PyErr_SetString(PyExc_SystemError,
80 "Negative size passed to PyBytes_FromStringAndSize");
81 return NULL;
82 }
83 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
90 if (size == 1 && str != NULL &&
91 (op = characters[*str & UCHAR_MAX]) != NULL)
92 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000093#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000095#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 Py_INCREF(op);
97 return (PyObject *)op;
98 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
101 PyErr_SetString(PyExc_OverflowError,
102 "byte string is too large");
103 return NULL;
104 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 /* Inline PyObject_NewVar */
107 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
108 if (op == NULL)
109 return PyErr_NoMemory();
110 PyObject_INIT_VAR(op, &PyBytes_Type, size);
111 op->ob_shash = -1;
112 if (str != NULL)
113 Py_MEMCPY(op->ob_sval, str, size);
114 op->ob_sval[size] = '\0';
115 /* share short strings */
116 if (size == 0) {
117 nullstring = op;
118 Py_INCREF(op);
119 } else if (size == 1 && str != NULL) {
120 characters[*str & UCHAR_MAX] = op;
121 Py_INCREF(op);
122 }
123 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000124}
125
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000126PyObject *
127PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 register size_t size;
130 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 assert(str != NULL);
133 size = strlen(str);
134 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
135 PyErr_SetString(PyExc_OverflowError,
136 "byte string is too long");
137 return NULL;
138 }
139 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 Py_INCREF(op);
144 return (PyObject *)op;
145 }
146 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000147#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 /* Inline PyObject_NewVar */
155 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
156 if (op == NULL)
157 return PyErr_NoMemory();
158 PyObject_INIT_VAR(op, &PyBytes_Type, size);
159 op->ob_shash = -1;
160 Py_MEMCPY(op->ob_sval, str, size+1);
161 /* share short strings */
162 if (size == 0) {
163 nullstring = op;
164 Py_INCREF(op);
165 } else if (size == 1) {
166 characters[*str & UCHAR_MAX] = op;
167 Py_INCREF(op);
168 }
169 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000170}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000171
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172PyObject *
173PyBytes_FromFormatV(const char *format, va_list vargs)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 va_list count;
176 Py_ssize_t n = 0;
177 const char* f;
178 char *s;
179 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000181 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 /* step 1: figure out how large a buffer we need */
183 for (f = format; *f; f++) {
184 if (*f == '%') {
185 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000186 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
190 * they don't affect the amount of space we reserve.
191 */
192 if ((*f == 'l' || *f == 'z') &&
193 (f[1] == 'd' || f[1] == 'u'))
194 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 switch (*f) {
197 case 'c':
198 (void)va_arg(count, int);
199 /* fall through... */
200 case '%':
201 n++;
202 break;
203 case 'd': case 'u': case 'i': case 'x':
204 (void) va_arg(count, int);
205 /* 20 bytes is enough to hold a 64-bit
206 integer. Decimal takes the most space.
207 This isn't enough for octal. */
208 n += 20;
209 break;
210 case 's':
211 s = va_arg(count, char*);
212 n += strlen(s);
213 break;
214 case 'p':
215 (void) va_arg(count, int);
216 /* maximum 64-bit pointer representation:
217 * 0xffffffffffffffff
218 * so 19 characters is enough.
219 * XXX I count 18 -- what's the extra for?
220 */
221 n += 19;
222 break;
223 default:
224 /* if we stumble upon an unknown
225 formatting code, copy the rest of
226 the format string to the output
227 string. (we cannot just skip the
228 code, since there's no way to know
229 what's in the argument list) */
230 n += strlen(p);
231 goto expand;
232 }
233 } else
234 n++;
235 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000236 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 /* step 2: fill the buffer */
238 /* Since we've analyzed how much space we need for the worst case,
239 use sprintf directly instead of the slower PyOS_snprintf. */
240 string = PyBytes_FromStringAndSize(NULL, n);
241 if (!string)
242 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 for (f = format; *f; f++) {
247 if (*f == '%') {
248 const char* p = f++;
249 Py_ssize_t i;
250 int longflag = 0;
251 int size_tflag = 0;
252 /* parse the width.precision part (we're only
253 interested in the precision value, if any) */
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 if (*f == '.') {
258 f++;
259 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000260 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 n = (n*10) + *f++ - '0';
262 }
David Malcolm96960882010-11-05 17:23:41 +0000263 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 f++;
265 /* handle the long flag, but only for %ld and %lu.
266 others can be added when necessary. */
267 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
268 longflag = 1;
269 ++f;
270 }
271 /* handle the size_t flag. */
272 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
273 size_tflag = 1;
274 ++f;
275 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 switch (*f) {
278 case 'c':
279 *s++ = va_arg(vargs, int);
280 break;
281 case 'd':
282 if (longflag)
283 sprintf(s, "%ld", va_arg(vargs, long));
284 else if (size_tflag)
285 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
286 va_arg(vargs, Py_ssize_t));
287 else
288 sprintf(s, "%d", va_arg(vargs, int));
289 s += strlen(s);
290 break;
291 case 'u':
292 if (longflag)
293 sprintf(s, "%lu",
294 va_arg(vargs, unsigned long));
295 else if (size_tflag)
296 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
297 va_arg(vargs, size_t));
298 else
299 sprintf(s, "%u",
300 va_arg(vargs, unsigned int));
301 s += strlen(s);
302 break;
303 case 'i':
304 sprintf(s, "%i", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 'x':
308 sprintf(s, "%x", va_arg(vargs, int));
309 s += strlen(s);
310 break;
311 case 's':
312 p = va_arg(vargs, char*);
313 i = strlen(p);
314 if (n > 0 && i > n)
315 i = n;
316 Py_MEMCPY(s, p, i);
317 s += i;
318 break;
319 case 'p':
320 sprintf(s, "%p", va_arg(vargs, void*));
321 /* %p is ill-defined: ensure leading 0x. */
322 if (s[1] == 'X')
323 s[1] = 'x';
324 else if (s[1] != 'x') {
325 memmove(s+2, s, strlen(s)+1);
326 s[0] = '0';
327 s[1] = 'x';
328 }
329 s += strlen(s);
330 break;
331 case '%':
332 *s++ = '%';
333 break;
334 default:
335 strcpy(s, p);
336 s += strlen(s);
337 goto end;
338 }
339 } else
340 *s++ = *f;
341 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000342
343 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
345 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346}
347
348PyObject *
349PyBytes_FromFormat(const char *format, ...)
350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 PyObject* ret;
352 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353
354#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 ret = PyBytes_FromFormatV(format, vargs);
360 va_end(vargs);
361 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000362}
363
364static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000365bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000368}
369
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370/* Unescape a backslash-escaped string. If unicode is non-zero,
371 the string is a u-literal. If recode_encoding is non-zero,
372 the string is UTF-8 encoded and should be re-encoded in the
373 specified encoding. */
374
375PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 Py_ssize_t len,
377 const char *errors,
378 Py_ssize_t unicode,
379 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 int c;
382 char *p, *buf;
383 const char *end;
384 PyObject *v;
385 Py_ssize_t newlen = recode_encoding ? 4*len:len;
386 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
387 if (v == NULL)
388 return NULL;
389 p = buf = PyBytes_AsString(v);
390 end = s + len;
391 while (s < end) {
392 if (*s != '\\') {
393 non_esc:
394 if (recode_encoding && (*s & 0x80)) {
395 PyObject *u, *w;
396 char *r;
397 const char* t;
398 Py_ssize_t rn;
399 t = s;
400 /* Decode non-ASCII bytes as UTF-8. */
401 while (t < end && (*t & 0x80)) t++;
402 u = PyUnicode_DecodeUTF8(s, t - s, errors);
403 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 /* Recode them in target encoding. */
406 w = PyUnicode_AsEncodedString(
407 u, recode_encoding, errors);
408 Py_DECREF(u);
409 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 /* Append bytes to output buffer. */
412 assert(PyBytes_Check(w));
413 r = PyBytes_AS_STRING(w);
414 rn = PyBytes_GET_SIZE(w);
415 Py_MEMCPY(p, r, rn);
416 p += rn;
417 Py_DECREF(w);
418 s = t;
419 } else {
420 *p++ = *s++;
421 }
422 continue;
423 }
424 s++;
425 if (s==end) {
426 PyErr_SetString(PyExc_ValueError,
427 "Trailing \\ in string");
428 goto failed;
429 }
430 switch (*s++) {
431 /* XXX This assumes ASCII! */
432 case '\n': break;
433 case '\\': *p++ = '\\'; break;
434 case '\'': *p++ = '\''; break;
435 case '\"': *p++ = '\"'; break;
436 case 'b': *p++ = '\b'; break;
437 case 'f': *p++ = '\014'; break; /* FF */
438 case 't': *p++ = '\t'; break;
439 case 'n': *p++ = '\n'; break;
440 case 'r': *p++ = '\r'; break;
441 case 'v': *p++ = '\013'; break; /* VT */
442 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
443 case '0': case '1': case '2': case '3':
444 case '4': case '5': case '6': case '7':
445 c = s[-1] - '0';
446 if (s < end && '0' <= *s && *s <= '7') {
447 c = (c<<3) + *s++ - '0';
448 if (s < end && '0' <= *s && *s <= '7')
449 c = (c<<3) + *s++ - '0';
450 }
451 *p++ = c;
452 break;
453 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000454 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 unsigned int x = 0;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x = 10 + c - 'a';
462 else
463 x = 10 + c - 'A';
464 x = x << 4;
465 c = Py_CHARMASK(*s);
466 s++;
David Malcolm96960882010-11-05 17:23:41 +0000467 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000469 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 x += 10 + c - 'a';
471 else
472 x += 10 + c - 'A';
473 *p++ = x;
474 break;
475 }
476 if (!errors || strcmp(errors, "strict") == 0) {
477 PyErr_SetString(PyExc_ValueError,
478 "invalid \\x escape");
479 goto failed;
480 }
481 if (strcmp(errors, "replace") == 0) {
482 *p++ = '?';
483 } else if (strcmp(errors, "ignore") == 0)
484 /* do nothing */;
485 else {
486 PyErr_Format(PyExc_ValueError,
487 "decoding error; unknown "
488 "error handling code: %.400s",
489 errors);
490 goto failed;
491 }
492 default:
493 *p++ = '\\';
494 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200495 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 UTF-8 bytes may follow. */
497 }
498 }
499 if (p-buf < newlen)
500 _PyBytes_Resize(&v, p - buf);
501 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000502 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000503 Py_DECREF(v);
504 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505}
506
507/* -------------------------------------------------------------------- */
508/* object api */
509
510Py_ssize_t
511PyBytes_Size(register PyObject *op)
512{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 if (!PyBytes_Check(op)) {
514 PyErr_Format(PyExc_TypeError,
515 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
516 return -1;
517 }
518 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000519}
520
521char *
522PyBytes_AsString(register PyObject *op)
523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 if (!PyBytes_Check(op)) {
525 PyErr_Format(PyExc_TypeError,
526 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
527 return NULL;
528 }
529 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000530}
531
532int
533PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 register char **s,
535 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (s == NULL) {
538 PyErr_BadInternalCall();
539 return -1;
540 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (!PyBytes_Check(obj)) {
543 PyErr_Format(PyExc_TypeError,
544 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
545 return -1;
546 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 *s = PyBytes_AS_STRING(obj);
549 if (len != NULL)
550 *len = PyBytes_GET_SIZE(obj);
551 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
552 PyErr_SetString(PyExc_TypeError,
553 "expected bytes with no null");
554 return -1;
555 }
556 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000557}
Neal Norwitz6968b052007-02-27 19:02:19 +0000558
559/* -------------------------------------------------------------------- */
560/* Methods */
561
Eric Smith0923d1d2009-04-16 20:16:10 +0000562#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
564#include "stringlib/fastsearch.h"
565#include "stringlib/count.h"
566#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200567#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000568#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000569#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000570#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000571
Eric Smith0f78bff2009-11-30 01:01:42 +0000572#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000573
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000574PyObject *
575PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000576{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200578 Py_ssize_t i, length = Py_SIZE(op);
579 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 unsigned char quote, *s, *p;
582
583 /* Compute size of output string */
584 squotes = dquotes = 0;
585 newsize = 3; /* b'' */
586 s = (unsigned char*)op->ob_sval;
587 for (i = 0; i < length; i++) {
588 switch(s[i]) {
589 case '\'': squotes++; newsize++; break;
590 case '"': dquotes++; newsize++; break;
591 case '\\': case '\t': case '\n': case '\r':
592 newsize += 2; break; /* \C */
593 default:
594 if (s[i] < ' ' || s[i] >= 0x7f)
595 newsize += 4; /* \xHH */
596 else
597 newsize++;
598 }
599 }
600 quote = '\'';
601 if (smartquotes && squotes && !dquotes)
602 quote = '"';
603 if (squotes && quote == '\'')
604 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200605
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 PyErr_SetString(PyExc_OverflowError,
608 "bytes object is too large to make repr");
609 return NULL;
610 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200611
612 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 if (v == NULL) {
614 return NULL;
615 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200616 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000617
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200618 *p++ = 'b', *p++ = quote;
619 for (i = 0; i < length; i++) {
620 unsigned char c = op->ob_sval[i];
621 if (c == quote || c == '\\')
622 *p++ = '\\', *p++ = c;
623 else if (c == '\t')
624 *p++ = '\\', *p++ = 't';
625 else if (c == '\n')
626 *p++ = '\\', *p++ = 'n';
627 else if (c == '\r')
628 *p++ = '\\', *p++ = 'r';
629 else if (c < ' ' || c >= 0x7f) {
630 *p++ = '\\';
631 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200632 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
633 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200635 else
636 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200638 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200639 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200640 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000641}
642
Neal Norwitz6968b052007-02-27 19:02:19 +0000643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000644bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Neal Norwitz6968b052007-02-27 19:02:19 +0000649static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 if (Py_BytesWarningFlag) {
653 if (PyErr_WarnEx(PyExc_BytesWarning,
654 "str() on a bytes instance", 1))
655 return NULL;
656 }
657 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000658}
659
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000661bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664}
Neal Norwitz6968b052007-02-27 19:02:19 +0000665
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666/* This is also used by PyBytes_Concat() */
667static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 Py_ssize_t size;
671 Py_buffer va, vb;
672 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 va.len = -1;
675 vb.len = -1;
676 if (_getbuffer(a, &va) < 0 ||
677 _getbuffer(b, &vb) < 0) {
678 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
679 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
680 goto done;
681 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 /* Optimize end cases */
684 if (va.len == 0 && PyBytes_CheckExact(b)) {
685 result = b;
686 Py_INCREF(result);
687 goto done;
688 }
689 if (vb.len == 0 && PyBytes_CheckExact(a)) {
690 result = a;
691 Py_INCREF(result);
692 goto done;
693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 size = va.len + vb.len;
696 if (size < 0) {
697 PyErr_NoMemory();
698 goto done;
699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 result = PyBytes_FromStringAndSize(NULL, size);
702 if (result != NULL) {
703 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
704 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000706
707 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 if (va.len != -1)
709 PyBuffer_Release(&va);
710 if (vb.len != -1)
711 PyBuffer_Release(&vb);
712 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000713}
Neal Norwitz6968b052007-02-27 19:02:19 +0000714
715static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000716bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 register Py_ssize_t i;
719 register Py_ssize_t j;
720 register Py_ssize_t size;
721 register PyBytesObject *op;
722 size_t nbytes;
723 if (n < 0)
724 n = 0;
725 /* watch out for overflows: the size can overflow int,
726 * and the # of bytes needed can overflow size_t
727 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000728 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
735 Py_INCREF(a);
736 return (PyObject *)a;
737 }
738 nbytes = (size_t)size;
739 if (nbytes + PyBytesObject_SIZE <= nbytes) {
740 PyErr_SetString(PyExc_OverflowError,
741 "repeated bytes are too long");
742 return NULL;
743 }
744 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
745 if (op == NULL)
746 return PyErr_NoMemory();
747 PyObject_INIT_VAR(op, &PyBytes_Type, size);
748 op->ob_shash = -1;
749 op->ob_sval[size] = '\0';
750 if (Py_SIZE(a) == 1 && n > 0) {
751 memset(op->ob_sval, a->ob_sval[0] , n);
752 return (PyObject *) op;
753 }
754 i = 0;
755 if (i < size) {
756 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
757 i = Py_SIZE(a);
758 }
759 while (i < size) {
760 j = (i <= size-i) ? i : size-i;
761 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
762 i += j;
763 }
764 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000765}
766
Guido van Rossum98297ee2007-11-06 21:34:58 +0000767static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000768bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000769{
770 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
771 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000773 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000774 PyErr_Clear();
775 if (_getbuffer(arg, &varg) < 0)
776 return -1;
777 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
778 varg.buf, varg.len, 0);
779 PyBuffer_Release(&varg);
780 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000783 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
784 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 }
786
Antoine Pitrou0010d372010-08-15 17:12:55 +0000787 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788}
789
Neal Norwitz6968b052007-02-27 19:02:19 +0000790static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000791bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 if (i < 0 || i >= Py_SIZE(a)) {
794 PyErr_SetString(PyExc_IndexError, "index out of range");
795 return NULL;
796 }
797 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000798}
799
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000801bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000802{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 int c;
804 Py_ssize_t len_a, len_b;
805 Py_ssize_t min_len;
806 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /* Make sure both arguments are strings. */
809 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
810 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
811 (PyObject_IsInstance((PyObject*)a,
812 (PyObject*)&PyUnicode_Type) ||
813 PyObject_IsInstance((PyObject*)b,
814 (PyObject*)&PyUnicode_Type))) {
815 if (PyErr_WarnEx(PyExc_BytesWarning,
816 "Comparison between bytes and string", 1))
817 return NULL;
818 }
819 result = Py_NotImplemented;
820 goto out;
821 }
822 if (a == b) {
823 switch (op) {
824 case Py_EQ:case Py_LE:case Py_GE:
825 result = Py_True;
826 goto out;
827 case Py_NE:case Py_LT:case Py_GT:
828 result = Py_False;
829 goto out;
830 }
831 }
832 if (op == Py_EQ) {
833 /* Supporting Py_NE here as well does not save
834 much time, since Py_NE is rarely used. */
835 if (Py_SIZE(a) == Py_SIZE(b)
836 && (a->ob_sval[0] == b->ob_sval[0]
837 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
838 result = Py_True;
839 } else {
840 result = Py_False;
841 }
842 goto out;
843 }
844 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
845 min_len = (len_a < len_b) ? len_a : len_b;
846 if (min_len > 0) {
847 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
848 if (c==0)
849 c = memcmp(a->ob_sval, b->ob_sval, min_len);
850 } else
851 c = 0;
852 if (c == 0)
853 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
854 switch (op) {
855 case Py_LT: c = c < 0; break;
856 case Py_LE: c = c <= 0; break;
857 case Py_EQ: assert(0); break; /* unreachable */
858 case Py_NE: c = c != 0; break;
859 case Py_GT: c = c > 0; break;
860 case Py_GE: c = c >= 0; break;
861 default:
862 result = Py_NotImplemented;
863 goto out;
864 }
865 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000866 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 Py_INCREF(result);
868 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000869}
870
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000871static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000872bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000873{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100874 if (a->ob_shash == -1) {
875 /* Can't fail */
876 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
877 }
878 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000879}
880
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000881static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000882bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000883{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 if (PyIndex_Check(item)) {
885 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
886 if (i == -1 && PyErr_Occurred())
887 return NULL;
888 if (i < 0)
889 i += PyBytes_GET_SIZE(self);
890 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
891 PyErr_SetString(PyExc_IndexError,
892 "index out of range");
893 return NULL;
894 }
895 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
896 }
897 else if (PySlice_Check(item)) {
898 Py_ssize_t start, stop, step, slicelength, cur, i;
899 char* source_buf;
900 char* result_buf;
901 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000902
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000903 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 PyBytes_GET_SIZE(self),
905 &start, &stop, &step, &slicelength) < 0) {
906 return NULL;
907 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 if (slicelength <= 0) {
910 return PyBytes_FromStringAndSize("", 0);
911 }
912 else if (start == 0 && step == 1 &&
913 slicelength == PyBytes_GET_SIZE(self) &&
914 PyBytes_CheckExact(self)) {
915 Py_INCREF(self);
916 return (PyObject *)self;
917 }
918 else if (step == 1) {
919 return PyBytes_FromStringAndSize(
920 PyBytes_AS_STRING(self) + start,
921 slicelength);
922 }
923 else {
924 source_buf = PyBytes_AS_STRING(self);
925 result = PyBytes_FromStringAndSize(NULL, slicelength);
926 if (result == NULL)
927 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 result_buf = PyBytes_AS_STRING(result);
930 for (cur = start, i = 0; i < slicelength;
931 cur += step, i++) {
932 result_buf[i] = source_buf[cur];
933 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 return result;
936 }
937 }
938 else {
939 PyErr_Format(PyExc_TypeError,
940 "byte indices must be integers, not %.200s",
941 Py_TYPE(item)->tp_name);
942 return NULL;
943 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000944}
945
946static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000947bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000948{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
950 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951}
952
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 (lenfunc)bytes_length, /*sq_length*/
955 (binaryfunc)bytes_concat, /*sq_concat*/
956 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
957 (ssizeargfunc)bytes_item, /*sq_item*/
958 0, /*sq_slice*/
959 0, /*sq_ass_item*/
960 0, /*sq_ass_slice*/
961 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962};
963
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000964static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 (lenfunc)bytes_length,
966 (binaryfunc)bytes_subscript,
967 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968};
969
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000970static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 (getbufferproc)bytes_buffer_getbuffer,
972 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973};
974
975
/* Selectors for the three strip flavours; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for a selector: the format string minus its 3-char "|O:"
   prefix. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
984
Neal Norwitz6968b052007-02-27 19:02:19 +0000985PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200986"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000987\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000988Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000989If sep is not specified or is None, B is split on ASCII whitespace\n\
990characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000991If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000992
993static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200994bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +0000995{
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200996 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
998 Py_ssize_t maxsplit = -1;
999 const char *s = PyBytes_AS_STRING(self), *sub;
1000 Py_buffer vsub;
1001 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001002
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001003 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1004 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 return NULL;
1006 if (maxsplit < 0)
1007 maxsplit = PY_SSIZE_T_MAX;
1008 if (subobj == Py_None)
1009 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1010 if (_getbuffer(subobj, &vsub) < 0)
1011 return NULL;
1012 sub = vsub.buf;
1013 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1016 PyBuffer_Release(&vsub);
1017 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001018}
1019
Neal Norwitz6968b052007-02-27 19:02:19 +00001020PyDoc_STRVAR(partition__doc__,
1021"B.partition(sep) -> (head, sep, tail)\n\
1022\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001023Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001024the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001025found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001026
1027static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001028bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001029{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 const char *sep;
1031 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 if (PyBytes_Check(sep_obj)) {
1034 sep = PyBytes_AS_STRING(sep_obj);
1035 sep_len = PyBytes_GET_SIZE(sep_obj);
1036 }
1037 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1038 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 return stringlib_partition(
1041 (PyObject*) self,
1042 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1043 sep_obj, sep, sep_len
1044 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001045}
1046
1047PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001048"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001049\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001050Search for the separator sep in B, starting at the end of B,\n\
1051and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001052part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001053bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001054
1055static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001056bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001057{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 const char *sep;
1059 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 if (PyBytes_Check(sep_obj)) {
1062 sep = PyBytes_AS_STRING(sep_obj);
1063 sep_len = PyBytes_GET_SIZE(sep_obj);
1064 }
1065 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1066 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 return stringlib_rpartition(
1069 (PyObject*) self,
1070 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1071 sep_obj, sep, sep_len
1072 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001073}
1074
Neal Norwitz6968b052007-02-27 19:02:19 +00001075PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001076"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001077\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001078Return a list of the sections in B, using sep as the delimiter,\n\
1079starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001080If sep is not given, B is split on ASCII whitespace characters\n\
1081(space, tab, return, newline, formfeed, vertical tab).\n\
1082If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001083
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001084
Neal Norwitz6968b052007-02-27 19:02:19 +00001085static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001086bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001087{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001088 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1090 Py_ssize_t maxsplit = -1;
1091 const char *s = PyBytes_AS_STRING(self), *sub;
1092 Py_buffer vsub;
1093 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001094
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001095 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1096 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 return NULL;
1098 if (maxsplit < 0)
1099 maxsplit = PY_SSIZE_T_MAX;
1100 if (subobj == Py_None)
1101 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1102 if (_getbuffer(subobj, &vsub) < 0)
1103 return NULL;
1104 sub = vsub.buf;
1105 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1108 PyBuffer_Release(&vsub);
1109 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001110}
1111
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112
1113PyDoc_STRVAR(join__doc__,
1114"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001115\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001116Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001117Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1118
Neal Norwitz6968b052007-02-27 19:02:19 +00001119static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001120bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001121{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001122 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001123}
1124
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001125PyObject *
1126_PyBytes_Join(PyObject *sep, PyObject *x)
1127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 assert(sep != NULL && PyBytes_Check(sep));
1129 assert(x != NULL);
1130 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001131}
1132
/* helper macro to fixup start/end slice values
 *
 * Clamps end to len, and converts negative start/end to offsets from the
 * end of the sequence (never going below 0).  start and end must be
 * modifiable lvalues; they and len are evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else; invoke
 * it with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147
1148Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001149bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001152 char byte;
1153 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 const char *sub;
1155 Py_ssize_t sub_len;
1156 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001157 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001158
Antoine Pitrouac65d962011-10-20 23:54:17 +02001159 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1160 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001162
Antoine Pitrouac65d962011-10-20 23:54:17 +02001163 if (subobj) {
1164 if (_getbuffer(subobj, &subbuf) < 0)
1165 return -2;
1166
1167 sub = subbuf.buf;
1168 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001170 else {
1171 sub = &byte;
1172 sub_len = 1;
1173 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001176 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1178 sub, sub_len, start, end);
1179 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001180 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1182 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001183
1184 if (subobj)
1185 PyBuffer_Release(&subbuf);
1186
1187 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001188}
1189
1190
1191PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001192"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001193\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001194Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001195such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001196arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001197\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001198Return -1 on failure.");
1199
Neal Norwitz6968b052007-02-27 19:02:19 +00001200static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001201bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001202{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 Py_ssize_t result = bytes_find_internal(self, args, +1);
1204 if (result == -2)
1205 return NULL;
1206 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001207}
1208
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209
1210PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001211"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001212\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213Like B.find() but raise ValueError when the substring is not found.");
1214
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001215static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001216bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001217{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 Py_ssize_t result = bytes_find_internal(self, args, +1);
1219 if (result == -2)
1220 return NULL;
1221 if (result == -1) {
1222 PyErr_SetString(PyExc_ValueError,
1223 "substring not found");
1224 return NULL;
1225 }
1226 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001227}
1228
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229
1230PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001231"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001232\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001234such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001236\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237Return -1 on failure.");
1238
Neal Norwitz6968b052007-02-27 19:02:19 +00001239static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001240bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001241{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 Py_ssize_t result = bytes_find_internal(self, args, -1);
1243 if (result == -2)
1244 return NULL;
1245 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001246}
1247
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001248
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001250"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001251\n\
1252Like B.rfind() but raise ValueError when the substring is not found.");
1253
1254static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001255bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001256{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 Py_ssize_t result = bytes_find_internal(self, args, -1);
1258 if (result == -2)
1259 return NULL;
1260 if (result == -1) {
1261 PyErr_SetString(PyExc_ValueError,
1262 "substring not found");
1263 return NULL;
1264 }
1265 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001266}
1267
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268
1269Py_LOCAL_INLINE(PyObject *)
1270do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001271{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 Py_buffer vsep;
1273 char *s = PyBytes_AS_STRING(self);
1274 Py_ssize_t len = PyBytes_GET_SIZE(self);
1275 char *sep;
1276 Py_ssize_t seplen;
1277 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 if (_getbuffer(sepobj, &vsep) < 0)
1280 return NULL;
1281 sep = vsep.buf;
1282 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 i = 0;
1285 if (striptype != RIGHTSTRIP) {
1286 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1287 i++;
1288 }
1289 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 j = len;
1292 if (striptype != LEFTSTRIP) {
1293 do {
1294 j--;
1295 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1296 j++;
1297 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1302 Py_INCREF(self);
1303 return (PyObject*)self;
1304 }
1305 else
1306 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001307}
1308
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309
1310Py_LOCAL_INLINE(PyObject *)
1311do_strip(PyBytesObject *self, int striptype)
1312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 char *s = PyBytes_AS_STRING(self);
1314 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 i = 0;
1317 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001318 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 i++;
1320 }
1321 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 j = len;
1324 if (striptype != LEFTSTRIP) {
1325 do {
1326 j--;
David Malcolm96960882010-11-05 17:23:41 +00001327 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 j++;
1329 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1332 Py_INCREF(self);
1333 return (PyObject*)self;
1334 }
1335 else
1336 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001337}
1338
1339
1340Py_LOCAL_INLINE(PyObject *)
1341do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1342{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1346 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 if (sep != NULL && sep != Py_None) {
1349 return do_xstrip(self, striptype, sep);
1350 }
1351 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001352}
1353
1354
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001355PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001356"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001357\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001358Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001359If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001360static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001361bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 if (PyTuple_GET_SIZE(args) == 0)
1364 return do_strip(self, BOTHSTRIP); /* Common case */
1365 else
1366 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001367}
1368
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001370PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001372\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001373Strip leading bytes contained in the argument.\n\
1374If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001375static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001376bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001377{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 if (PyTuple_GET_SIZE(args) == 0)
1379 return do_strip(self, LEFTSTRIP); /* Common case */
1380 else
1381 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001382}
1383
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001385PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001387\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001388Strip trailing bytes contained in the argument.\n\
1389If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001390static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001391bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001392{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 if (PyTuple_GET_SIZE(args) == 0)
1394 return do_strip(self, RIGHTSTRIP); /* Common case */
1395 else
1396 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001397}
Neal Norwitz6968b052007-02-27 19:02:19 +00001398
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
1400PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001401"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001402\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001404string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405as in slice notation.");
1406
1407static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001408bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 PyObject *sub_obj;
1411 const char *str = PyBytes_AS_STRING(self), *sub;
1412 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001413 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415
Antoine Pitrouac65d962011-10-20 23:54:17 +02001416 Py_buffer vsub;
1417 PyObject *count_obj;
1418
1419 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1420 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422
Antoine Pitrouac65d962011-10-20 23:54:17 +02001423 if (sub_obj) {
1424 if (_getbuffer(sub_obj, &vsub) < 0)
1425 return NULL;
1426
1427 sub = vsub.buf;
1428 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001430 else {
1431 sub = &byte;
1432 sub_len = 1;
1433 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436
Antoine Pitrouac65d962011-10-20 23:54:17 +02001437 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1439 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001440
1441 if (sub_obj)
1442 PyBuffer_Release(&vsub);
1443
1444 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001445}
1446
1447
1448PyDoc_STRVAR(translate__doc__,
1449"B.translate(table[, deletechars]) -> bytes\n\
1450\n\
1451Return a copy of B, where all characters occurring in the\n\
1452optional argument deletechars are removed, and the remaining\n\
1453characters have been mapped through the given translation\n\
1454table, which must be a bytes object of length 256.");
1455
1456static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001457bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 register char *input, *output;
1460 const char *table;
1461 register Py_ssize_t i, c, changed = 0;
1462 PyObject *input_obj = (PyObject*)self;
1463 const char *output_start, *del_table=NULL;
1464 Py_ssize_t inlen, tablen, dellen = 0;
1465 PyObject *result;
1466 int trans_table[256];
1467 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1470 &tableobj, &delobj))
1471 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 if (PyBytes_Check(tableobj)) {
1474 table = PyBytes_AS_STRING(tableobj);
1475 tablen = PyBytes_GET_SIZE(tableobj);
1476 }
1477 else if (tableobj == Py_None) {
1478 table = NULL;
1479 tablen = 256;
1480 }
1481 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1482 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 if (tablen != 256) {
1485 PyErr_SetString(PyExc_ValueError,
1486 "translation table must be 256 characters long");
1487 return NULL;
1488 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 if (delobj != NULL) {
1491 if (PyBytes_Check(delobj)) {
1492 del_table = PyBytes_AS_STRING(delobj);
1493 dellen = PyBytes_GET_SIZE(delobj);
1494 }
1495 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1496 return NULL;
1497 }
1498 else {
1499 del_table = NULL;
1500 dellen = 0;
1501 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 inlen = PyBytes_GET_SIZE(input_obj);
1504 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1505 if (result == NULL)
1506 return NULL;
1507 output_start = output = PyBytes_AsString(result);
1508 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 if (dellen == 0 && table != NULL) {
1511 /* If no deletions are required, use faster code */
1512 for (i = inlen; --i >= 0; ) {
1513 c = Py_CHARMASK(*input++);
1514 if (Py_CHARMASK((*output++ = table[c])) != c)
1515 changed = 1;
1516 }
1517 if (changed || !PyBytes_CheckExact(input_obj))
1518 return result;
1519 Py_DECREF(result);
1520 Py_INCREF(input_obj);
1521 return input_obj;
1522 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 if (table == NULL) {
1525 for (i = 0; i < 256; i++)
1526 trans_table[i] = Py_CHARMASK(i);
1527 } else {
1528 for (i = 0; i < 256; i++)
1529 trans_table[i] = Py_CHARMASK(table[i]);
1530 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 for (i = 0; i < dellen; i++)
1533 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 for (i = inlen; --i >= 0; ) {
1536 c = Py_CHARMASK(*input++);
1537 if (trans_table[c] != -1)
1538 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1539 continue;
1540 changed = 1;
1541 }
1542 if (!changed && PyBytes_CheckExact(input_obj)) {
1543 Py_DECREF(result);
1544 Py_INCREF(input_obj);
1545 return input_obj;
1546 }
1547 /* Fix the size of the resulting string */
1548 if (inlen > 0)
1549 _PyBytes_Resize(&result, output - output_start);
1550 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551}
1552
1553
Georg Brandlabc38772009-04-12 15:51:51 +00001554static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001555bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001558}
1559
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001560/* find and count characters and substrings */
1561
/* Search the first target_len bytes of target for the byte c.  Evaluates to
   a char* pointing at the first match, or NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1564
1565/* String ops must return a string. */
1566/* If the object is subclass of string, create a copy */
1567Py_LOCAL(PyBytesObject *)
1568return_self(PyBytesObject *self)
1569{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 if (PyBytes_CheckExact(self)) {
1571 Py_INCREF(self);
1572 return self;
1573 }
1574 return (PyBytesObject *)PyBytes_FromStringAndSize(
1575 PyBytes_AS_STRING(self),
1576 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001577}
1578
1579Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001580countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 Py_ssize_t count=0;
1583 const char *start=target;
1584 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 while ( (start=findchar(start, end-start, c)) != NULL ) {
1587 count++;
1588 if (count >= maxcount)
1589 break;
1590 start += 1;
1591 }
1592 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593}
1594
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001595
1596/* Algorithms for different cases of string replacement */
1597
1598/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1599Py_LOCAL(PyBytesObject *)
1600replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 const char *to_s, Py_ssize_t to_len,
1602 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001603{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 char *self_s, *result_s;
1605 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001606 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001611 /* 1 at the end plus 1 after every character;
1612 count = min(maxcount, self_len + 1) */
1613 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001615 else
1616 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1617 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 /* Check for overflow */
1620 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001621 assert(count > 0);
1622 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 PyErr_SetString(PyExc_OverflowError,
1624 "replacement bytes are too long");
1625 return NULL;
1626 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001627 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 if (! (result = (PyBytesObject *)
1630 PyBytes_FromStringAndSize(NULL, result_len)) )
1631 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 self_s = PyBytes_AS_STRING(self);
1634 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 /* Lay the first one down (guaranteed this will occur) */
1639 Py_MEMCPY(result_s, to_s, to_len);
1640 result_s += to_len;
1641 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 for (i=0; i<count; i++) {
1644 *result_s++ = *self_s++;
1645 Py_MEMCPY(result_s, to_s, to_len);
1646 result_s += to_len;
1647 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 /* Copy the rest of the original string */
1650 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653}
1654
1655/* Special case for deleting a single character */
1656/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1657Py_LOCAL(PyBytesObject *)
1658replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001660{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 char *self_s, *result_s;
1662 char *start, *next, *end;
1663 Py_ssize_t self_len, result_len;
1664 Py_ssize_t count;
1665 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 self_len = PyBytes_GET_SIZE(self);
1668 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 count = countchar(self_s, self_len, from_c, maxcount);
1671 if (count == 0) {
1672 return return_self(self);
1673 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 result_len = self_len - count; /* from_len == 1 */
1676 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 if ( (result = (PyBytesObject *)
1679 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1680 return NULL;
1681 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 start = self_s;
1684 end = self_s + self_len;
1685 while (count-- > 0) {
1686 next = findchar(start, end-start, from_c);
1687 if (next == NULL)
1688 break;
1689 Py_MEMCPY(result_s, start, next-start);
1690 result_s += (next-start);
1691 start = next+1;
1692 }
1693 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001696}
1697
1698/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1699
1700Py_LOCAL(PyBytesObject *)
1701replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 const char *from_s, Py_ssize_t from_len,
1703 Py_ssize_t maxcount) {
1704 char *self_s, *result_s;
1705 char *start, *next, *end;
1706 Py_ssize_t self_len, result_len;
1707 Py_ssize_t count, offset;
1708 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 self_len = PyBytes_GET_SIZE(self);
1711 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 count = stringlib_count(self_s, self_len,
1714 from_s, from_len,
1715 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 if (count == 0) {
1718 /* no matches */
1719 return return_self(self);
1720 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 result_len = self_len - (count * from_len);
1723 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 if ( (result = (PyBytesObject *)
1726 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1727 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001731 start = self_s;
1732 end = self_s + self_len;
1733 while (count-- > 0) {
1734 offset = stringlib_find(start, end-start,
1735 from_s, from_len,
1736 0);
1737 if (offset == -1)
1738 break;
1739 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 result_s += (next-start);
1744 start = next+from_len;
1745 }
1746 Py_MEMCPY(result_s, start, end-start);
1747 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748}
1749
1750/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1751Py_LOCAL(PyBytesObject *)
1752replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 char from_c, char to_c,
1754 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001755{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001756 char *self_s, *result_s, *start, *end, *next;
1757 Py_ssize_t self_len;
1758 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001760 /* The result string will be the same size */
1761 self_s = PyBytes_AS_STRING(self);
1762 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 if (next == NULL) {
1767 /* No matches; return the original string */
1768 return return_self(self);
1769 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 /* Need to make a new string */
1772 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1773 if (result == NULL)
1774 return NULL;
1775 result_s = PyBytes_AS_STRING(result);
1776 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 /* change everything in-place, starting with this one */
1779 start = result_s + (next-self_s);
1780 *start = to_c;
1781 start++;
1782 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 while (--maxcount > 0) {
1785 next = findchar(start, end-start, from_c);
1786 if (next == NULL)
1787 break;
1788 *next = to_c;
1789 start = next+1;
1790 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793}
1794
1795/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1796Py_LOCAL(PyBytesObject *)
1797replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 const char *from_s, Py_ssize_t from_len,
1799 const char *to_s, Py_ssize_t to_len,
1800 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 char *result_s, *start, *end;
1803 char *self_s;
1804 Py_ssize_t self_len, offset;
1805 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001808
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001809 self_s = PyBytes_AS_STRING(self);
1810 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 offset = stringlib_find(self_s, self_len,
1813 from_s, from_len,
1814 0);
1815 if (offset == -1) {
1816 /* No matches; return the original string */
1817 return return_self(self);
1818 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 /* Need to make a new string */
1821 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1822 if (result == NULL)
1823 return NULL;
1824 result_s = PyBytes_AS_STRING(result);
1825 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 /* change everything in-place, starting with this one */
1828 start = result_s + offset;
1829 Py_MEMCPY(start, to_s, from_len);
1830 start += from_len;
1831 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 while ( --maxcount > 0) {
1834 offset = stringlib_find(start, end-start,
1835 from_s, from_len,
1836 0);
1837 if (offset==-1)
1838 break;
1839 Py_MEMCPY(start+offset, to_s, from_len);
1840 start += offset+from_len;
1841 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844}
1845
1846/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1847Py_LOCAL(PyBytesObject *)
1848replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 char from_c,
1850 const char *to_s, Py_ssize_t to_len,
1851 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 char *self_s, *result_s;
1854 char *start, *next, *end;
1855 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001856 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 self_s = PyBytes_AS_STRING(self);
1860 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 count = countchar(self_s, self_len, from_c, maxcount);
1863 if (count == 0) {
1864 /* no matches, return unchanged */
1865 return return_self(self);
1866 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 /* use the difference between current and new, hence the "-1" */
1869 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001870 assert(count > 0);
1871 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 PyErr_SetString(PyExc_OverflowError,
1873 "replacement bytes are too long");
1874 return NULL;
1875 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001876 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 if ( (result = (PyBytesObject *)
1879 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1880 return NULL;
1881 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 start = self_s;
1884 end = self_s + self_len;
1885 while (count-- > 0) {
1886 next = findchar(start, end-start, from_c);
1887 if (next == NULL)
1888 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 if (next == start) {
1891 /* replace with the 'to' */
1892 Py_MEMCPY(result_s, to_s, to_len);
1893 result_s += to_len;
1894 start += 1;
1895 } else {
1896 /* copy the unchanged old then the 'to' */
1897 Py_MEMCPY(result_s, start, next-start);
1898 result_s += (next-start);
1899 Py_MEMCPY(result_s, to_s, to_len);
1900 result_s += to_len;
1901 start = next+1;
1902 }
1903 }
1904 /* Copy the remainder of the remaining string */
1905 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908}
1909
1910/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1911Py_LOCAL(PyBytesObject *)
1912replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 const char *from_s, Py_ssize_t from_len,
1914 const char *to_s, Py_ssize_t to_len,
1915 Py_ssize_t maxcount) {
1916 char *self_s, *result_s;
1917 char *start, *next, *end;
1918 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001919 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 self_s = PyBytes_AS_STRING(self);
1923 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 count = stringlib_count(self_s, self_len,
1926 from_s, from_len,
1927 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 if (count == 0) {
1930 /* no matches, return unchanged */
1931 return return_self(self);
1932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 /* Check for overflow */
1935 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001936 assert(count > 0);
1937 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 PyErr_SetString(PyExc_OverflowError,
1939 "replacement bytes are too long");
1940 return NULL;
1941 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001942 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 if ( (result = (PyBytesObject *)
1945 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1946 return NULL;
1947 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 start = self_s;
1950 end = self_s + self_len;
1951 while (count-- > 0) {
1952 offset = stringlib_find(start, end-start,
1953 from_s, from_len,
1954 0);
1955 if (offset == -1)
1956 break;
1957 next = start+offset;
1958 if (next == start) {
1959 /* replace with the 'to' */
1960 Py_MEMCPY(result_s, to_s, to_len);
1961 result_s += to_len;
1962 start += from_len;
1963 } else {
1964 /* copy the unchanged old then the 'to' */
1965 Py_MEMCPY(result_s, start, next-start);
1966 result_s += (next-start);
1967 Py_MEMCPY(result_s, to_s, to_len);
1968 result_s += to_len;
1969 start = next+from_len;
1970 }
1971 }
1972 /* Copy the remainder of the remaining string */
1973 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976}
1977
1978
1979Py_LOCAL(PyBytesObject *)
1980replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 const char *from_s, Py_ssize_t from_len,
1982 const char *to_s, Py_ssize_t to_len,
1983 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001985 if (maxcount < 0) {
1986 maxcount = PY_SSIZE_T_MAX;
1987 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1988 /* nothing to do; return the original string */
1989 return return_self(self);
1990 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 if (maxcount == 0 ||
1993 (from_len == 0 && to_len == 0)) {
1994 /* nothing to do; return the original string */
1995 return return_self(self);
1996 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001997
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001998 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 if (from_len == 0) {
2001 /* insert the 'to' string everywhere. */
2002 /* >>> "Python".replace("", ".") */
2003 /* '.P.y.t.h.o.n.' */
2004 return replace_interleave(self, to_s, to_len, maxcount);
2005 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002007 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2008 /* point for an empty self string to generate a non-empty string */
2009 /* Special case so the remaining code always gets a non-empty string */
2010 if (PyBytes_GET_SIZE(self) == 0) {
2011 return return_self(self);
2012 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002014 if (to_len == 0) {
2015 /* delete all occurrences of 'from' string */
2016 if (from_len == 1) {
2017 return replace_delete_single_character(
2018 self, from_s[0], maxcount);
2019 } else {
2020 return replace_delete_substring(self, from_s,
2021 from_len, maxcount);
2022 }
2023 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002025 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002027 if (from_len == to_len) {
2028 if (from_len == 1) {
2029 return replace_single_character_in_place(
2030 self,
2031 from_s[0],
2032 to_s[0],
2033 maxcount);
2034 } else {
2035 return replace_substring_in_place(
2036 self, from_s, from_len, to_s, to_len,
2037 maxcount);
2038 }
2039 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 /* Otherwise use the more generic algorithms */
2042 if (from_len == 1) {
2043 return replace_single_character(self, from_s[0],
2044 to_s, to_len, maxcount);
2045 } else {
2046 /* len('from')>=2, len('to')>=1 */
2047 return replace_substring(self, from_s, from_len, to_s, to_len,
2048 maxcount);
2049 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050}
2051
2052PyDoc_STRVAR(replace__doc__,
2053"B.replace(old, new[, count]) -> bytes\n\
2054\n\
2055Return a copy of B with all occurrences of subsection\n\
2056old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002057given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002058
2059static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002060bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 Py_ssize_t count = -1;
2063 PyObject *from, *to;
2064 const char *from_s, *to_s;
2065 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2068 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 if (PyBytes_Check(from)) {
2071 from_s = PyBytes_AS_STRING(from);
2072 from_len = PyBytes_GET_SIZE(from);
2073 }
2074 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2075 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 if (PyBytes_Check(to)) {
2078 to_s = PyBytes_AS_STRING(to);
2079 to_len = PyBytes_GET_SIZE(to);
2080 }
2081 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2082 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 return (PyObject *)replace((PyBytesObject *) self,
2085 from_s, from_len,
2086 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087}
2088
2089/** End DALKE **/
2090
2091/* Matches the end (direction >= 0) or start (direction < 0) of self
2092 * against substr, using the start and end arguments. Returns
2093 * -1 on error, 0 if not found and 1 if found.
2094 */
2095Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002096_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 Py_ssize_t len = PyBytes_GET_SIZE(self);
2100 Py_ssize_t slen;
2101 const char* sub;
2102 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 if (PyBytes_Check(substr)) {
2105 sub = PyBytes_AS_STRING(substr);
2106 slen = PyBytes_GET_SIZE(substr);
2107 }
2108 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2109 return -1;
2110 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002114 if (direction < 0) {
2115 /* startswith */
2116 if (start+slen > len)
2117 return 0;
2118 } else {
2119 /* endswith */
2120 if (end-start < slen || start > len)
2121 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 if (end-slen > start)
2124 start = end - slen;
2125 }
2126 if (end-start >= slen)
2127 return ! memcmp(str+start, sub, slen);
2128 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129}
2130
2131
2132PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002133"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002134\n\
2135Return True if B starts with the specified prefix, False otherwise.\n\
2136With optional start, test B beginning at that position.\n\
2137With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002138prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139
2140static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002141bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 Py_ssize_t start = 0;
2144 Py_ssize_t end = PY_SSIZE_T_MAX;
2145 PyObject *subobj;
2146 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147
Jesus Ceaac451502011-04-20 17:09:23 +02002148 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 return NULL;
2150 if (PyTuple_Check(subobj)) {
2151 Py_ssize_t i;
2152 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2153 result = _bytes_tailmatch(self,
2154 PyTuple_GET_ITEM(subobj, i),
2155 start, end, -1);
2156 if (result == -1)
2157 return NULL;
2158 else if (result) {
2159 Py_RETURN_TRUE;
2160 }
2161 }
2162 Py_RETURN_FALSE;
2163 }
2164 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002165 if (result == -1) {
2166 if (PyErr_ExceptionMatches(PyExc_TypeError))
2167 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2168 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002170 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 else
2172 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173}
2174
2175
2176PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002177"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178\n\
2179Return True if B ends with the specified suffix, False otherwise.\n\
2180With optional start, test B beginning at that position.\n\
2181With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002182suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183
2184static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002185bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 Py_ssize_t start = 0;
2188 Py_ssize_t end = PY_SSIZE_T_MAX;
2189 PyObject *subobj;
2190 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191
Jesus Ceaac451502011-04-20 17:09:23 +02002192 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002193 return NULL;
2194 if (PyTuple_Check(subobj)) {
2195 Py_ssize_t i;
2196 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2197 result = _bytes_tailmatch(self,
2198 PyTuple_GET_ITEM(subobj, i),
2199 start, end, +1);
2200 if (result == -1)
2201 return NULL;
2202 else if (result) {
2203 Py_RETURN_TRUE;
2204 }
2205 }
2206 Py_RETURN_FALSE;
2207 }
2208 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002209 if (result == -1) {
2210 if (PyErr_ExceptionMatches(PyExc_TypeError))
2211 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2212 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002213 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002214 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 else
2216 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002217}
2218
2219
2220PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002221"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002223Decode B using the codec registered for encoding. Default encoding\n\
2224is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002225handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2226a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002228able to handle UnicodeDecodeErrors.");
2229
2230static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002231bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002232{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002233 const char *encoding = NULL;
2234 const char *errors = NULL;
2235 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002237 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2238 return NULL;
2239 if (encoding == NULL)
2240 encoding = PyUnicode_GetDefaultEncoding();
2241 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002242}
2243
Guido van Rossum20188312006-05-05 15:15:40 +00002244
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002245PyDoc_STRVAR(splitlines__doc__,
2246"B.splitlines([keepends]) -> list of lines\n\
2247\n\
2248Return a list of the lines in B, breaking at line boundaries.\n\
2249Line breaks are not included in the resulting list unless keepends\n\
2250is given and true.");
2251
2252static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002253bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002254{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002255 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002256 int keepends = 0;
2257
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002258 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2259 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002260 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002261
2262 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002263 (PyObject*) self, PyBytes_AS_STRING(self),
2264 PyBytes_GET_SIZE(self), keepends
2265 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002266}
2267
2268
/* Docstring for the bytes.fromhex() class method (registered with
   METH_CLASS in bytes_methods). */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002275
2276static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002277hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002278{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002279 if (c >= 128)
2280 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002281 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 return c - '0';
2283 else {
David Malcolm96960882010-11-05 17:23:41 +00002284 if (Py_ISUPPER(c))
2285 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 if (c >= 'a' && c <= 'f')
2287 return c - 'a' + 10;
2288 }
2289 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002290}
2291
2292static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002293bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 PyObject *newstring, *hexobj;
2296 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002297 Py_ssize_t hexlen, byteslen, i, j;
2298 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002299 void *data;
2300 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2303 return NULL;
2304 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002305 if (PyUnicode_READY(hexobj))
2306 return NULL;
2307 kind = PyUnicode_KIND(hexobj);
2308 data = PyUnicode_DATA(hexobj);
2309 hexlen = PyUnicode_GET_LENGTH(hexobj);
2310
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002311 byteslen = hexlen/2; /* This overestimates if there are spaces */
2312 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2313 if (!newstring)
2314 return NULL;
2315 buf = PyBytes_AS_STRING(newstring);
2316 for (i = j = 0; i < hexlen; i += 2) {
2317 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002318 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 i++;
2320 if (i >= hexlen)
2321 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002322 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2323 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 if (top == -1 || bot == -1) {
2325 PyErr_Format(PyExc_ValueError,
2326 "non-hexadecimal number found in "
2327 "fromhex() arg at position %zd", i);
2328 goto error;
2329 }
2330 buf[j++] = (top << 4) + bot;
2331 }
2332 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2333 goto error;
2334 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002335
2336 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 Py_XDECREF(newstring);
2338 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002339}
2340
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002341PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002342"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002343
2344static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002345bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002347 Py_ssize_t res;
2348 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2349 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002350}
2351
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002352
2353static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002354bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002357}
2358
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002359
2360static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002361bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002362 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2363 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2364 _Py_capitalize__doc__},
2365 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2366 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2367 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2368 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2369 endswith__doc__},
2370 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2371 expandtabs__doc__},
2372 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2373 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2374 fromhex_doc},
2375 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2376 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2377 _Py_isalnum__doc__},
2378 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2379 _Py_isalpha__doc__},
2380 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2381 _Py_isdigit__doc__},
2382 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2383 _Py_islower__doc__},
2384 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2385 _Py_isspace__doc__},
2386 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2387 _Py_istitle__doc__},
2388 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2389 _Py_isupper__doc__},
2390 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2391 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2392 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2393 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2394 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2395 _Py_maketrans__doc__},
2396 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2397 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2398 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2399 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2400 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2401 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2402 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002403 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002405 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002406 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 splitlines__doc__},
2408 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2409 startswith__doc__},
2410 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2411 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2412 _Py_swapcase__doc__},
2413 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2414 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2415 translate__doc__},
2416 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2417 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2418 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2419 sizeof__doc__},
2420 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002421};
2422
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423static PyObject *
2424str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2425
2426static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002427bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002428{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 PyObject *x = NULL;
2430 const char *encoding = NULL;
2431 const char *errors = NULL;
2432 PyObject *new = NULL;
2433 Py_ssize_t size;
2434 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 if (type != &PyBytes_Type)
2437 return str_subtype_new(type, args, kwds);
2438 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2439 &encoding, &errors))
2440 return NULL;
2441 if (x == NULL) {
2442 if (encoding != NULL || errors != NULL) {
2443 PyErr_SetString(PyExc_TypeError,
2444 "encoding or errors without sequence "
2445 "argument");
2446 return NULL;
2447 }
2448 return PyBytes_FromString("");
2449 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 if (PyUnicode_Check(x)) {
2452 /* Encode via the codec registry */
2453 if (encoding == NULL) {
2454 PyErr_SetString(PyExc_TypeError,
2455 "string argument without an encoding");
2456 return NULL;
2457 }
2458 new = PyUnicode_AsEncodedString(x, encoding, errors);
2459 if (new == NULL)
2460 return NULL;
2461 assert(PyBytes_Check(new));
2462 return new;
2463 }
2464 /* Is it an integer? */
2465 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2466 if (size == -1 && PyErr_Occurred()) {
2467 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2468 return NULL;
2469 PyErr_Clear();
2470 }
2471 else if (size < 0) {
2472 PyErr_SetString(PyExc_ValueError, "negative count");
2473 return NULL;
2474 }
2475 else {
2476 new = PyBytes_FromStringAndSize(NULL, size);
2477 if (new == NULL) {
2478 return NULL;
2479 }
2480 if (size > 0) {
2481 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2482 }
2483 return new;
2484 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002485
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 /* If it's not unicode, there can't be encoding or errors */
2487 if (encoding != NULL || errors != NULL) {
2488 PyErr_SetString(PyExc_TypeError,
2489 "encoding or errors without a string argument");
2490 return NULL;
2491 }
2492 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002493}
2494
2495PyObject *
2496PyBytes_FromObject(PyObject *x)
2497{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 PyObject *new, *it;
2499 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 if (x == NULL) {
2502 PyErr_BadInternalCall();
2503 return NULL;
2504 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002505
2506 if (PyBytes_CheckExact(x)) {
2507 Py_INCREF(x);
2508 return x;
2509 }
2510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 /* Use the modern buffer interface */
2512 if (PyObject_CheckBuffer(x)) {
2513 Py_buffer view;
2514 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2515 return NULL;
2516 new = PyBytes_FromStringAndSize(NULL, view.len);
2517 if (!new)
2518 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2520 &view, view.len, 'C') < 0)
2521 goto fail;
2522 PyBuffer_Release(&view);
2523 return new;
2524 fail:
2525 Py_XDECREF(new);
2526 PyBuffer_Release(&view);
2527 return NULL;
2528 }
2529 if (PyUnicode_Check(x)) {
2530 PyErr_SetString(PyExc_TypeError,
2531 "cannot convert unicode object to bytes");
2532 return NULL;
2533 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 if (PyList_CheckExact(x)) {
2536 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2537 if (new == NULL)
2538 return NULL;
2539 for (i = 0; i < Py_SIZE(x); i++) {
2540 Py_ssize_t value = PyNumber_AsSsize_t(
2541 PyList_GET_ITEM(x, i), PyExc_ValueError);
2542 if (value == -1 && PyErr_Occurred()) {
2543 Py_DECREF(new);
2544 return NULL;
2545 }
2546 if (value < 0 || value >= 256) {
2547 PyErr_SetString(PyExc_ValueError,
2548 "bytes must be in range(0, 256)");
2549 Py_DECREF(new);
2550 return NULL;
2551 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002552 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 }
2554 return new;
2555 }
2556 if (PyTuple_CheckExact(x)) {
2557 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2558 if (new == NULL)
2559 return NULL;
2560 for (i = 0; i < Py_SIZE(x); i++) {
2561 Py_ssize_t value = PyNumber_AsSsize_t(
2562 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2563 if (value == -1 && PyErr_Occurred()) {
2564 Py_DECREF(new);
2565 return NULL;
2566 }
2567 if (value < 0 || value >= 256) {
2568 PyErr_SetString(PyExc_ValueError,
2569 "bytes must be in range(0, 256)");
2570 Py_DECREF(new);
2571 return NULL;
2572 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002573 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 }
2575 return new;
2576 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002579 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002580 if (size == -1 && PyErr_Occurred())
2581 return NULL;
2582 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2583 returning a shared empty bytes string. This required because we
2584 want to call _PyBytes_Resize() the returned object, which we can
2585 only do on bytes objects with refcount == 1. */
2586 size += 1;
2587 new = PyBytes_FromStringAndSize(NULL, size);
2588 if (new == NULL)
2589 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002591 /* Get the iterator */
2592 it = PyObject_GetIter(x);
2593 if (it == NULL)
2594 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002596 /* Run the iterator to exhaustion */
2597 for (i = 0; ; i++) {
2598 PyObject *item;
2599 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 /* Get the next item */
2602 item = PyIter_Next(it);
2603 if (item == NULL) {
2604 if (PyErr_Occurred())
2605 goto error;
2606 break;
2607 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002609 /* Interpret it as an int (__index__) */
2610 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2611 Py_DECREF(item);
2612 if (value == -1 && PyErr_Occurred())
2613 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 /* Range check */
2616 if (value < 0 || value >= 256) {
2617 PyErr_SetString(PyExc_ValueError,
2618 "bytes must be in range(0, 256)");
2619 goto error;
2620 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 /* Append the byte */
2623 if (i >= size) {
2624 size = 2 * size + 1;
2625 if (_PyBytes_Resize(&new, size) < 0)
2626 goto error;
2627 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002628 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 }
2630 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 /* Clean up and return success */
2633 Py_DECREF(it);
2634 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002635
2636 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002637 /* Error handling when new != NULL */
2638 Py_XDECREF(it);
2639 Py_DECREF(new);
2640 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641}
2642
2643static PyObject *
2644str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 PyObject *tmp, *pnew;
2647 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002649 assert(PyType_IsSubtype(type, &PyBytes_Type));
2650 tmp = bytes_new(&PyBytes_Type, args, kwds);
2651 if (tmp == NULL)
2652 return NULL;
2653 assert(PyBytes_CheckExact(tmp));
2654 n = PyBytes_GET_SIZE(tmp);
2655 pnew = type->tp_alloc(type, n);
2656 if (pnew != NULL) {
2657 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2658 PyBytes_AS_STRING(tmp), n+1);
2659 ((PyBytesObject *)pnew)->ob_shash =
2660 ((PyBytesObject *)tmp)->ob_shash;
2661 }
2662 Py_DECREF(tmp);
2663 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664}
2665
/* Docstring of the bytes type (used as tp_doc of PyBytes_Type). */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - any object implementing the buffer API.\n\
 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002678
/* Defined in the iterator section below; needed here for tp_iter. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  The initializer is positional, so the order of
   the entries must match the layout of PyTypeObject. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002723
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724void
2725PyBytes_Concat(register PyObject **pv, register PyObject *w)
2726{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002727 register PyObject *v;
2728 assert(pv != NULL);
2729 if (*pv == NULL)
2730 return;
2731 if (w == NULL) {
2732 Py_DECREF(*pv);
2733 *pv = NULL;
2734 return;
2735 }
2736 v = bytes_concat(*pv, w);
2737 Py_DECREF(*pv);
2738 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739}
2740
/* Same as PyBytes_Concat(pv, w), and additionally releases the caller's
   reference to w afterwards.  Py_XDECREF is used, so w may be NULL. */
void
PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
2747
2748
/* The following function breaks the notion that strings are immutable:
   it changes the size of a string.  We get away with this only if there
   is only one module referencing the object.  You can also think of it
   as creating a new string object and destroying the old one, only
   more efficiently.  In any case, don't use this if the string may
   already be known to some other part of the code...
   Note that if there's not enough memory to resize the string, the original
   string object at *pv is deallocated, *pv is set to NULL, an "out of
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
   returned, and the value in *pv may or may not be the same as on input.
   As always, an extra byte is allocated for a trailing \0 byte (newsize
   does *not* include that), and a trailing \0 byte is stored.
   If *pv is not a bytes object, is referenced more than once, or newsize
   is negative, the reference in *pv is released, *pv is set to NULL, a
   "bad internal call" error is set, and -1 is returned.
*/

int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyBytesObject *sv;
    v = *pv;
    /* Precondition check: only an exclusively owned bytes object may be
       resized, and only to a non-negative size. */
    if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Undo the reference bookkeeping for v before the block is handed to
       realloc; it is redone by _Py_NewReference() on the result below. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the original object; *pv stays NULL. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
2792
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793void
2794PyBytes_Fini(void)
2795{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 int i;
2797 for (i = 0; i < UCHAR_MAX + 1; i++) {
2798 Py_XDECREF(characters[i]);
2799 characters[i] = NULL;
2800 }
2801 Py_XDECREF(nullstring);
2802 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803}
2804
Benjamin Peterson4116f362008-05-27 00:36:20 +00002805/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002806
/* Instance layout of the bytes iterator (PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte striter_next() yields */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002812
/* tp_dealloc of the bytes iterator: untrack it from the garbage
   collector, release the reference to the underlying bytes object (which
   may already be NULL), then free the iterator itself. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
2820
/* tp_traverse of the bytes iterator: the only object reference it holds
   is it_seq.  Py_VISIT relies on the parameters being named `visit` and
   `arg`. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
2827
2828static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002829striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002830{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 PyBytesObject *seq;
2832 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002834 assert(it != NULL);
2835 seq = it->it_seq;
2836 if (seq == NULL)
2837 return NULL;
2838 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002840 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2841 item = PyLong_FromLong(
2842 (unsigned char)seq->ob_sval[it->it_index]);
2843 if (item != NULL)
2844 ++it->it_index;
2845 return item;
2846 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002848 Py_DECREF(seq);
2849 it->it_seq = NULL;
2850 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002851}
2852
2853static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 Py_ssize_t len = 0;
2857 if (it->it_seq)
2858 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2859 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002860}
2861
2862PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002863 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002864
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002865static PyObject *
2866striter_reduce(striterobject *it)
2867{
2868 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002869 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002870 it->it_seq, it->it_index);
2871 } else {
2872 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2873 if (u == NULL)
2874 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002875 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002876 }
2877}
2878
2879PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2880
2881static PyObject *
2882striter_setstate(striterobject *it, PyObject *state)
2883{
2884 Py_ssize_t index = PyLong_AsSsize_t(state);
2885 if (index == -1 && PyErr_Occurred())
2886 return NULL;
2887 if (index < 0)
2888 index = 0;
2889 it->it_index = index;
2890 Py_RETURN_NONE;
2891}
2892
2893PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2894
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002896 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2897 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002898 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2899 reduce_doc},
2900 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2901 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002903};
2904
/* Type object of the iterator returned by bytes_iter().  The initializer
   is positional; slots after the last listed entry are left
   zero-initialized. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
2937
2938static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002939bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002940{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002941 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002943 if (!PyBytes_Check(seq)) {
2944 PyErr_BadInternalCall();
2945 return NULL;
2946 }
2947 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
2948 if (it == NULL)
2949 return NULL;
2950 it->it_index = 0;
2951 Py_INCREF(seq);
2952 it->it_seq = (PyBytesObject *)seq;
2953 _PyObject_GC_TRACK(it);
2954 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002955}