blob: 613269045bb9525af16fe8ba68bfc1505032bac1 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020013 PyBufferProcs *bufferprocs;
14 if (PyBytes_CheckExact(obj)) {
15 /* Fast path, e.g. for .join() of many bytes objects */
16 Py_INCREF(obj);
17 view->obj = obj;
18 view->buf = PyBytes_AS_STRING(obj);
19 view->len = PyBytes_GET_SIZE(obj);
20 return view->len;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Antoine Pitroucfc22b42012-10-16 21:07:23 +020023 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
24 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000025 {
Antoine Pitroud1188562010-06-09 16:38:55 +000026 PyErr_Format(PyExc_TypeError,
27 "Type %.100s doesn't support the buffer API",
28 Py_TYPE(obj)->tp_name);
29 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000031
Antoine Pitroucfc22b42012-10-16 21:07:23 +020032 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000034 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035}
36
#ifdef COUNT_ALLOCS
/* Statistics counters: incremented each time the cached empty bytes object
   (null_strings) or a cached one-byte bytes object (one_strings) is handed
   out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value.  Entries start
   out NULL and are filled in lazily by PyBytes_FromStringAndSize() and
   PyBytes_FromString(). */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty bytes is created. */
static PyBytesObject *nullstring;
43
Mark Dickinsonfd24b322008-12-06 15:33:31 +000044/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
45 for a string of length n should request PyBytesObject_SIZE + n bytes.
46
47 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
48 3 bytes per string allocation on a typical system.
49*/
50#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
51
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 For PyBytes_FromString(), the parameter `str' points to a null-terminated
54 string containing exactly `size' bytes.
55
   For PyBytes_FromStringAndSize(), the parameter `str' is
57 either NULL or else points to a string containing at least `size' bytes.
58 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
59 not have to be null-terminated. (Therefore it is safe to construct a
60 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
61 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
62 bytes (setting the last byte to the null terminating character) and you can
63 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000064 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065 alter the data yourself, since the strings may be shared.
66
67 The PyObject member `op->ob_size', which denotes the number of "extra
68 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020069 allocated for string data, not counting the null terminating character.
70 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071 PyBytes_FromStringAndSize()) or the length of the string in the `str'
72 parameter (for PyBytes_FromString()).
73*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000074PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000076{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020077 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (size < 0) {
79 PyErr_SetString(PyExc_SystemError,
80 "Negative size passed to PyBytes_FromStringAndSize");
81 return NULL;
82 }
83 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
90 if (size == 1 && str != NULL &&
91 (op = characters[*str & UCHAR_MAX]) != NULL)
92 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000093#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000095#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 Py_INCREF(op);
97 return (PyObject *)op;
98 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
101 PyErr_SetString(PyExc_OverflowError,
102 "byte string is too large");
103 return NULL;
104 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 /* Inline PyObject_NewVar */
107 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
108 if (op == NULL)
109 return PyErr_NoMemory();
110 PyObject_INIT_VAR(op, &PyBytes_Type, size);
111 op->ob_shash = -1;
112 if (str != NULL)
113 Py_MEMCPY(op->ob_sval, str, size);
114 op->ob_sval[size] = '\0';
115 /* share short strings */
116 if (size == 0) {
117 nullstring = op;
118 Py_INCREF(op);
119 } else if (size == 1 && str != NULL) {
120 characters[*str & UCHAR_MAX] = op;
121 Py_INCREF(op);
122 }
123 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000124}
125
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000126PyObject *
127PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000128{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200129 size_t size;
130 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 assert(str != NULL);
133 size = strlen(str);
134 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
135 PyErr_SetString(PyExc_OverflowError,
136 "byte string is too long");
137 return NULL;
138 }
139 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 Py_INCREF(op);
144 return (PyObject *)op;
145 }
146 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000147#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 /* Inline PyObject_NewVar */
155 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
156 if (op == NULL)
157 return PyErr_NoMemory();
158 PyObject_INIT_VAR(op, &PyBytes_Type, size);
159 op->ob_shash = -1;
160 Py_MEMCPY(op->ob_sval, str, size+1);
161 /* share short strings */
162 if (size == 0) {
163 nullstring = op;
164 Py_INCREF(op);
165 } else if (size == 1) {
166 characters[*str & UCHAR_MAX] = op;
167 Py_INCREF(op);
168 }
169 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000170}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000171
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172PyObject *
173PyBytes_FromFormatV(const char *format, va_list vargs)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 va_list count;
176 Py_ssize_t n = 0;
177 const char* f;
178 char *s;
179 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000181 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 /* step 1: figure out how large a buffer we need */
183 for (f = format; *f; f++) {
184 if (*f == '%') {
185 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000186 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
190 * they don't affect the amount of space we reserve.
191 */
192 if ((*f == 'l' || *f == 'z') &&
193 (f[1] == 'd' || f[1] == 'u'))
194 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 switch (*f) {
197 case 'c':
198 (void)va_arg(count, int);
199 /* fall through... */
200 case '%':
201 n++;
202 break;
203 case 'd': case 'u': case 'i': case 'x':
204 (void) va_arg(count, int);
205 /* 20 bytes is enough to hold a 64-bit
206 integer. Decimal takes the most space.
207 This isn't enough for octal. */
208 n += 20;
209 break;
210 case 's':
211 s = va_arg(count, char*);
212 n += strlen(s);
213 break;
214 case 'p':
215 (void) va_arg(count, int);
216 /* maximum 64-bit pointer representation:
217 * 0xffffffffffffffff
218 * so 19 characters is enough.
219 * XXX I count 18 -- what's the extra for?
220 */
221 n += 19;
222 break;
223 default:
224 /* if we stumble upon an unknown
225 formatting code, copy the rest of
226 the format string to the output
227 string. (we cannot just skip the
228 code, since there's no way to know
229 what's in the argument list) */
230 n += strlen(p);
231 goto expand;
232 }
233 } else
234 n++;
235 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000236 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 /* step 2: fill the buffer */
238 /* Since we've analyzed how much space we need for the worst case,
239 use sprintf directly instead of the slower PyOS_snprintf. */
240 string = PyBytes_FromStringAndSize(NULL, n);
241 if (!string)
242 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 for (f = format; *f; f++) {
247 if (*f == '%') {
248 const char* p = f++;
249 Py_ssize_t i;
250 int longflag = 0;
251 int size_tflag = 0;
252 /* parse the width.precision part (we're only
253 interested in the precision value, if any) */
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 if (*f == '.') {
258 f++;
259 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000260 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 n = (n*10) + *f++ - '0';
262 }
David Malcolm96960882010-11-05 17:23:41 +0000263 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 f++;
265 /* handle the long flag, but only for %ld and %lu.
266 others can be added when necessary. */
267 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
268 longflag = 1;
269 ++f;
270 }
271 /* handle the size_t flag. */
272 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
273 size_tflag = 1;
274 ++f;
275 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 switch (*f) {
278 case 'c':
279 *s++ = va_arg(vargs, int);
280 break;
281 case 'd':
282 if (longflag)
283 sprintf(s, "%ld", va_arg(vargs, long));
284 else if (size_tflag)
285 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
286 va_arg(vargs, Py_ssize_t));
287 else
288 sprintf(s, "%d", va_arg(vargs, int));
289 s += strlen(s);
290 break;
291 case 'u':
292 if (longflag)
293 sprintf(s, "%lu",
294 va_arg(vargs, unsigned long));
295 else if (size_tflag)
296 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
297 va_arg(vargs, size_t));
298 else
299 sprintf(s, "%u",
300 va_arg(vargs, unsigned int));
301 s += strlen(s);
302 break;
303 case 'i':
304 sprintf(s, "%i", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 'x':
308 sprintf(s, "%x", va_arg(vargs, int));
309 s += strlen(s);
310 break;
311 case 's':
312 p = va_arg(vargs, char*);
313 i = strlen(p);
314 if (n > 0 && i > n)
315 i = n;
316 Py_MEMCPY(s, p, i);
317 s += i;
318 break;
319 case 'p':
320 sprintf(s, "%p", va_arg(vargs, void*));
321 /* %p is ill-defined: ensure leading 0x. */
322 if (s[1] == 'X')
323 s[1] = 'x';
324 else if (s[1] != 'x') {
325 memmove(s+2, s, strlen(s)+1);
326 s[0] = '0';
327 s[1] = 'x';
328 }
329 s += strlen(s);
330 break;
331 case '%':
332 *s++ = '%';
333 break;
334 default:
335 strcpy(s, p);
336 s += strlen(s);
337 goto end;
338 }
339 } else
340 *s++ = *f;
341 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000342
343 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
345 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346}
347
348PyObject *
349PyBytes_FromFormat(const char *format, ...)
350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 PyObject* ret;
352 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353
354#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 ret = PyBytes_FromFormatV(format, vargs);
360 va_end(vargs);
361 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000362}
363
364static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000365bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000368}
369
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370/* Unescape a backslash-escaped string. If unicode is non-zero,
371 the string is a u-literal. If recode_encoding is non-zero,
372 the string is UTF-8 encoded and should be re-encoded in the
373 specified encoding. */
374
375PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 Py_ssize_t len,
377 const char *errors,
378 Py_ssize_t unicode,
379 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 int c;
382 char *p, *buf;
383 const char *end;
384 PyObject *v;
385 Py_ssize_t newlen = recode_encoding ? 4*len:len;
386 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
387 if (v == NULL)
388 return NULL;
389 p = buf = PyBytes_AsString(v);
390 end = s + len;
391 while (s < end) {
392 if (*s != '\\') {
393 non_esc:
394 if (recode_encoding && (*s & 0x80)) {
395 PyObject *u, *w;
396 char *r;
397 const char* t;
398 Py_ssize_t rn;
399 t = s;
400 /* Decode non-ASCII bytes as UTF-8. */
401 while (t < end && (*t & 0x80)) t++;
402 u = PyUnicode_DecodeUTF8(s, t - s, errors);
403 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 /* Recode them in target encoding. */
406 w = PyUnicode_AsEncodedString(
407 u, recode_encoding, errors);
408 Py_DECREF(u);
409 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 /* Append bytes to output buffer. */
412 assert(PyBytes_Check(w));
413 r = PyBytes_AS_STRING(w);
414 rn = PyBytes_GET_SIZE(w);
415 Py_MEMCPY(p, r, rn);
416 p += rn;
417 Py_DECREF(w);
418 s = t;
419 } else {
420 *p++ = *s++;
421 }
422 continue;
423 }
424 s++;
425 if (s==end) {
426 PyErr_SetString(PyExc_ValueError,
427 "Trailing \\ in string");
428 goto failed;
429 }
430 switch (*s++) {
431 /* XXX This assumes ASCII! */
432 case '\n': break;
433 case '\\': *p++ = '\\'; break;
434 case '\'': *p++ = '\''; break;
435 case '\"': *p++ = '\"'; break;
436 case 'b': *p++ = '\b'; break;
437 case 'f': *p++ = '\014'; break; /* FF */
438 case 't': *p++ = '\t'; break;
439 case 'n': *p++ = '\n'; break;
440 case 'r': *p++ = '\r'; break;
441 case 'v': *p++ = '\013'; break; /* VT */
442 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
443 case '0': case '1': case '2': case '3':
444 case '4': case '5': case '6': case '7':
445 c = s[-1] - '0';
446 if (s < end && '0' <= *s && *s <= '7') {
447 c = (c<<3) + *s++ - '0';
448 if (s < end && '0' <= *s && *s <= '7')
449 c = (c<<3) + *s++ - '0';
450 }
451 *p++ = c;
452 break;
453 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000454 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 unsigned int x = 0;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x = 10 + c - 'a';
462 else
463 x = 10 + c - 'A';
464 x = x << 4;
465 c = Py_CHARMASK(*s);
466 s++;
David Malcolm96960882010-11-05 17:23:41 +0000467 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000469 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 x += 10 + c - 'a';
471 else
472 x += 10 + c - 'A';
473 *p++ = x;
474 break;
475 }
476 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200477 PyErr_Format(PyExc_ValueError,
478 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200479 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 goto failed;
481 }
482 if (strcmp(errors, "replace") == 0) {
483 *p++ = '?';
484 } else if (strcmp(errors, "ignore") == 0)
485 /* do nothing */;
486 else {
487 PyErr_Format(PyExc_ValueError,
488 "decoding error; unknown "
489 "error handling code: %.400s",
490 errors);
491 goto failed;
492 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200493 /* skip \x */
494 if (s < end && Py_ISXDIGIT(s[0]))
495 s++; /* and a hexdigit */
496 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 default:
498 *p++ = '\\';
499 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200500 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 UTF-8 bytes may follow. */
502 }
503 }
504 if (p-buf < newlen)
505 _PyBytes_Resize(&v, p - buf);
506 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000507 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 Py_DECREF(v);
509 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512/* -------------------------------------------------------------------- */
513/* object api */
514
515Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200516PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000517{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 if (!PyBytes_Check(op)) {
519 PyErr_Format(PyExc_TypeError,
520 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
521 return -1;
522 }
523 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000524}
525
526char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200527PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 if (!PyBytes_Check(op)) {
530 PyErr_Format(PyExc_TypeError,
531 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
532 return NULL;
533 }
534 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000535}
536
537int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200538PyBytes_AsStringAndSize(PyObject *obj,
539 char **s,
540 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000541{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (s == NULL) {
543 PyErr_BadInternalCall();
544 return -1;
545 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 if (!PyBytes_Check(obj)) {
548 PyErr_Format(PyExc_TypeError,
549 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
550 return -1;
551 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 *s = PyBytes_AS_STRING(obj);
554 if (len != NULL)
555 *len = PyBytes_GET_SIZE(obj);
556 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
557 PyErr_SetString(PyExc_TypeError,
558 "expected bytes with no null");
559 return -1;
560 }
561 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000562}
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
564/* -------------------------------------------------------------------- */
565/* Methods */
566
Eric Smith0923d1d2009-04-16 20:16:10 +0000567#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000568
569#include "stringlib/fastsearch.h"
570#include "stringlib/count.h"
571#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200572#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000573#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000574#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000575#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000576
Eric Smith0f78bff2009-11-30 01:01:42 +0000577#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000578
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579PyObject *
580PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000581{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200582 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200583 Py_ssize_t i, length = Py_SIZE(op);
584 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 unsigned char quote, *s, *p;
587
588 /* Compute size of output string */
589 squotes = dquotes = 0;
590 newsize = 3; /* b'' */
591 s = (unsigned char*)op->ob_sval;
592 for (i = 0; i < length; i++) {
593 switch(s[i]) {
594 case '\'': squotes++; newsize++; break;
595 case '"': dquotes++; newsize++; break;
596 case '\\': case '\t': case '\n': case '\r':
597 newsize += 2; break; /* \C */
598 default:
599 if (s[i] < ' ' || s[i] >= 0x7f)
600 newsize += 4; /* \xHH */
601 else
602 newsize++;
603 }
604 }
605 quote = '\'';
606 if (smartquotes && squotes && !dquotes)
607 quote = '"';
608 if (squotes && quote == '\'')
609 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200610
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200611 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 PyErr_SetString(PyExc_OverflowError,
613 "bytes object is too large to make repr");
614 return NULL;
615 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200616
617 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 if (v == NULL) {
619 return NULL;
620 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200621 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000622
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200623 *p++ = 'b', *p++ = quote;
624 for (i = 0; i < length; i++) {
625 unsigned char c = op->ob_sval[i];
626 if (c == quote || c == '\\')
627 *p++ = '\\', *p++ = c;
628 else if (c == '\t')
629 *p++ = '\\', *p++ = 't';
630 else if (c == '\n')
631 *p++ = '\\', *p++ = 'n';
632 else if (c == '\r')
633 *p++ = '\\', *p++ = 'r';
634 else if (c < ' ' || c >= 0x7f) {
635 *p++ = '\\';
636 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200637 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
638 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200640 else
641 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200643 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200644 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200645 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000646}
647
Neal Norwitz6968b052007-02-27 19:02:19 +0000648static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000649bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000650{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000652}
653
Neal Norwitz6968b052007-02-27 19:02:19 +0000654static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000655bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000656{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 if (Py_BytesWarningFlag) {
658 if (PyErr_WarnEx(PyExc_BytesWarning,
659 "str() on a bytes instance", 1))
660 return NULL;
661 }
662 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000663}
664
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000666bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000667{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669}
Neal Norwitz6968b052007-02-27 19:02:19 +0000670
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671/* This is also used by PyBytes_Concat() */
672static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000673bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000674{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 Py_ssize_t size;
676 Py_buffer va, vb;
677 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000679 va.len = -1;
680 vb.len = -1;
681 if (_getbuffer(a, &va) < 0 ||
682 _getbuffer(b, &vb) < 0) {
683 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
684 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
685 goto done;
686 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 /* Optimize end cases */
689 if (va.len == 0 && PyBytes_CheckExact(b)) {
690 result = b;
691 Py_INCREF(result);
692 goto done;
693 }
694 if (vb.len == 0 && PyBytes_CheckExact(a)) {
695 result = a;
696 Py_INCREF(result);
697 goto done;
698 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 size = va.len + vb.len;
701 if (size < 0) {
702 PyErr_NoMemory();
703 goto done;
704 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 result = PyBytes_FromStringAndSize(NULL, size);
707 if (result != NULL) {
708 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
709 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000711
712 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 if (va.len != -1)
714 PyBuffer_Release(&va);
715 if (vb.len != -1)
716 PyBuffer_Release(&vb);
717 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000718}
Neal Norwitz6968b052007-02-27 19:02:19 +0000719
720static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200721bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000722{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200723 Py_ssize_t i;
724 Py_ssize_t j;
725 Py_ssize_t size;
726 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 size_t nbytes;
728 if (n < 0)
729 n = 0;
730 /* watch out for overflows: the size can overflow int,
731 * and the # of bytes needed can overflow size_t
732 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 PyErr_SetString(PyExc_OverflowError,
735 "repeated bytes are too long");
736 return NULL;
737 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000738 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
740 Py_INCREF(a);
741 return (PyObject *)a;
742 }
743 nbytes = (size_t)size;
744 if (nbytes + PyBytesObject_SIZE <= nbytes) {
745 PyErr_SetString(PyExc_OverflowError,
746 "repeated bytes are too long");
747 return NULL;
748 }
749 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
750 if (op == NULL)
751 return PyErr_NoMemory();
752 PyObject_INIT_VAR(op, &PyBytes_Type, size);
753 op->ob_shash = -1;
754 op->ob_sval[size] = '\0';
755 if (Py_SIZE(a) == 1 && n > 0) {
756 memset(op->ob_sval, a->ob_sval[0] , n);
757 return (PyObject *) op;
758 }
759 i = 0;
760 if (i < size) {
761 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
762 i = Py_SIZE(a);
763 }
764 while (i < size) {
765 j = (i <= size-i) ? i : size-i;
766 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
767 i += j;
768 }
769 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000770}
771
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000773bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774{
775 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
776 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000777 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000778 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000779 PyErr_Clear();
780 if (_getbuffer(arg, &varg) < 0)
781 return -1;
782 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
783 varg.buf, varg.len, 0);
784 PyBuffer_Release(&varg);
785 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 }
787 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000788 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
789 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790 }
791
Antoine Pitrou0010d372010-08-15 17:12:55 +0000792 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793}
794
Neal Norwitz6968b052007-02-27 19:02:19 +0000795static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200796bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 if (i < 0 || i >= Py_SIZE(a)) {
799 PyErr_SetString(PyExc_IndexError, "index out of range");
800 return NULL;
801 }
802 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000803}
804
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000805static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000806bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000807{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 int c;
809 Py_ssize_t len_a, len_b;
810 Py_ssize_t min_len;
811 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 /* Make sure both arguments are strings. */
814 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
815 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
816 (PyObject_IsInstance((PyObject*)a,
817 (PyObject*)&PyUnicode_Type) ||
818 PyObject_IsInstance((PyObject*)b,
819 (PyObject*)&PyUnicode_Type))) {
820 if (PyErr_WarnEx(PyExc_BytesWarning,
821 "Comparison between bytes and string", 1))
822 return NULL;
823 }
824 result = Py_NotImplemented;
825 goto out;
826 }
827 if (a == b) {
828 switch (op) {
829 case Py_EQ:case Py_LE:case Py_GE:
830 result = Py_True;
831 goto out;
832 case Py_NE:case Py_LT:case Py_GT:
833 result = Py_False;
834 goto out;
835 }
836 }
837 if (op == Py_EQ) {
838 /* Supporting Py_NE here as well does not save
839 much time, since Py_NE is rarely used. */
840 if (Py_SIZE(a) == Py_SIZE(b)
841 && (a->ob_sval[0] == b->ob_sval[0]
842 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
843 result = Py_True;
844 } else {
845 result = Py_False;
846 }
847 goto out;
848 }
849 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
850 min_len = (len_a < len_b) ? len_a : len_b;
851 if (min_len > 0) {
852 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
853 if (c==0)
854 c = memcmp(a->ob_sval, b->ob_sval, min_len);
855 } else
856 c = 0;
857 if (c == 0)
858 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
859 switch (op) {
860 case Py_LT: c = c < 0; break;
861 case Py_LE: c = c <= 0; break;
862 case Py_EQ: assert(0); break; /* unreachable */
863 case Py_NE: c = c != 0; break;
864 case Py_GT: c = c > 0; break;
865 case Py_GE: c = c >= 0; break;
866 default:
867 result = Py_NotImplemented;
868 goto out;
869 }
870 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000871 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 Py_INCREF(result);
873 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000874}
875
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000876static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000877bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000878{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100879 if (a->ob_shash == -1) {
880 /* Can't fail */
881 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
882 }
883 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000884}
885
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000886static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000887bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 if (PyIndex_Check(item)) {
890 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
891 if (i == -1 && PyErr_Occurred())
892 return NULL;
893 if (i < 0)
894 i += PyBytes_GET_SIZE(self);
895 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
896 PyErr_SetString(PyExc_IndexError,
897 "index out of range");
898 return NULL;
899 }
900 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
901 }
902 else if (PySlice_Check(item)) {
903 Py_ssize_t start, stop, step, slicelength, cur, i;
904 char* source_buf;
905 char* result_buf;
906 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000907
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000908 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 PyBytes_GET_SIZE(self),
910 &start, &stop, &step, &slicelength) < 0) {
911 return NULL;
912 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 if (slicelength <= 0) {
915 return PyBytes_FromStringAndSize("", 0);
916 }
917 else if (start == 0 && step == 1 &&
918 slicelength == PyBytes_GET_SIZE(self) &&
919 PyBytes_CheckExact(self)) {
920 Py_INCREF(self);
921 return (PyObject *)self;
922 }
923 else if (step == 1) {
924 return PyBytes_FromStringAndSize(
925 PyBytes_AS_STRING(self) + start,
926 slicelength);
927 }
928 else {
929 source_buf = PyBytes_AS_STRING(self);
930 result = PyBytes_FromStringAndSize(NULL, slicelength);
931 if (result == NULL)
932 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000934 result_buf = PyBytes_AS_STRING(result);
935 for (cur = start, i = 0; i < slicelength;
936 cur += step, i++) {
937 result_buf[i] = source_buf[cur];
938 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 return result;
941 }
942 }
943 else {
944 PyErr_Format(PyExc_TypeError,
945 "byte indices must be integers, not %.200s",
946 Py_TYPE(item)->tp_name);
947 return NULL;
948 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000949}
950
951static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000952bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000953{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
955 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000956}
957
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000958static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 (lenfunc)bytes_length, /*sq_length*/
960 (binaryfunc)bytes_concat, /*sq_concat*/
961 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
962 (ssizeargfunc)bytes_item, /*sq_item*/
963 0, /*sq_slice*/
964 0, /*sq_ass_item*/
965 0, /*sq_ass_slice*/
966 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000967};
968
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000969static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 (lenfunc)bytes_length,
971 (binaryfunc)bytes_subscript,
972 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973};
974
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000975static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 (getbufferproc)bytes_buffer_getbuffer,
977 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000978};
979
980
/* Selectors telling the strip helpers which end(s) to work on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the selectors above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for selector i: skips the 3-character "|O:" prefix */
#define STRIPNAME(i) (stripformat[i]+3)
989
Neal Norwitz6968b052007-02-27 19:02:19 +0000990PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200991"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000992\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000993Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000994If sep is not specified or is None, B is split on ASCII whitespace\n\
995characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000996If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000997
998static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200999bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001000{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001001 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1003 Py_ssize_t maxsplit = -1;
1004 const char *s = PyBytes_AS_STRING(self), *sub;
1005 Py_buffer vsub;
1006 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001007
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001008 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1009 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 return NULL;
1011 if (maxsplit < 0)
1012 maxsplit = PY_SSIZE_T_MAX;
1013 if (subobj == Py_None)
1014 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1015 if (_getbuffer(subobj, &vsub) < 0)
1016 return NULL;
1017 sub = vsub.buf;
1018 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1021 PyBuffer_Release(&vsub);
1022 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001023}
1024
Neal Norwitz6968b052007-02-27 19:02:19 +00001025PyDoc_STRVAR(partition__doc__,
1026"B.partition(sep) -> (head, sep, tail)\n\
1027\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001028Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001029the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001033bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001034{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 const char *sep;
1036 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (PyBytes_Check(sep_obj)) {
1039 sep = PyBytes_AS_STRING(sep_obj);
1040 sep_len = PyBytes_GET_SIZE(sep_obj);
1041 }
1042 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1043 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 return stringlib_partition(
1046 (PyObject*) self,
1047 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1048 sep_obj, sep, sep_len
1049 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001050}
1051
1052PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001053"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001054\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001055Search for the separator sep in B, starting at the end of B,\n\
1056and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001057part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001058bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
1060static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 const char *sep;
1064 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 if (PyBytes_Check(sep_obj)) {
1067 sep = PyBytes_AS_STRING(sep_obj);
1068 sep_len = PyBytes_GET_SIZE(sep_obj);
1069 }
1070 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1071 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 return stringlib_rpartition(
1074 (PyObject*) self,
1075 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1076 sep_obj, sep, sep_len
1077 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001078}
1079
Neal Norwitz6968b052007-02-27 19:02:19 +00001080PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001081"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001082\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001083Return a list of the sections in B, using sep as the delimiter,\n\
1084starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001085If sep is not given, B is split on ASCII whitespace characters\n\
1086(space, tab, return, newline, formfeed, vertical tab).\n\
1087If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001089
Neal Norwitz6968b052007-02-27 19:02:19 +00001090static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001091bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001092{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001093 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1095 Py_ssize_t maxsplit = -1;
1096 const char *s = PyBytes_AS_STRING(self), *sub;
1097 Py_buffer vsub;
1098 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001099
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001100 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1101 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 return NULL;
1103 if (maxsplit < 0)
1104 maxsplit = PY_SSIZE_T_MAX;
1105 if (subobj == Py_None)
1106 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1107 if (_getbuffer(subobj, &vsub) < 0)
1108 return NULL;
1109 sub = vsub.buf;
1110 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1113 PyBuffer_Release(&vsub);
1114 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001115}
1116
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001117
1118PyDoc_STRVAR(join__doc__,
1119"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001120\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001121Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001122Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1123
Neal Norwitz6968b052007-02-27 19:02:19 +00001124static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001125bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001126{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001127 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001128}
1129
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001130PyObject *
1131_PyBytes_Join(PyObject *sep, PyObject *x)
1132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 assert(sep != NULL && PyBytes_Check(sep));
1134 assert(x != NULL);
1135 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001136}
1137
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len when too large; a negative end or start has len
   added to it and is then clamped to 0 if still negative.  start and
   end must be assignable lvalues, and every argument is evaluated more
   than once.  The expansion is a pair of bare if-statements, so use it
   only as a statement of its own.  The token sequence is intentionally
   left untouched so that an identical definition of this macro
   elsewhere remains a compatible redefinition. */
#define ADJUST_INDICES(start, end, len) \
    if (end > len) \
        end = len; \
    else if (end < 0) { \
        end += len; \
        if (end < 0) \
            end = 0; \
    } \
    if (start < 0) { \
        start += len; \
        if (start < 0) \
            start = 0; \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001152
1153Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001154bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001157 char byte;
1158 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 const char *sub;
1160 Py_ssize_t sub_len;
1161 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001162 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001163
Antoine Pitrouac65d962011-10-20 23:54:17 +02001164 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1165 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167
Antoine Pitrouac65d962011-10-20 23:54:17 +02001168 if (subobj) {
1169 if (_getbuffer(subobj, &subbuf) < 0)
1170 return -2;
1171
1172 sub = subbuf.buf;
1173 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001175 else {
1176 sub = &byte;
1177 sub_len = 1;
1178 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001181 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1183 sub, sub_len, start, end);
1184 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001185 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1187 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001188
1189 if (subobj)
1190 PyBuffer_Release(&subbuf);
1191
1192 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001193}
1194
1195
1196PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001197"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001198\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001199Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001200such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001201arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001202\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001203Return -1 on failure.");
1204
Neal Norwitz6968b052007-02-27 19:02:19 +00001205static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001206bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 Py_ssize_t result = bytes_find_internal(self, args, +1);
1209 if (result == -2)
1210 return NULL;
1211 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001212}
1213
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001214
1215PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001216"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001217\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218Like B.find() but raise ValueError when the substring is not found.");
1219
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001220static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001221bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 Py_ssize_t result = bytes_find_internal(self, args, +1);
1224 if (result == -2)
1225 return NULL;
1226 if (result == -1) {
1227 PyErr_SetString(PyExc_ValueError,
1228 "substring not found");
1229 return NULL;
1230 }
1231 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001232}
1233
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234
1235PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001236"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001237\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001239such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001241\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001242Return -1 on failure.");
1243
Neal Norwitz6968b052007-02-27 19:02:19 +00001244static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001245bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001246{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 Py_ssize_t result = bytes_find_internal(self, args, -1);
1248 if (result == -2)
1249 return NULL;
1250 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001251}
1252
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001253
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001255"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256\n\
1257Like B.rfind() but raise ValueError when the substring is not found.");
1258
1259static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001260bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001261{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 Py_ssize_t result = bytes_find_internal(self, args, -1);
1263 if (result == -2)
1264 return NULL;
1265 if (result == -1) {
1266 PyErr_SetString(PyExc_ValueError,
1267 "substring not found");
1268 return NULL;
1269 }
1270 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001271}
1272
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273
1274Py_LOCAL_INLINE(PyObject *)
1275do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001276{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 Py_buffer vsep;
1278 char *s = PyBytes_AS_STRING(self);
1279 Py_ssize_t len = PyBytes_GET_SIZE(self);
1280 char *sep;
1281 Py_ssize_t seplen;
1282 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 if (_getbuffer(sepobj, &vsep) < 0)
1285 return NULL;
1286 sep = vsep.buf;
1287 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001289 i = 0;
1290 if (striptype != RIGHTSTRIP) {
1291 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1292 i++;
1293 }
1294 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 j = len;
1297 if (striptype != LEFTSTRIP) {
1298 do {
1299 j--;
1300 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1301 j++;
1302 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1307 Py_INCREF(self);
1308 return (PyObject*)self;
1309 }
1310 else
1311 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001312}
1313
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314
1315Py_LOCAL_INLINE(PyObject *)
1316do_strip(PyBytesObject *self, int striptype)
1317{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 char *s = PyBytes_AS_STRING(self);
1319 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 i = 0;
1322 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001323 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 i++;
1325 }
1326 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 j = len;
1329 if (striptype != LEFTSTRIP) {
1330 do {
1331 j--;
David Malcolm96960882010-11-05 17:23:41 +00001332 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 j++;
1334 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1337 Py_INCREF(self);
1338 return (PyObject*)self;
1339 }
1340 else
1341 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342}
1343
1344
1345Py_LOCAL_INLINE(PyObject *)
1346do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349
Serhiy Storchakac6792272013-10-19 21:03:34 +03001350 if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 if (sep != NULL && sep != Py_None) {
1354 return do_xstrip(self, striptype, sep);
1355 }
1356 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357}
1358
1359
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001360PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001362\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001363Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001364If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001365static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001366bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001367{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 if (PyTuple_GET_SIZE(args) == 0)
1369 return do_strip(self, BOTHSTRIP); /* Common case */
1370 else
1371 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001372}
1373
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001375PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001377\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001378Strip leading bytes contained in the argument.\n\
1379If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001380static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001381bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 if (PyTuple_GET_SIZE(args) == 0)
1384 return do_strip(self, LEFTSTRIP); /* Common case */
1385 else
1386 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001387}
1388
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001390PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001392\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001393Strip trailing bytes contained in the argument.\n\
1394If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001395static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001396bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001397{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 if (PyTuple_GET_SIZE(args) == 0)
1399 return do_strip(self, RIGHTSTRIP); /* Common case */
1400 else
1401 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001402}
Neal Norwitz6968b052007-02-27 19:02:19 +00001403
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001404
1405PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001406"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001407\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001409string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410as in slice notation.");
1411
1412static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001413bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 PyObject *sub_obj;
1416 const char *str = PyBytes_AS_STRING(self), *sub;
1417 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001418 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Antoine Pitrouac65d962011-10-20 23:54:17 +02001421 Py_buffer vsub;
1422 PyObject *count_obj;
1423
1424 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1425 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427
Antoine Pitrouac65d962011-10-20 23:54:17 +02001428 if (sub_obj) {
1429 if (_getbuffer(sub_obj, &vsub) < 0)
1430 return NULL;
1431
1432 sub = vsub.buf;
1433 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001435 else {
1436 sub = &byte;
1437 sub_len = 1;
1438 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441
Antoine Pitrouac65d962011-10-20 23:54:17 +02001442 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1444 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001445
1446 if (sub_obj)
1447 PyBuffer_Release(&vsub);
1448
1449 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450}
1451
1452
1453PyDoc_STRVAR(translate__doc__,
1454"B.translate(table[, deletechars]) -> bytes\n\
1455\n\
1456Return a copy of B, where all characters occurring in the\n\
1457optional argument deletechars are removed, and the remaining\n\
1458characters have been mapped through the given translation\n\
1459table, which must be a bytes object of length 256.");
1460
1461static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001462bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001464 char *input, *output;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 const char *table;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001466 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 PyObject *input_obj = (PyObject*)self;
1468 const char *output_start, *del_table=NULL;
1469 Py_ssize_t inlen, tablen, dellen = 0;
1470 PyObject *result;
1471 int trans_table[256];
1472 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1475 &tableobj, &delobj))
1476 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 if (PyBytes_Check(tableobj)) {
1479 table = PyBytes_AS_STRING(tableobj);
1480 tablen = PyBytes_GET_SIZE(tableobj);
1481 }
1482 else if (tableobj == Py_None) {
1483 table = NULL;
1484 tablen = 256;
1485 }
1486 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1487 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 if (tablen != 256) {
1490 PyErr_SetString(PyExc_ValueError,
1491 "translation table must be 256 characters long");
1492 return NULL;
1493 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (delobj != NULL) {
1496 if (PyBytes_Check(delobj)) {
1497 del_table = PyBytes_AS_STRING(delobj);
1498 dellen = PyBytes_GET_SIZE(delobj);
1499 }
1500 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1501 return NULL;
1502 }
1503 else {
1504 del_table = NULL;
1505 dellen = 0;
1506 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 inlen = PyBytes_GET_SIZE(input_obj);
1509 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1510 if (result == NULL)
1511 return NULL;
1512 output_start = output = PyBytes_AsString(result);
1513 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 if (dellen == 0 && table != NULL) {
1516 /* If no deletions are required, use faster code */
1517 for (i = inlen; --i >= 0; ) {
1518 c = Py_CHARMASK(*input++);
1519 if (Py_CHARMASK((*output++ = table[c])) != c)
1520 changed = 1;
1521 }
1522 if (changed || !PyBytes_CheckExact(input_obj))
1523 return result;
1524 Py_DECREF(result);
1525 Py_INCREF(input_obj);
1526 return input_obj;
1527 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 if (table == NULL) {
1530 for (i = 0; i < 256; i++)
1531 trans_table[i] = Py_CHARMASK(i);
1532 } else {
1533 for (i = 0; i < 256; i++)
1534 trans_table[i] = Py_CHARMASK(table[i]);
1535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 for (i = 0; i < dellen; i++)
1538 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 for (i = inlen; --i >= 0; ) {
1541 c = Py_CHARMASK(*input++);
1542 if (trans_table[c] != -1)
1543 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1544 continue;
1545 changed = 1;
1546 }
1547 if (!changed && PyBytes_CheckExact(input_obj)) {
1548 Py_DECREF(result);
1549 Py_INCREF(input_obj);
1550 return input_obj;
1551 }
1552 /* Fix the size of the resulting string */
1553 if (inlen > 0)
1554 _PyBytes_Resize(&result, output - output_start);
1555 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001556}
1557
1558
Georg Brandlabc38772009-04-12 15:51:51 +00001559static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001560bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001561{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001563}
1564
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565/* find and count characters and substrings */
1566
/* findchar(target, target_len, c): search the first target_len bytes at
   target for the byte c.  Evaluates to a char * pointing at the first
   match, or NULL when memchr() reports none. */
#define findchar(target, target_len, c)                              \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1569
1570/* String ops must return a string. */
1571/* If the object is subclass of string, create a copy */
1572Py_LOCAL(PyBytesObject *)
1573return_self(PyBytesObject *self)
1574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 if (PyBytes_CheckExact(self)) {
1576 Py_INCREF(self);
1577 return self;
1578 }
1579 return (PyBytesObject *)PyBytes_FromStringAndSize(
1580 PyBytes_AS_STRING(self),
1581 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582}
1583
1584Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001585countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 Py_ssize_t count=0;
1588 const char *start=target;
1589 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 while ( (start=findchar(start, end-start, c)) != NULL ) {
1592 count++;
1593 if (count >= maxcount)
1594 break;
1595 start += 1;
1596 }
1597 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598}
1599
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001600
1601/* Algorithms for different cases of string replacement */
1602
1603/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1604Py_LOCAL(PyBytesObject *)
1605replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 const char *to_s, Py_ssize_t to_len,
1607 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 char *self_s, *result_s;
1610 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001611 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001616 /* 1 at the end plus 1 after every character;
1617 count = min(maxcount, self_len + 1) */
1618 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001620 else
1621 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1622 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 /* Check for overflow */
1625 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001626 assert(count > 0);
1627 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 PyErr_SetString(PyExc_OverflowError,
1629 "replacement bytes are too long");
1630 return NULL;
1631 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001632 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 if (! (result = (PyBytesObject *)
1635 PyBytes_FromStringAndSize(NULL, result_len)) )
1636 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 self_s = PyBytes_AS_STRING(self);
1639 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 /* Lay the first one down (guaranteed this will occur) */
1644 Py_MEMCPY(result_s, to_s, to_len);
1645 result_s += to_len;
1646 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 for (i=0; i<count; i++) {
1649 *result_s++ = *self_s++;
1650 Py_MEMCPY(result_s, to_s, to_len);
1651 result_s += to_len;
1652 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 /* Copy the rest of the original string */
1655 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658}
1659
1660/* Special case for deleting a single character */
1661/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1662Py_LOCAL(PyBytesObject *)
1663replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 char *self_s, *result_s;
1667 char *start, *next, *end;
1668 Py_ssize_t self_len, result_len;
1669 Py_ssize_t count;
1670 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 self_len = PyBytes_GET_SIZE(self);
1673 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 count = countchar(self_s, self_len, from_c, maxcount);
1676 if (count == 0) {
1677 return return_self(self);
1678 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 result_len = self_len - count; /* from_len == 1 */
1681 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 if ( (result = (PyBytesObject *)
1684 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1685 return NULL;
1686 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 start = self_s;
1689 end = self_s + self_len;
1690 while (count-- > 0) {
1691 next = findchar(start, end-start, from_c);
1692 if (next == NULL)
1693 break;
1694 Py_MEMCPY(result_s, start, next-start);
1695 result_s += (next-start);
1696 start = next+1;
1697 }
1698 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001701}
1702
1703/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1704
1705Py_LOCAL(PyBytesObject *)
1706replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 const char *from_s, Py_ssize_t from_len,
1708 Py_ssize_t maxcount) {
1709 char *self_s, *result_s;
1710 char *start, *next, *end;
1711 Py_ssize_t self_len, result_len;
1712 Py_ssize_t count, offset;
1713 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 self_len = PyBytes_GET_SIZE(self);
1716 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 count = stringlib_count(self_s, self_len,
1719 from_s, from_len,
1720 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 if (count == 0) {
1723 /* no matches */
1724 return return_self(self);
1725 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 result_len = self_len - (count * from_len);
1728 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 if ( (result = (PyBytesObject *)
1731 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1732 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 start = self_s;
1737 end = self_s + self_len;
1738 while (count-- > 0) {
1739 offset = stringlib_find(start, end-start,
1740 from_s, from_len,
1741 0);
1742 if (offset == -1)
1743 break;
1744 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 result_s += (next-start);
1749 start = next+from_len;
1750 }
1751 Py_MEMCPY(result_s, start, end-start);
1752 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753}
1754
1755/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1756Py_LOCAL(PyBytesObject *)
1757replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 char from_c, char to_c,
1759 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001761 char *self_s, *result_s, *start, *end, *next;
1762 Py_ssize_t self_len;
1763 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 /* The result string will be the same size */
1766 self_s = PyBytes_AS_STRING(self);
1767 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 if (next == NULL) {
1772 /* No matches; return the original string */
1773 return return_self(self);
1774 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 /* Need to make a new string */
1777 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1778 if (result == NULL)
1779 return NULL;
1780 result_s = PyBytes_AS_STRING(result);
1781 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001782
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 /* change everything in-place, starting with this one */
1784 start = result_s + (next-self_s);
1785 *start = to_c;
1786 start++;
1787 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 while (--maxcount > 0) {
1790 next = findchar(start, end-start, from_c);
1791 if (next == NULL)
1792 break;
1793 *next = to_c;
1794 start = next+1;
1795 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798}
1799
1800/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1801Py_LOCAL(PyBytesObject *)
1802replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 const char *from_s, Py_ssize_t from_len,
1804 const char *to_s, Py_ssize_t to_len,
1805 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 char *result_s, *start, *end;
1808 char *self_s;
1809 Py_ssize_t self_len, offset;
1810 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 self_s = PyBytes_AS_STRING(self);
1815 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 offset = stringlib_find(self_s, self_len,
1818 from_s, from_len,
1819 0);
1820 if (offset == -1) {
1821 /* No matches; return the original string */
1822 return return_self(self);
1823 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 /* Need to make a new string */
1826 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1827 if (result == NULL)
1828 return NULL;
1829 result_s = PyBytes_AS_STRING(result);
1830 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 /* change everything in-place, starting with this one */
1833 start = result_s + offset;
1834 Py_MEMCPY(start, to_s, from_len);
1835 start += from_len;
1836 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 while ( --maxcount > 0) {
1839 offset = stringlib_find(start, end-start,
1840 from_s, from_len,
1841 0);
1842 if (offset==-1)
1843 break;
1844 Py_MEMCPY(start+offset, to_s, from_len);
1845 start += offset+from_len;
1846 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001848 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849}
1850
1851/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1852Py_LOCAL(PyBytesObject *)
1853replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 char from_c,
1855 const char *to_s, Py_ssize_t to_len,
1856 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 char *self_s, *result_s;
1859 char *start, *next, *end;
1860 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001861 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 self_s = PyBytes_AS_STRING(self);
1865 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 count = countchar(self_s, self_len, from_c, maxcount);
1868 if (count == 0) {
1869 /* no matches, return unchanged */
1870 return return_self(self);
1871 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 /* use the difference between current and new, hence the "-1" */
1874 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001875 assert(count > 0);
1876 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 PyErr_SetString(PyExc_OverflowError,
1878 "replacement bytes are too long");
1879 return NULL;
1880 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001881 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 if ( (result = (PyBytesObject *)
1884 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1885 return NULL;
1886 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 start = self_s;
1889 end = self_s + self_len;
1890 while (count-- > 0) {
1891 next = findchar(start, end-start, from_c);
1892 if (next == NULL)
1893 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 if (next == start) {
1896 /* replace with the 'to' */
1897 Py_MEMCPY(result_s, to_s, to_len);
1898 result_s += to_len;
1899 start += 1;
1900 } else {
1901 /* copy the unchanged old then the 'to' */
1902 Py_MEMCPY(result_s, start, next-start);
1903 result_s += (next-start);
1904 Py_MEMCPY(result_s, to_s, to_len);
1905 result_s += to_len;
1906 start = next+1;
1907 }
1908 }
1909 /* Copy the remainder of the remaining string */
1910 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001913}
1914
1915/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1916Py_LOCAL(PyBytesObject *)
1917replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 const char *from_s, Py_ssize_t from_len,
1919 const char *to_s, Py_ssize_t to_len,
1920 Py_ssize_t maxcount) {
1921 char *self_s, *result_s;
1922 char *start, *next, *end;
1923 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001924 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 self_s = PyBytes_AS_STRING(self);
1928 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 count = stringlib_count(self_s, self_len,
1931 from_s, from_len,
1932 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 if (count == 0) {
1935 /* no matches, return unchanged */
1936 return return_self(self);
1937 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 /* Check for overflow */
1940 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001941 assert(count > 0);
1942 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 PyErr_SetString(PyExc_OverflowError,
1944 "replacement bytes are too long");
1945 return NULL;
1946 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001947 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 if ( (result = (PyBytesObject *)
1950 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1951 return NULL;
1952 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 start = self_s;
1955 end = self_s + self_len;
1956 while (count-- > 0) {
1957 offset = stringlib_find(start, end-start,
1958 from_s, from_len,
1959 0);
1960 if (offset == -1)
1961 break;
1962 next = start+offset;
1963 if (next == start) {
1964 /* replace with the 'to' */
1965 Py_MEMCPY(result_s, to_s, to_len);
1966 result_s += to_len;
1967 start += from_len;
1968 } else {
1969 /* copy the unchanged old then the 'to' */
1970 Py_MEMCPY(result_s, start, next-start);
1971 result_s += (next-start);
1972 Py_MEMCPY(result_s, to_s, to_len);
1973 result_s += to_len;
1974 start = next+from_len;
1975 }
1976 }
1977 /* Copy the remainder of the remaining string */
1978 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001981}
1982
1983
1984Py_LOCAL(PyBytesObject *)
1985replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 const char *from_s, Py_ssize_t from_len,
1987 const char *to_s, Py_ssize_t to_len,
1988 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 if (maxcount < 0) {
1991 maxcount = PY_SSIZE_T_MAX;
1992 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1993 /* nothing to do; return the original string */
1994 return return_self(self);
1995 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 if (maxcount == 0 ||
1998 (from_len == 0 && to_len == 0)) {
1999 /* nothing to do; return the original string */
2000 return return_self(self);
2001 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002002
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 if (from_len == 0) {
2006 /* insert the 'to' string everywhere. */
2007 /* >>> "Python".replace("", ".") */
2008 /* '.P.y.t.h.o.n.' */
2009 return replace_interleave(self, to_s, to_len, maxcount);
2010 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002012 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2013 /* point for an empty self string to generate a non-empty string */
2014 /* Special case so the remaining code always gets a non-empty string */
2015 if (PyBytes_GET_SIZE(self) == 0) {
2016 return return_self(self);
2017 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 if (to_len == 0) {
2020 /* delete all occurrences of 'from' string */
2021 if (from_len == 1) {
2022 return replace_delete_single_character(
2023 self, from_s[0], maxcount);
2024 } else {
2025 return replace_delete_substring(self, from_s,
2026 from_len, maxcount);
2027 }
2028 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002030 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002032 if (from_len == to_len) {
2033 if (from_len == 1) {
2034 return replace_single_character_in_place(
2035 self,
2036 from_s[0],
2037 to_s[0],
2038 maxcount);
2039 } else {
2040 return replace_substring_in_place(
2041 self, from_s, from_len, to_s, to_len,
2042 maxcount);
2043 }
2044 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 /* Otherwise use the more generic algorithms */
2047 if (from_len == 1) {
2048 return replace_single_character(self, from_s[0],
2049 to_s, to_len, maxcount);
2050 } else {
2051 /* len('from')>=2, len('to')>=1 */
2052 return replace_substring(self, from_s, from_len, to_s, to_len,
2053 maxcount);
2054 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055}
2056
2057PyDoc_STRVAR(replace__doc__,
2058"B.replace(old, new[, count]) -> bytes\n\
2059\n\
2060Return a copy of B with all occurrences of subsection\n\
2061old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002062given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
2064static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002065bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 Py_ssize_t count = -1;
2068 PyObject *from, *to;
2069 const char *from_s, *to_s;
2070 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2073 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 if (PyBytes_Check(from)) {
2076 from_s = PyBytes_AS_STRING(from);
2077 from_len = PyBytes_GET_SIZE(from);
2078 }
2079 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2080 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 if (PyBytes_Check(to)) {
2083 to_s = PyBytes_AS_STRING(to);
2084 to_len = PyBytes_GET_SIZE(to);
2085 }
2086 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2087 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 return (PyObject *)replace((PyBytesObject *) self,
2090 from_s, from_len,
2091 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092}
2093
2094/** End DALKE **/
2095
2096/* Matches the end (direction >= 0) or start (direction < 0) of self
2097 * against substr, using the start and end arguments. Returns
2098 * -1 on error, 0 if not found and 1 if found.
2099 */
2100Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002101_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 Py_ssize_t len = PyBytes_GET_SIZE(self);
2105 Py_ssize_t slen;
2106 const char* sub;
2107 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 if (PyBytes_Check(substr)) {
2110 sub = PyBytes_AS_STRING(substr);
2111 slen = PyBytes_GET_SIZE(substr);
2112 }
2113 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2114 return -1;
2115 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002119 if (direction < 0) {
2120 /* startswith */
2121 if (start+slen > len)
2122 return 0;
2123 } else {
2124 /* endswith */
2125 if (end-start < slen || start > len)
2126 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 if (end-slen > start)
2129 start = end - slen;
2130 }
2131 if (end-start >= slen)
2132 return ! memcmp(str+start, sub, slen);
2133 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002134}
2135
2136
2137PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002138"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139\n\
2140Return True if B starts with the specified prefix, False otherwise.\n\
2141With optional start, test B beginning at that position.\n\
2142With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002143prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
2145static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002146bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 Py_ssize_t start = 0;
2149 Py_ssize_t end = PY_SSIZE_T_MAX;
2150 PyObject *subobj;
2151 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Jesus Ceaac451502011-04-20 17:09:23 +02002153 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 return NULL;
2155 if (PyTuple_Check(subobj)) {
2156 Py_ssize_t i;
2157 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2158 result = _bytes_tailmatch(self,
2159 PyTuple_GET_ITEM(subobj, i),
2160 start, end, -1);
2161 if (result == -1)
2162 return NULL;
2163 else if (result) {
2164 Py_RETURN_TRUE;
2165 }
2166 }
2167 Py_RETURN_FALSE;
2168 }
2169 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002170 if (result == -1) {
2171 if (PyErr_ExceptionMatches(PyExc_TypeError))
2172 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2173 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002175 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 else
2177 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178}
2179
2180
2181PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002182"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183\n\
2184Return True if B ends with the specified suffix, False otherwise.\n\
2185With optional start, test B beginning at that position.\n\
2186With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002187suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188
2189static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002190bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 Py_ssize_t start = 0;
2193 Py_ssize_t end = PY_SSIZE_T_MAX;
2194 PyObject *subobj;
2195 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196
Jesus Ceaac451502011-04-20 17:09:23 +02002197 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 return NULL;
2199 if (PyTuple_Check(subobj)) {
2200 Py_ssize_t i;
2201 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2202 result = _bytes_tailmatch(self,
2203 PyTuple_GET_ITEM(subobj, i),
2204 start, end, +1);
2205 if (result == -1)
2206 return NULL;
2207 else if (result) {
2208 Py_RETURN_TRUE;
2209 }
2210 }
2211 Py_RETURN_FALSE;
2212 }
2213 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002214 if (result == -1) {
2215 if (PyErr_ExceptionMatches(PyExc_TypeError))
2216 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2217 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002218 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002219 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002220 else
2221 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222}
2223
2224
2225PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002226"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002228Decode B using the codec registered for encoding. Default encoding\n\
2229is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002230handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2231a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002232as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002233able to handle UnicodeDecodeErrors.");
2234
2235static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002236bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002237{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002238 const char *encoding = NULL;
2239 const char *errors = NULL;
2240 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002242 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2243 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002244 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002245}
2246
Guido van Rossum20188312006-05-05 15:15:40 +00002247
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002248PyDoc_STRVAR(splitlines__doc__,
2249"B.splitlines([keepends]) -> list of lines\n\
2250\n\
2251Return a list of the lines in B, breaking at line boundaries.\n\
2252Line breaks are not included in the resulting list unless keepends\n\
2253is given and true.");
2254
2255static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002256bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002257{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002258 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002259 int keepends = 0;
2260
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002261 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2262 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002263 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002264
2265 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002266 (PyObject*) self, PyBytes_AS_STRING(self),
2267 PyBytes_GET_SIZE(self), keepends
2268 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002269}
2270
2271
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002272PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002273"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002274\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002275Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002276Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002277Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002278
2279static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002280hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002281{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 if (c >= 128)
2283 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002284 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002285 return c - '0';
2286 else {
David Malcolm96960882010-11-05 17:23:41 +00002287 if (Py_ISUPPER(c))
2288 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 if (c >= 'a' && c <= 'f')
2290 return c - 'a' + 10;
2291 }
2292 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002293}
2294
2295static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002296bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002298 PyObject *newstring, *hexobj;
2299 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002300 Py_ssize_t hexlen, byteslen, i, j;
2301 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002302 void *data;
2303 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002304
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2306 return NULL;
2307 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002308 if (PyUnicode_READY(hexobj))
2309 return NULL;
2310 kind = PyUnicode_KIND(hexobj);
2311 data = PyUnicode_DATA(hexobj);
2312 hexlen = PyUnicode_GET_LENGTH(hexobj);
2313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 byteslen = hexlen/2; /* This overestimates if there are spaces */
2315 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2316 if (!newstring)
2317 return NULL;
2318 buf = PyBytes_AS_STRING(newstring);
2319 for (i = j = 0; i < hexlen; i += 2) {
2320 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002321 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002322 i++;
2323 if (i >= hexlen)
2324 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002325 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2326 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002327 if (top == -1 || bot == -1) {
2328 PyErr_Format(PyExc_ValueError,
2329 "non-hexadecimal number found in "
2330 "fromhex() arg at position %zd", i);
2331 goto error;
2332 }
2333 buf[j++] = (top << 4) + bot;
2334 }
2335 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2336 goto error;
2337 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002338
2339 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002340 Py_XDECREF(newstring);
2341 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002342}
2343
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002344PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002345"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002346
2347static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002348bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002349{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 Py_ssize_t res;
2351 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2352 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002353}
2354
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002355
2356static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002357bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002360}
2361
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002362
2363static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002364bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2366 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2367 _Py_capitalize__doc__},
2368 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2369 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2370 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2371 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2372 endswith__doc__},
2373 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2374 expandtabs__doc__},
2375 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2376 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2377 fromhex_doc},
2378 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2379 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2380 _Py_isalnum__doc__},
2381 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2382 _Py_isalpha__doc__},
2383 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2384 _Py_isdigit__doc__},
2385 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2386 _Py_islower__doc__},
2387 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2388 _Py_isspace__doc__},
2389 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2390 _Py_istitle__doc__},
2391 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2392 _Py_isupper__doc__},
2393 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2394 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2395 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2396 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2397 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2398 _Py_maketrans__doc__},
2399 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2400 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2401 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2402 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2403 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2404 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2405 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002406 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002408 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002409 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 splitlines__doc__},
2411 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2412 startswith__doc__},
2413 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2414 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2415 _Py_swapcase__doc__},
2416 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2417 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2418 translate__doc__},
2419 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2420 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2421 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2422 sizeof__doc__},
2423 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002424};
2425
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002426static PyObject *
2427str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2428
2429static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002430bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 PyObject *x = NULL;
2433 const char *encoding = NULL;
2434 const char *errors = NULL;
2435 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002436 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 Py_ssize_t size;
2438 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002439 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 if (type != &PyBytes_Type)
2442 return str_subtype_new(type, args, kwds);
2443 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2444 &encoding, &errors))
2445 return NULL;
2446 if (x == NULL) {
2447 if (encoding != NULL || errors != NULL) {
2448 PyErr_SetString(PyExc_TypeError,
2449 "encoding or errors without sequence "
2450 "argument");
2451 return NULL;
2452 }
2453 return PyBytes_FromString("");
2454 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 if (PyUnicode_Check(x)) {
2457 /* Encode via the codec registry */
2458 if (encoding == NULL) {
2459 PyErr_SetString(PyExc_TypeError,
2460 "string argument without an encoding");
2461 return NULL;
2462 }
2463 new = PyUnicode_AsEncodedString(x, encoding, errors);
2464 if (new == NULL)
2465 return NULL;
2466 assert(PyBytes_Check(new));
2467 return new;
2468 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002469
2470 /* We'd like to call PyObject_Bytes here, but we need to check for an
2471 integer argument before deferring to PyBytes_FromObject, something
2472 PyObject_Bytes doesn't do. */
2473 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2474 if (func != NULL) {
2475 new = PyObject_CallFunctionObjArgs(func, NULL);
2476 Py_DECREF(func);
2477 if (new == NULL)
2478 return NULL;
2479 if (!PyBytes_Check(new)) {
2480 PyErr_Format(PyExc_TypeError,
2481 "__bytes__ returned non-bytes (type %.200s)",
2482 Py_TYPE(new)->tp_name);
2483 Py_DECREF(new);
2484 return NULL;
2485 }
2486 return new;
2487 }
2488 else if (PyErr_Occurred())
2489 return NULL;
2490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 /* Is it an integer? */
2492 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2493 if (size == -1 && PyErr_Occurred()) {
2494 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2495 return NULL;
2496 PyErr_Clear();
2497 }
2498 else if (size < 0) {
2499 PyErr_SetString(PyExc_ValueError, "negative count");
2500 return NULL;
2501 }
2502 else {
2503 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002504 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002505 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002506 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 return new;
2509 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 /* If it's not unicode, there can't be encoding or errors */
2512 if (encoding != NULL || errors != NULL) {
2513 PyErr_SetString(PyExc_TypeError,
2514 "encoding or errors without a string argument");
2515 return NULL;
2516 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002517
2518 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002519}
2520
2521PyObject *
2522PyBytes_FromObject(PyObject *x)
2523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 PyObject *new, *it;
2525 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 if (x == NULL) {
2528 PyErr_BadInternalCall();
2529 return NULL;
2530 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002531
2532 if (PyBytes_CheckExact(x)) {
2533 Py_INCREF(x);
2534 return x;
2535 }
2536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002537 /* Use the modern buffer interface */
2538 if (PyObject_CheckBuffer(x)) {
2539 Py_buffer view;
2540 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2541 return NULL;
2542 new = PyBytes_FromStringAndSize(NULL, view.len);
2543 if (!new)
2544 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2546 &view, view.len, 'C') < 0)
2547 goto fail;
2548 PyBuffer_Release(&view);
2549 return new;
2550 fail:
2551 Py_XDECREF(new);
2552 PyBuffer_Release(&view);
2553 return NULL;
2554 }
2555 if (PyUnicode_Check(x)) {
2556 PyErr_SetString(PyExc_TypeError,
2557 "cannot convert unicode object to bytes");
2558 return NULL;
2559 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 if (PyList_CheckExact(x)) {
2562 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2563 if (new == NULL)
2564 return NULL;
2565 for (i = 0; i < Py_SIZE(x); i++) {
2566 Py_ssize_t value = PyNumber_AsSsize_t(
2567 PyList_GET_ITEM(x, i), PyExc_ValueError);
2568 if (value == -1 && PyErr_Occurred()) {
2569 Py_DECREF(new);
2570 return NULL;
2571 }
2572 if (value < 0 || value >= 256) {
2573 PyErr_SetString(PyExc_ValueError,
2574 "bytes must be in range(0, 256)");
2575 Py_DECREF(new);
2576 return NULL;
2577 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002578 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002579 }
2580 return new;
2581 }
2582 if (PyTuple_CheckExact(x)) {
2583 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2584 if (new == NULL)
2585 return NULL;
2586 for (i = 0; i < Py_SIZE(x); i++) {
2587 Py_ssize_t value = PyNumber_AsSsize_t(
2588 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2589 if (value == -1 && PyErr_Occurred()) {
2590 Py_DECREF(new);
2591 return NULL;
2592 }
2593 if (value < 0 || value >= 256) {
2594 PyErr_SetString(PyExc_ValueError,
2595 "bytes must be in range(0, 256)");
2596 Py_DECREF(new);
2597 return NULL;
2598 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002599 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 }
2601 return new;
2602 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002604 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002605 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002606 if (size == -1 && PyErr_Occurred())
2607 return NULL;
2608 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2609 returning a shared empty bytes string. This required because we
2610 want to call _PyBytes_Resize() the returned object, which we can
2611 only do on bytes objects with refcount == 1. */
2612 size += 1;
2613 new = PyBytes_FromStringAndSize(NULL, size);
2614 if (new == NULL)
2615 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002617 /* Get the iterator */
2618 it = PyObject_GetIter(x);
2619 if (it == NULL)
2620 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 /* Run the iterator to exhaustion */
2623 for (i = 0; ; i++) {
2624 PyObject *item;
2625 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002626
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002627 /* Get the next item */
2628 item = PyIter_Next(it);
2629 if (item == NULL) {
2630 if (PyErr_Occurred())
2631 goto error;
2632 break;
2633 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002635 /* Interpret it as an int (__index__) */
2636 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2637 Py_DECREF(item);
2638 if (value == -1 && PyErr_Occurred())
2639 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 /* Range check */
2642 if (value < 0 || value >= 256) {
2643 PyErr_SetString(PyExc_ValueError,
2644 "bytes must be in range(0, 256)");
2645 goto error;
2646 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 /* Append the byte */
2649 if (i >= size) {
2650 size = 2 * size + 1;
2651 if (_PyBytes_Resize(&new, size) < 0)
2652 goto error;
2653 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002654 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 }
2656 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002658 /* Clean up and return success */
2659 Py_DECREF(it);
2660 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002661
2662 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 /* Error handling when new != NULL */
2664 Py_XDECREF(it);
2665 Py_DECREF(new);
2666 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002667}
2668
2669static PyObject *
2670str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2671{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 PyObject *tmp, *pnew;
2673 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 assert(PyType_IsSubtype(type, &PyBytes_Type));
2676 tmp = bytes_new(&PyBytes_Type, args, kwds);
2677 if (tmp == NULL)
2678 return NULL;
2679 assert(PyBytes_CheckExact(tmp));
2680 n = PyBytes_GET_SIZE(tmp);
2681 pnew = type->tp_alloc(type, n);
2682 if (pnew != NULL) {
2683 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2684 PyBytes_AS_STRING(tmp), n+1);
2685 ((PyBytesObject *)pnew)->ob_shash =
2686 ((PyBytesObject *)tmp)->ob_shash;
2687 }
2688 Py_DECREF(tmp);
2689 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690}
2691
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002692PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002693"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002694bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002695bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002696bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2697bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002698\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002700 - an iterable yielding integers in range(256)\n\
2701 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002702 - any object implementing the buffer API.\n\
2703 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002704
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002705static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002706
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2709 "bytes",
2710 PyBytesObject_SIZE,
2711 sizeof(char),
2712 bytes_dealloc, /* tp_dealloc */
2713 0, /* tp_print */
2714 0, /* tp_getattr */
2715 0, /* tp_setattr */
2716 0, /* tp_reserved */
2717 (reprfunc)bytes_repr, /* tp_repr */
2718 0, /* tp_as_number */
2719 &bytes_as_sequence, /* tp_as_sequence */
2720 &bytes_as_mapping, /* tp_as_mapping */
2721 (hashfunc)bytes_hash, /* tp_hash */
2722 0, /* tp_call */
2723 bytes_str, /* tp_str */
2724 PyObject_GenericGetAttr, /* tp_getattro */
2725 0, /* tp_setattro */
2726 &bytes_as_buffer, /* tp_as_buffer */
2727 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2728 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2729 bytes_doc, /* tp_doc */
2730 0, /* tp_traverse */
2731 0, /* tp_clear */
2732 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2733 0, /* tp_weaklistoffset */
2734 bytes_iter, /* tp_iter */
2735 0, /* tp_iternext */
2736 bytes_methods, /* tp_methods */
2737 0, /* tp_members */
2738 0, /* tp_getset */
2739 &PyBaseObject_Type, /* tp_base */
2740 0, /* tp_dict */
2741 0, /* tp_descr_get */
2742 0, /* tp_descr_set */
2743 0, /* tp_dictoffset */
2744 0, /* tp_init */
2745 0, /* tp_alloc */
2746 bytes_new, /* tp_new */
2747 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002748};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002749
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002751PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002753 PyObject *v;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 assert(pv != NULL);
2755 if (*pv == NULL)
2756 return;
2757 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002758 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002759 return;
2760 }
2761 v = bytes_concat(*pv, w);
2762 Py_DECREF(*pv);
2763 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002764}
2765
2766void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002767PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002768{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 PyBytes_Concat(pv, w);
2770 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002771}
2772
2773
2774/* The following function breaks the notion that strings are immutable:
2775 it changes the size of a string. We get away with this only if there
2776 is only one module referencing the object. You can also think of it
2777 as creating a new string object and destroying the old one, only
2778 more efficiently. In any case, don't use this if the string may
2779 already be known to some other part of the code...
2780 Note that if there's not enough memory to resize the string, the original
2781 string object at *pv is deallocated, *pv is set to NULL, an "out of
2782 memory" exception is set, and -1 is returned. Else (on success) 0 is
2783 returned, and the value in *pv may or may not be the same as on input.
2784 As always, an extra byte is allocated for a trailing \0 byte (newsize
2785 does *not* include that), and a trailing \0 byte is stored.
2786*/
2787
2788int
2789_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2790{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002791 PyObject *v;
2792 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 v = *pv;
2794 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2795 *pv = 0;
2796 Py_DECREF(v);
2797 PyErr_BadInternalCall();
2798 return -1;
2799 }
2800 /* XXX UNREF/NEWREF interface should be more symmetrical */
2801 _Py_DEC_REFTOTAL;
2802 _Py_ForgetReference(v);
2803 *pv = (PyObject *)
2804 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2805 if (*pv == NULL) {
2806 PyObject_Del(v);
2807 PyErr_NoMemory();
2808 return -1;
2809 }
2810 _Py_NewReference(*pv);
2811 sv = (PyBytesObject *) *pv;
2812 Py_SIZE(sv) = newsize;
2813 sv->ob_sval[newsize] = '\0';
2814 sv->ob_shash = -1; /* invalidate cached hash value */
2815 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816}
2817
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002818void
2819PyBytes_Fini(void)
2820{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002821 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002822 for (i = 0; i < UCHAR_MAX + 1; i++)
2823 Py_CLEAR(characters[i]);
2824 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002825}
2826
Benjamin Peterson4116f362008-05-27 00:36:20 +00002827/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002828
2829typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 PyObject_HEAD
2831 Py_ssize_t it_index;
2832 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002833} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002834
2835static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002836striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002837{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 _PyObject_GC_UNTRACK(it);
2839 Py_XDECREF(it->it_seq);
2840 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002841}
2842
2843static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002845{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002846 Py_VISIT(it->it_seq);
2847 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002848}
2849
2850static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 PyBytesObject *seq;
2854 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002855
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 assert(it != NULL);
2857 seq = it->it_seq;
2858 if (seq == NULL)
2859 return NULL;
2860 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002862 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2863 item = PyLong_FromLong(
2864 (unsigned char)seq->ob_sval[it->it_index]);
2865 if (item != NULL)
2866 ++it->it_index;
2867 return item;
2868 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002870 Py_DECREF(seq);
2871 it->it_seq = NULL;
2872 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002873}
2874
2875static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002876striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002877{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002878 Py_ssize_t len = 0;
2879 if (it->it_seq)
2880 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2881 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002882}
2883
2884PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002886
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002887static PyObject *
2888striter_reduce(striterobject *it)
2889{
2890 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002891 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002892 it->it_seq, it->it_index);
2893 } else {
2894 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2895 if (u == NULL)
2896 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002897 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002898 }
2899}
2900
2901PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2902
2903static PyObject *
2904striter_setstate(striterobject *it, PyObject *state)
2905{
2906 Py_ssize_t index = PyLong_AsSsize_t(state);
2907 if (index == -1 && PyErr_Occurred())
2908 return NULL;
2909 if (index < 0)
2910 index = 0;
2911 it->it_index = index;
2912 Py_RETURN_NONE;
2913}
2914
2915PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2916
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2919 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002920 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2921 reduce_doc},
2922 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2923 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002925};
2926
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2929 "bytes_iterator", /* tp_name */
2930 sizeof(striterobject), /* tp_basicsize */
2931 0, /* tp_itemsize */
2932 /* methods */
2933 (destructor)striter_dealloc, /* tp_dealloc */
2934 0, /* tp_print */
2935 0, /* tp_getattr */
2936 0, /* tp_setattr */
2937 0, /* tp_reserved */
2938 0, /* tp_repr */
2939 0, /* tp_as_number */
2940 0, /* tp_as_sequence */
2941 0, /* tp_as_mapping */
2942 0, /* tp_hash */
2943 0, /* tp_call */
2944 0, /* tp_str */
2945 PyObject_GenericGetAttr, /* tp_getattro */
2946 0, /* tp_setattro */
2947 0, /* tp_as_buffer */
2948 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2949 0, /* tp_doc */
2950 (traverseproc)striter_traverse, /* tp_traverse */
2951 0, /* tp_clear */
2952 0, /* tp_richcompare */
2953 0, /* tp_weaklistoffset */
2954 PyObject_SelfIter, /* tp_iter */
2955 (iternextfunc)striter_next, /* tp_iternext */
2956 striter_methods, /* tp_methods */
2957 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002958};
2959
2960static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002961bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002962{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002965 if (!PyBytes_Check(seq)) {
2966 PyErr_BadInternalCall();
2967 return NULL;
2968 }
2969 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
2970 if (it == NULL)
2971 return NULL;
2972 it->it_index = 0;
2973 Py_INCREF(seq);
2974 it->it_seq = (PyBytesObject *)seq;
2975 _PyObject_GC_TRACK(it);
2976 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002977}