blob: 14bd8e68c56a236f0ffbad8287edfb68fef3fca7 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   The "+ 1" accounts for the trailing null byte that every bytes object
   carries after its data.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; its length (as reported by strlen()) becomes the size of the new
   object.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 register PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200568 Py_ssize_t i, length = Py_SIZE(op);
569 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571 unsigned char quote, *s, *p;
572
573 /* Compute size of output string */
574 squotes = dquotes = 0;
575 newsize = 3; /* b'' */
576 s = (unsigned char*)op->ob_sval;
577 for (i = 0; i < length; i++) {
578 switch(s[i]) {
579 case '\'': squotes++; newsize++; break;
580 case '"': dquotes++; newsize++; break;
581 case '\\': case '\t': case '\n': case '\r':
582 newsize += 2; break; /* \C */
583 default:
584 if (s[i] < ' ' || s[i] >= 0x7f)
585 newsize += 4; /* \xHH */
586 else
587 newsize++;
588 }
589 }
590 quote = '\'';
591 if (smartquotes && squotes && !dquotes)
592 quote = '"';
593 if (squotes && quote == '\'')
594 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200595
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 PyErr_SetString(PyExc_OverflowError,
598 "bytes object is too large to make repr");
599 return NULL;
600 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601
602 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 if (v == NULL) {
604 return NULL;
605 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000607
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 *p++ = 'b', *p++ = quote;
609 for (i = 0; i < length; i++) {
610 unsigned char c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200622 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
623 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 else
626 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200629 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200630 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000631}
632
Neal Norwitz6968b052007-02-27 19:02:19 +0000633static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000634bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000635{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000636 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000637}
638
Neal Norwitz6968b052007-02-27 19:02:19 +0000639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000640bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 if (Py_BytesWarningFlag) {
643 if (PyErr_WarnEx(PyExc_BytesWarning,
644 "str() on a bytes instance", 1))
645 return NULL;
646 }
647 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000648}
649
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000650static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000651bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000654}
Neal Norwitz6968b052007-02-27 19:02:19 +0000655
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656/* This is also used by PyBytes_Concat() */
657static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000658bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000659{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 Py_ssize_t size;
661 Py_buffer va, vb;
662 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 va.len = -1;
665 vb.len = -1;
666 if (_getbuffer(a, &va) < 0 ||
667 _getbuffer(b, &vb) < 0) {
668 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
669 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
670 goto done;
671 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 /* Optimize end cases */
674 if (va.len == 0 && PyBytes_CheckExact(b)) {
675 result = b;
676 Py_INCREF(result);
677 goto done;
678 }
679 if (vb.len == 0 && PyBytes_CheckExact(a)) {
680 result = a;
681 Py_INCREF(result);
682 goto done;
683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 size = va.len + vb.len;
686 if (size < 0) {
687 PyErr_NoMemory();
688 goto done;
689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 result = PyBytes_FromStringAndSize(NULL, size);
692 if (result != NULL) {
693 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
694 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000696
697 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000698 if (va.len != -1)
699 PyBuffer_Release(&va);
700 if (vb.len != -1)
701 PyBuffer_Release(&vb);
702 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703}
Neal Norwitz6968b052007-02-27 19:02:19 +0000704
705static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000706bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 register Py_ssize_t i;
709 register Py_ssize_t j;
710 register Py_ssize_t size;
711 register PyBytesObject *op;
712 size_t nbytes;
713 if (n < 0)
714 n = 0;
715 /* watch out for overflows: the size can overflow int,
716 * and the # of bytes needed can overflow size_t
717 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000718 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 PyErr_SetString(PyExc_OverflowError,
720 "repeated bytes are too long");
721 return NULL;
722 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000723 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
725 Py_INCREF(a);
726 return (PyObject *)a;
727 }
728 nbytes = (size_t)size;
729 if (nbytes + PyBytesObject_SIZE <= nbytes) {
730 PyErr_SetString(PyExc_OverflowError,
731 "repeated bytes are too long");
732 return NULL;
733 }
734 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
735 if (op == NULL)
736 return PyErr_NoMemory();
737 PyObject_INIT_VAR(op, &PyBytes_Type, size);
738 op->ob_shash = -1;
739 op->ob_sval[size] = '\0';
740 if (Py_SIZE(a) == 1 && n > 0) {
741 memset(op->ob_sval, a->ob_sval[0] , n);
742 return (PyObject *) op;
743 }
744 i = 0;
745 if (i < size) {
746 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
747 i = Py_SIZE(a);
748 }
749 while (i < size) {
750 j = (i <= size-i) ? i : size-i;
751 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
752 i += j;
753 }
754 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000755}
756
Guido van Rossum98297ee2007-11-06 21:34:58 +0000757static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000758bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000759{
760 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
761 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000762 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000763 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000764 PyErr_Clear();
765 if (_getbuffer(arg, &varg) < 0)
766 return -1;
767 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
768 varg.buf, varg.len, 0);
769 PyBuffer_Release(&varg);
770 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000771 }
772 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000773 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
774 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000775 }
776
Antoine Pitrou0010d372010-08-15 17:12:55 +0000777 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000778}
779
Neal Norwitz6968b052007-02-27 19:02:19 +0000780static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000781bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000782{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 if (i < 0 || i >= Py_SIZE(a)) {
784 PyErr_SetString(PyExc_IndexError, "index out of range");
785 return NULL;
786 }
787 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000788}
789
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000790static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000791bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 int c;
794 Py_ssize_t len_a, len_b;
795 Py_ssize_t min_len;
796 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 /* Make sure both arguments are strings. */
799 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
800 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
801 (PyObject_IsInstance((PyObject*)a,
802 (PyObject*)&PyUnicode_Type) ||
803 PyObject_IsInstance((PyObject*)b,
804 (PyObject*)&PyUnicode_Type))) {
805 if (PyErr_WarnEx(PyExc_BytesWarning,
806 "Comparison between bytes and string", 1))
807 return NULL;
808 }
809 result = Py_NotImplemented;
810 goto out;
811 }
812 if (a == b) {
813 switch (op) {
814 case Py_EQ:case Py_LE:case Py_GE:
815 result = Py_True;
816 goto out;
817 case Py_NE:case Py_LT:case Py_GT:
818 result = Py_False;
819 goto out;
820 }
821 }
822 if (op == Py_EQ) {
823 /* Supporting Py_NE here as well does not save
824 much time, since Py_NE is rarely used. */
825 if (Py_SIZE(a) == Py_SIZE(b)
826 && (a->ob_sval[0] == b->ob_sval[0]
827 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
828 result = Py_True;
829 } else {
830 result = Py_False;
831 }
832 goto out;
833 }
834 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
835 min_len = (len_a < len_b) ? len_a : len_b;
836 if (min_len > 0) {
837 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
838 if (c==0)
839 c = memcmp(a->ob_sval, b->ob_sval, min_len);
840 } else
841 c = 0;
842 if (c == 0)
843 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
844 switch (op) {
845 case Py_LT: c = c < 0; break;
846 case Py_LE: c = c <= 0; break;
847 case Py_EQ: assert(0); break; /* unreachable */
848 case Py_NE: c = c != 0; break;
849 case Py_GT: c = c > 0; break;
850 case Py_GE: c = c >= 0; break;
851 default:
852 result = Py_NotImplemented;
853 goto out;
854 }
855 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000856 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 Py_INCREF(result);
858 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000859}
860
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000861static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000862bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000863{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100864 if (a->ob_shash == -1) {
865 /* Can't fail */
866 a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
867 }
868 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000869}
870
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000871static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000872bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000873{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 if (PyIndex_Check(item)) {
875 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
876 if (i == -1 && PyErr_Occurred())
877 return NULL;
878 if (i < 0)
879 i += PyBytes_GET_SIZE(self);
880 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
881 PyErr_SetString(PyExc_IndexError,
882 "index out of range");
883 return NULL;
884 }
885 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
886 }
887 else if (PySlice_Check(item)) {
888 Py_ssize_t start, stop, step, slicelength, cur, i;
889 char* source_buf;
890 char* result_buf;
891 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000892
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000893 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000894 PyBytes_GET_SIZE(self),
895 &start, &stop, &step, &slicelength) < 0) {
896 return NULL;
897 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 if (slicelength <= 0) {
900 return PyBytes_FromStringAndSize("", 0);
901 }
902 else if (start == 0 && step == 1 &&
903 slicelength == PyBytes_GET_SIZE(self) &&
904 PyBytes_CheckExact(self)) {
905 Py_INCREF(self);
906 return (PyObject *)self;
907 }
908 else if (step == 1) {
909 return PyBytes_FromStringAndSize(
910 PyBytes_AS_STRING(self) + start,
911 slicelength);
912 }
913 else {
914 source_buf = PyBytes_AS_STRING(self);
915 result = PyBytes_FromStringAndSize(NULL, slicelength);
916 if (result == NULL)
917 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000918
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000919 result_buf = PyBytes_AS_STRING(result);
920 for (cur = start, i = 0; i < slicelength;
921 cur += step, i++) {
922 result_buf[i] = source_buf[cur];
923 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000925 return result;
926 }
927 }
928 else {
929 PyErr_Format(PyExc_TypeError,
930 "byte indices must be integers, not %.200s",
931 Py_TYPE(item)->tp_name);
932 return NULL;
933 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000934}
935
936static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000937bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000938{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
940 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000941}
942
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000943static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 (lenfunc)bytes_length, /*sq_length*/
945 (binaryfunc)bytes_concat, /*sq_concat*/
946 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
947 (ssizeargfunc)bytes_item, /*sq_item*/
948 0, /*sq_slice*/
949 0, /*sq_ass_item*/
950 0, /*sq_ass_slice*/
951 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000952};
953
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 (lenfunc)bytes_length,
956 (binaryfunc)bytes_subscript,
957 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958};
959
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000960static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 (getbufferproc)bytes_buffer_getbuffer,
962 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000963};
964
965
/* Selects which end(s) of the object a strip operation works on. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The bare method name: the format string minus its 3-character prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
974
Neal Norwitz6968b052007-02-27 19:02:19 +0000975PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200976"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000977\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000978Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000979If sep is not specified or is None, B is split on ASCII whitespace\n\
980characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000981If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000982
983static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200984bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +0000985{
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200986 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
988 Py_ssize_t maxsplit = -1;
989 const char *s = PyBytes_AS_STRING(self), *sub;
990 Py_buffer vsub;
991 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +0000992
Ezio Melotticda6b6d2012-02-26 09:39:55 +0200993 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
994 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 return NULL;
996 if (maxsplit < 0)
997 maxsplit = PY_SSIZE_T_MAX;
998 if (subobj == Py_None)
999 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1000 if (_getbuffer(subobj, &vsub) < 0)
1001 return NULL;
1002 sub = vsub.buf;
1003 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1006 PyBuffer_Release(&vsub);
1007 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001008}
1009
Neal Norwitz6968b052007-02-27 19:02:19 +00001010PyDoc_STRVAR(partition__doc__,
1011"B.partition(sep) -> (head, sep, tail)\n\
1012\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001013Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001014the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001015found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001016
1017static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001018bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 const char *sep;
1021 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 if (PyBytes_Check(sep_obj)) {
1024 sep = PyBytes_AS_STRING(sep_obj);
1025 sep_len = PyBytes_GET_SIZE(sep_obj);
1026 }
1027 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1028 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 return stringlib_partition(
1031 (PyObject*) self,
1032 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1033 sep_obj, sep, sep_len
1034 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001035}
1036
1037PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001038"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001039\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001040Search for the separator sep in B, starting at the end of B,\n\
1041and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001042part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001043bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001044
1045static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001046bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 const char *sep;
1049 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 if (PyBytes_Check(sep_obj)) {
1052 sep = PyBytes_AS_STRING(sep_obj);
1053 sep_len = PyBytes_GET_SIZE(sep_obj);
1054 }
1055 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1056 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 return stringlib_rpartition(
1059 (PyObject*) self,
1060 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1061 sep_obj, sep, sep_len
1062 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001063}
1064
Neal Norwitz6968b052007-02-27 19:02:19 +00001065PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001066"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001067\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001068Return a list of the sections in B, using sep as the delimiter,\n\
1069starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001070If sep is not given, B is split on ASCII whitespace characters\n\
1071(space, tab, return, newline, formfeed, vertical tab).\n\
1072If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001074
Neal Norwitz6968b052007-02-27 19:02:19 +00001075static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001076bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001077{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001078 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1080 Py_ssize_t maxsplit = -1;
1081 const char *s = PyBytes_AS_STRING(self), *sub;
1082 Py_buffer vsub;
1083 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001084
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001085 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1086 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 return NULL;
1088 if (maxsplit < 0)
1089 maxsplit = PY_SSIZE_T_MAX;
1090 if (subobj == Py_None)
1091 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1092 if (_getbuffer(subobj, &vsub) < 0)
1093 return NULL;
1094 sub = vsub.buf;
1095 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1098 PyBuffer_Release(&vsub);
1099 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001100}
1101
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102
1103PyDoc_STRVAR(join__doc__,
1104"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001105\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001106Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1108
Neal Norwitz6968b052007-02-27 19:02:19 +00001109static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001110bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001111{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 char *sep = PyBytes_AS_STRING(self);
1113 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1114 PyObject *res = NULL;
1115 char *p;
1116 Py_ssize_t seqlen = 0;
1117 size_t sz = 0;
1118 Py_ssize_t i;
1119 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 seq = PySequence_Fast(orig, "");
1122 if (seq == NULL) {
1123 return NULL;
1124 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 seqlen = PySequence_Size(seq);
1127 if (seqlen == 0) {
1128 Py_DECREF(seq);
1129 return PyBytes_FromString("");
1130 }
1131 if (seqlen == 1) {
1132 item = PySequence_Fast_GET_ITEM(seq, 0);
1133 if (PyBytes_CheckExact(item)) {
1134 Py_INCREF(item);
1135 Py_DECREF(seq);
1136 return item;
1137 }
1138 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 /* There are at least two things to join, or else we have a subclass
1141 * of the builtin types in the sequence.
1142 * Do a pre-pass to figure out the total amount of space we'll
1143 * need (sz), and see whether all argument are bytes.
1144 */
1145 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1146 for (i = 0; i < seqlen; i++) {
1147 const size_t old_sz = sz;
1148 item = PySequence_Fast_GET_ITEM(seq, i);
1149 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1150 PyErr_Format(PyExc_TypeError,
1151 "sequence item %zd: expected bytes,"
1152 " %.80s found",
1153 i, Py_TYPE(item)->tp_name);
1154 Py_DECREF(seq);
1155 return NULL;
1156 }
1157 sz += Py_SIZE(item);
1158 if (i != 0)
1159 sz += seplen;
1160 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1161 PyErr_SetString(PyExc_OverflowError,
1162 "join() result is too long for bytes");
1163 Py_DECREF(seq);
1164 return NULL;
1165 }
1166 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 /* Allocate result space. */
1169 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1170 if (res == NULL) {
1171 Py_DECREF(seq);
1172 return NULL;
1173 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 /* Catenate everything. */
1176 /* I'm not worried about a PyByteArray item growing because there's
1177 nowhere in this function where we release the GIL. */
1178 p = PyBytes_AS_STRING(res);
1179 for (i = 0; i < seqlen; ++i) {
1180 size_t n;
1181 char *q;
1182 if (i) {
1183 Py_MEMCPY(p, sep, seplen);
1184 p += seplen;
1185 }
1186 item = PySequence_Fast_GET_ITEM(seq, i);
1187 n = Py_SIZE(item);
1188 if (PyBytes_Check(item))
1189 q = PyBytes_AS_STRING(item);
1190 else
1191 q = PyByteArray_AS_STRING(item);
1192 Py_MEMCPY(p, q, n);
1193 p += n;
1194 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 Py_DECREF(seq);
1197 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001198}
1199
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001200PyObject *
1201_PyBytes_Join(PyObject *sep, PyObject *x)
1202{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 assert(sep != NULL && PyBytes_Check(sep));
1204 assert(x != NULL);
1205 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001206}
1207
/* helper macro to fixup start/end slice values
 *
 * Clamps `end` to at most `len`; a negative `start` or `end` is offset by
 * `len` once and then clamped to 0.  `start` and `end` must be modifiable
 * lvalues; every argument may be evaluated more than once.
 *
 * Wrapped in do { ... } while (0) so the expansion is a single statement
 * (safe after an unbraced `if`); invoke it with a trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222
1223Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001224bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001227 char byte;
1228 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 const char *sub;
1230 Py_ssize_t sub_len;
1231 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001232 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233
Antoine Pitrouac65d962011-10-20 23:54:17 +02001234 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1235 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237
Antoine Pitrouac65d962011-10-20 23:54:17 +02001238 if (subobj) {
1239 if (_getbuffer(subobj, &subbuf) < 0)
1240 return -2;
1241
1242 sub = subbuf.buf;
1243 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001245 else {
1246 sub = &byte;
1247 sub_len = 1;
1248 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001251 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1253 sub, sub_len, start, end);
1254 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001255 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1257 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001258
1259 if (subobj)
1260 PyBuffer_Release(&subbuf);
1261
1262 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263}
1264
1265
1266PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001267"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001268\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001269Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001270such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001272\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273Return -1 on failure.");
1274
Neal Norwitz6968b052007-02-27 19:02:19 +00001275static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001276bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001277{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 Py_ssize_t result = bytes_find_internal(self, args, +1);
1279 if (result == -2)
1280 return NULL;
1281 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001282}
1283
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001284
1285PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001286"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001287\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001288Like B.find() but raise ValueError when the substring is not found.");
1289
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001290static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001291bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001292{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 Py_ssize_t result = bytes_find_internal(self, args, +1);
1294 if (result == -2)
1295 return NULL;
1296 if (result == -1) {
1297 PyErr_SetString(PyExc_ValueError,
1298 "substring not found");
1299 return NULL;
1300 }
1301 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001302}
1303
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
1305PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001306"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001307\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001309such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001311\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312Return -1 on failure.");
1313
Neal Norwitz6968b052007-02-27 19:02:19 +00001314static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001315bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 Py_ssize_t result = bytes_find_internal(self, args, -1);
1318 if (result == -2)
1319 return NULL;
1320 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001321}
1322
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001323
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001325"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326\n\
1327Like B.rfind() but raise ValueError when the substring is not found.");
1328
1329static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001330bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001331{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 Py_ssize_t result = bytes_find_internal(self, args, -1);
1333 if (result == -2)
1334 return NULL;
1335 if (result == -1) {
1336 PyErr_SetString(PyExc_ValueError,
1337 "substring not found");
1338 return NULL;
1339 }
1340 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001341}
1342
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001343
1344Py_LOCAL_INLINE(PyObject *)
1345do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 Py_buffer vsep;
1348 char *s = PyBytes_AS_STRING(self);
1349 Py_ssize_t len = PyBytes_GET_SIZE(self);
1350 char *sep;
1351 Py_ssize_t seplen;
1352 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001353
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 if (_getbuffer(sepobj, &vsep) < 0)
1355 return NULL;
1356 sep = vsep.buf;
1357 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 i = 0;
1360 if (striptype != RIGHTSTRIP) {
1361 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1362 i++;
1363 }
1364 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 j = len;
1367 if (striptype != LEFTSTRIP) {
1368 do {
1369 j--;
1370 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1371 j++;
1372 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1377 Py_INCREF(self);
1378 return (PyObject*)self;
1379 }
1380 else
1381 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001382}
1383
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384
1385Py_LOCAL_INLINE(PyObject *)
1386do_strip(PyBytesObject *self, int striptype)
1387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 char *s = PyBytes_AS_STRING(self);
1389 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 i = 0;
1392 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001393 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 i++;
1395 }
1396 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 j = len;
1399 if (striptype != LEFTSTRIP) {
1400 do {
1401 j--;
David Malcolm96960882010-11-05 17:23:41 +00001402 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 j++;
1404 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1407 Py_INCREF(self);
1408 return (PyObject*)self;
1409 }
1410 else
1411 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412}
1413
1414
1415Py_LOCAL_INLINE(PyObject *)
1416do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1421 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 if (sep != NULL && sep != Py_None) {
1424 return do_xstrip(self, striptype, sep);
1425 }
1426 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427}
1428
1429
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001430PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001432\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001433Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001434If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001435static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001436bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001437{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 if (PyTuple_GET_SIZE(args) == 0)
1439 return do_strip(self, BOTHSTRIP); /* Common case */
1440 else
1441 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001442}
1443
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001445PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001448Strip leading bytes contained in the argument.\n\
1449If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001450static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001451bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001452{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 if (PyTuple_GET_SIZE(args) == 0)
1454 return do_strip(self, LEFTSTRIP); /* Common case */
1455 else
1456 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001457}
1458
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001459
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001460PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001463Strip trailing bytes contained in the argument.\n\
1464If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001465static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001466bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001467{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 if (PyTuple_GET_SIZE(args) == 0)
1469 return do_strip(self, RIGHTSTRIP); /* Common case */
1470 else
1471 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001472}
Neal Norwitz6968b052007-02-27 19:02:19 +00001473
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001474
1475PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001476"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001477\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001478Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001479string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001480as in slice notation.");
1481
1482static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001483bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 PyObject *sub_obj;
1486 const char *str = PyBytes_AS_STRING(self), *sub;
1487 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001488 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490
Antoine Pitrouac65d962011-10-20 23:54:17 +02001491 Py_buffer vsub;
1492 PyObject *count_obj;
1493
1494 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1495 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001497
Antoine Pitrouac65d962011-10-20 23:54:17 +02001498 if (sub_obj) {
1499 if (_getbuffer(sub_obj, &vsub) < 0)
1500 return NULL;
1501
1502 sub = vsub.buf;
1503 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001505 else {
1506 sub = &byte;
1507 sub_len = 1;
1508 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001511
Antoine Pitrouac65d962011-10-20 23:54:17 +02001512 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1514 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001515
1516 if (sub_obj)
1517 PyBuffer_Release(&vsub);
1518
1519 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520}
1521
1522
1523PyDoc_STRVAR(translate__doc__,
1524"B.translate(table[, deletechars]) -> bytes\n\
1525\n\
1526Return a copy of B, where all characters occurring in the\n\
1527optional argument deletechars are removed, and the remaining\n\
1528characters have been mapped through the given translation\n\
1529table, which must be a bytes object of length 256.");
1530
1531static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001532bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001533{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 register char *input, *output;
1535 const char *table;
1536 register Py_ssize_t i, c, changed = 0;
1537 PyObject *input_obj = (PyObject*)self;
1538 const char *output_start, *del_table=NULL;
1539 Py_ssize_t inlen, tablen, dellen = 0;
1540 PyObject *result;
1541 int trans_table[256];
1542 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1545 &tableobj, &delobj))
1546 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 if (PyBytes_Check(tableobj)) {
1549 table = PyBytes_AS_STRING(tableobj);
1550 tablen = PyBytes_GET_SIZE(tableobj);
1551 }
1552 else if (tableobj == Py_None) {
1553 table = NULL;
1554 tablen = 256;
1555 }
1556 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1557 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 if (tablen != 256) {
1560 PyErr_SetString(PyExc_ValueError,
1561 "translation table must be 256 characters long");
1562 return NULL;
1563 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 if (delobj != NULL) {
1566 if (PyBytes_Check(delobj)) {
1567 del_table = PyBytes_AS_STRING(delobj);
1568 dellen = PyBytes_GET_SIZE(delobj);
1569 }
1570 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1571 return NULL;
1572 }
1573 else {
1574 del_table = NULL;
1575 dellen = 0;
1576 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 inlen = PyBytes_GET_SIZE(input_obj);
1579 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1580 if (result == NULL)
1581 return NULL;
1582 output_start = output = PyBytes_AsString(result);
1583 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 if (dellen == 0 && table != NULL) {
1586 /* If no deletions are required, use faster code */
1587 for (i = inlen; --i >= 0; ) {
1588 c = Py_CHARMASK(*input++);
1589 if (Py_CHARMASK((*output++ = table[c])) != c)
1590 changed = 1;
1591 }
1592 if (changed || !PyBytes_CheckExact(input_obj))
1593 return result;
1594 Py_DECREF(result);
1595 Py_INCREF(input_obj);
1596 return input_obj;
1597 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 if (table == NULL) {
1600 for (i = 0; i < 256; i++)
1601 trans_table[i] = Py_CHARMASK(i);
1602 } else {
1603 for (i = 0; i < 256; i++)
1604 trans_table[i] = Py_CHARMASK(table[i]);
1605 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 for (i = 0; i < dellen; i++)
1608 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 for (i = inlen; --i >= 0; ) {
1611 c = Py_CHARMASK(*input++);
1612 if (trans_table[c] != -1)
1613 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1614 continue;
1615 changed = 1;
1616 }
1617 if (!changed && PyBytes_CheckExact(input_obj)) {
1618 Py_DECREF(result);
1619 Py_INCREF(input_obj);
1620 return input_obj;
1621 }
1622 /* Fix the size of the resulting string */
1623 if (inlen > 0)
1624 _PyBytes_Resize(&result, output - output_start);
1625 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626}
1627
1628
Georg Brandlabc38772009-04-12 15:51:51 +00001629static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001630bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001631{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001633}
1634
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001635/* find and count characters and substrings */
1636
/* findchar(buf, buf_len, ch): pointer to the first occurrence of byte `ch`
   in the first `buf_len` bytes of `buf`, or NULL if absent (thin wrapper
   around memchr() that yields a char *). */
#define findchar(buf, buf_len, ch)                              \
    ((char *)memchr((const void *)(buf), (ch), (buf_len)))
1639
1640/* String ops must return a string. */
1641/* If the object is subclass of string, create a copy */
1642Py_LOCAL(PyBytesObject *)
1643return_self(PyBytesObject *self)
1644{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 if (PyBytes_CheckExact(self)) {
1646 Py_INCREF(self);
1647 return self;
1648 }
1649 return (PyBytesObject *)PyBytes_FromStringAndSize(
1650 PyBytes_AS_STRING(self),
1651 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652}
1653
1654Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001655countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001656{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 Py_ssize_t count=0;
1658 const char *start=target;
1659 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 while ( (start=findchar(start, end-start, c)) != NULL ) {
1662 count++;
1663 if (count >= maxcount)
1664 break;
1665 start += 1;
1666 }
1667 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668}
1669
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670
1671/* Algorithms for different cases of string replacement */
1672
1673/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1674Py_LOCAL(PyBytesObject *)
1675replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 const char *to_s, Py_ssize_t to_len,
1677 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 char *self_s, *result_s;
1680 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001681 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001686 /* 1 at the end plus 1 after every character;
1687 count = min(maxcount, self_len + 1) */
1688 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001690 else
1691 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1692 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 /* Check for overflow */
1695 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001696 assert(count > 0);
1697 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 PyErr_SetString(PyExc_OverflowError,
1699 "replacement bytes are too long");
1700 return NULL;
1701 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001702 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 if (! (result = (PyBytesObject *)
1705 PyBytes_FromStringAndSize(NULL, result_len)) )
1706 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 self_s = PyBytes_AS_STRING(self);
1709 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 /* Lay the first one down (guaranteed this will occur) */
1714 Py_MEMCPY(result_s, to_s, to_len);
1715 result_s += to_len;
1716 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 for (i=0; i<count; i++) {
1719 *result_s++ = *self_s++;
1720 Py_MEMCPY(result_s, to_s, to_len);
1721 result_s += to_len;
1722 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 /* Copy the rest of the original string */
1725 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728}
1729
1730/* Special case for deleting a single character */
1731/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1732Py_LOCAL(PyBytesObject *)
1733replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 char *self_s, *result_s;
1737 char *start, *next, *end;
1738 Py_ssize_t self_len, result_len;
1739 Py_ssize_t count;
1740 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001742 self_len = PyBytes_GET_SIZE(self);
1743 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 count = countchar(self_s, self_len, from_c, maxcount);
1746 if (count == 0) {
1747 return return_self(self);
1748 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 result_len = self_len - count; /* from_len == 1 */
1751 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 if ( (result = (PyBytesObject *)
1754 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1755 return NULL;
1756 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 start = self_s;
1759 end = self_s + self_len;
1760 while (count-- > 0) {
1761 next = findchar(start, end-start, from_c);
1762 if (next == NULL)
1763 break;
1764 Py_MEMCPY(result_s, start, next-start);
1765 result_s += (next-start);
1766 start = next+1;
1767 }
1768 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771}
1772
1773/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1774
1775Py_LOCAL(PyBytesObject *)
1776replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001777 const char *from_s, Py_ssize_t from_len,
1778 Py_ssize_t maxcount) {
1779 char *self_s, *result_s;
1780 char *start, *next, *end;
1781 Py_ssize_t self_len, result_len;
1782 Py_ssize_t count, offset;
1783 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 self_len = PyBytes_GET_SIZE(self);
1786 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 count = stringlib_count(self_s, self_len,
1789 from_s, from_len,
1790 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 if (count == 0) {
1793 /* no matches */
1794 return return_self(self);
1795 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 result_len = self_len - (count * from_len);
1798 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001800 if ( (result = (PyBytesObject *)
1801 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1802 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001804 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 start = self_s;
1807 end = self_s + self_len;
1808 while (count-- > 0) {
1809 offset = stringlib_find(start, end-start,
1810 from_s, from_len,
1811 0);
1812 if (offset == -1)
1813 break;
1814 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001818 result_s += (next-start);
1819 start = next+from_len;
1820 }
1821 Py_MEMCPY(result_s, start, end-start);
1822 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001823}
1824
1825/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1826Py_LOCAL(PyBytesObject *)
1827replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 char from_c, char to_c,
1829 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 char *self_s, *result_s, *start, *end, *next;
1832 Py_ssize_t self_len;
1833 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 /* The result string will be the same size */
1836 self_s = PyBytes_AS_STRING(self);
1837 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 if (next == NULL) {
1842 /* No matches; return the original string */
1843 return return_self(self);
1844 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 /* Need to make a new string */
1847 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1848 if (result == NULL)
1849 return NULL;
1850 result_s = PyBytes_AS_STRING(result);
1851 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 /* change everything in-place, starting with this one */
1854 start = result_s + (next-self_s);
1855 *start = to_c;
1856 start++;
1857 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 while (--maxcount > 0) {
1860 next = findchar(start, end-start, from_c);
1861 if (next == NULL)
1862 break;
1863 *next = to_c;
1864 start = next+1;
1865 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868}
1869
1870/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1871Py_LOCAL(PyBytesObject *)
1872replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 const char *from_s, Py_ssize_t from_len,
1874 const char *to_s, Py_ssize_t to_len,
1875 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 char *result_s, *start, *end;
1878 char *self_s;
1879 Py_ssize_t self_len, offset;
1880 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 self_s = PyBytes_AS_STRING(self);
1885 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001887 offset = stringlib_find(self_s, self_len,
1888 from_s, from_len,
1889 0);
1890 if (offset == -1) {
1891 /* No matches; return the original string */
1892 return return_self(self);
1893 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 /* Need to make a new string */
1896 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1897 if (result == NULL)
1898 return NULL;
1899 result_s = PyBytes_AS_STRING(result);
1900 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001902 /* change everything in-place, starting with this one */
1903 start = result_s + offset;
1904 Py_MEMCPY(start, to_s, from_len);
1905 start += from_len;
1906 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001908 while ( --maxcount > 0) {
1909 offset = stringlib_find(start, end-start,
1910 from_s, from_len,
1911 0);
1912 if (offset==-1)
1913 break;
1914 Py_MEMCPY(start+offset, to_s, from_len);
1915 start += offset+from_len;
1916 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919}
1920
1921/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1922Py_LOCAL(PyBytesObject *)
1923replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 char from_c,
1925 const char *to_s, Py_ssize_t to_len,
1926 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 char *self_s, *result_s;
1929 char *start, *next, *end;
1930 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001931 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 self_s = PyBytes_AS_STRING(self);
1935 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 count = countchar(self_s, self_len, from_c, maxcount);
1938 if (count == 0) {
1939 /* no matches, return unchanged */
1940 return return_self(self);
1941 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 /* use the difference between current and new, hence the "-1" */
1944 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001945 assert(count > 0);
1946 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 PyErr_SetString(PyExc_OverflowError,
1948 "replacement bytes are too long");
1949 return NULL;
1950 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001951 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 if ( (result = (PyBytesObject *)
1954 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1955 return NULL;
1956 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 start = self_s;
1959 end = self_s + self_len;
1960 while (count-- > 0) {
1961 next = findchar(start, end-start, from_c);
1962 if (next == NULL)
1963 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 if (next == start) {
1966 /* replace with the 'to' */
1967 Py_MEMCPY(result_s, to_s, to_len);
1968 result_s += to_len;
1969 start += 1;
1970 } else {
1971 /* copy the unchanged old then the 'to' */
1972 Py_MEMCPY(result_s, start, next-start);
1973 result_s += (next-start);
1974 Py_MEMCPY(result_s, to_s, to_len);
1975 result_s += to_len;
1976 start = next+1;
1977 }
1978 }
1979 /* Copy the remainder of the remaining string */
1980 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001981
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983}
1984
1985/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1986Py_LOCAL(PyBytesObject *)
1987replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001988 const char *from_s, Py_ssize_t from_len,
1989 const char *to_s, Py_ssize_t to_len,
1990 Py_ssize_t maxcount) {
1991 char *self_s, *result_s;
1992 char *start, *next, *end;
1993 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001994 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 self_s = PyBytes_AS_STRING(self);
1998 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 count = stringlib_count(self_s, self_len,
2001 from_s, from_len,
2002 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 if (count == 0) {
2005 /* no matches, return unchanged */
2006 return return_self(self);
2007 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002009 /* Check for overflow */
2010 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002011 assert(count > 0);
2012 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 PyErr_SetString(PyExc_OverflowError,
2014 "replacement bytes are too long");
2015 return NULL;
2016 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002017 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 if ( (result = (PyBytesObject *)
2020 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2021 return NULL;
2022 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002024 start = self_s;
2025 end = self_s + self_len;
2026 while (count-- > 0) {
2027 offset = stringlib_find(start, end-start,
2028 from_s, from_len,
2029 0);
2030 if (offset == -1)
2031 break;
2032 next = start+offset;
2033 if (next == start) {
2034 /* replace with the 'to' */
2035 Py_MEMCPY(result_s, to_s, to_len);
2036 result_s += to_len;
2037 start += from_len;
2038 } else {
2039 /* copy the unchanged old then the 'to' */
2040 Py_MEMCPY(result_s, start, next-start);
2041 result_s += (next-start);
2042 Py_MEMCPY(result_s, to_s, to_len);
2043 result_s += to_len;
2044 start = next+from_len;
2045 }
2046 }
2047 /* Copy the remainder of the remaining string */
2048 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002051}
2052
2053
2054Py_LOCAL(PyBytesObject *)
2055replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002056 const char *from_s, Py_ssize_t from_len,
2057 const char *to_s, Py_ssize_t to_len,
2058 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 if (maxcount < 0) {
2061 maxcount = PY_SSIZE_T_MAX;
2062 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2063 /* nothing to do; return the original string */
2064 return return_self(self);
2065 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 if (maxcount == 0 ||
2068 (from_len == 0 && to_len == 0)) {
2069 /* nothing to do; return the original string */
2070 return return_self(self);
2071 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 if (from_len == 0) {
2076 /* insert the 'to' string everywhere. */
2077 /* >>> "Python".replace("", ".") */
2078 /* '.P.y.t.h.o.n.' */
2079 return replace_interleave(self, to_s, to_len, maxcount);
2080 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2083 /* point for an empty self string to generate a non-empty string */
2084 /* Special case so the remaining code always gets a non-empty string */
2085 if (PyBytes_GET_SIZE(self) == 0) {
2086 return return_self(self);
2087 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 if (to_len == 0) {
2090 /* delete all occurrences of 'from' string */
2091 if (from_len == 1) {
2092 return replace_delete_single_character(
2093 self, from_s[0], maxcount);
2094 } else {
2095 return replace_delete_substring(self, from_s,
2096 from_len, maxcount);
2097 }
2098 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 if (from_len == to_len) {
2103 if (from_len == 1) {
2104 return replace_single_character_in_place(
2105 self,
2106 from_s[0],
2107 to_s[0],
2108 maxcount);
2109 } else {
2110 return replace_substring_in_place(
2111 self, from_s, from_len, to_s, to_len,
2112 maxcount);
2113 }
2114 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002116 /* Otherwise use the more generic algorithms */
2117 if (from_len == 1) {
2118 return replace_single_character(self, from_s[0],
2119 to_s, to_len, maxcount);
2120 } else {
2121 /* len('from')>=2, len('to')>=1 */
2122 return replace_substring(self, from_s, from_len, to_s, to_len,
2123 maxcount);
2124 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125}
2126
2127PyDoc_STRVAR(replace__doc__,
2128"B.replace(old, new[, count]) -> bytes\n\
2129\n\
2130Return a copy of B with all occurrences of subsection\n\
2131old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002132given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133
2134static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002135bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 Py_ssize_t count = -1;
2138 PyObject *from, *to;
2139 const char *from_s, *to_s;
2140 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2143 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 if (PyBytes_Check(from)) {
2146 from_s = PyBytes_AS_STRING(from);
2147 from_len = PyBytes_GET_SIZE(from);
2148 }
2149 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2150 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 if (PyBytes_Check(to)) {
2153 to_s = PyBytes_AS_STRING(to);
2154 to_len = PyBytes_GET_SIZE(to);
2155 }
2156 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2157 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 return (PyObject *)replace((PyBytesObject *) self,
2160 from_s, from_len,
2161 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002162}
2163
2164/** End DALKE **/
2165
2166/* Matches the end (direction >= 0) or start (direction < 0) of self
2167 * against substr, using the start and end arguments. Returns
2168 * -1 on error, 0 if not found and 1 if found.
2169 */
2170Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002171_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 Py_ssize_t len = PyBytes_GET_SIZE(self);
2175 Py_ssize_t slen;
2176 const char* sub;
2177 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 if (PyBytes_Check(substr)) {
2180 sub = PyBytes_AS_STRING(substr);
2181 slen = PyBytes_GET_SIZE(substr);
2182 }
2183 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2184 return -1;
2185 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 if (direction < 0) {
2190 /* startswith */
2191 if (start+slen > len)
2192 return 0;
2193 } else {
2194 /* endswith */
2195 if (end-start < slen || start > len)
2196 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 if (end-slen > start)
2199 start = end - slen;
2200 }
2201 if (end-start >= slen)
2202 return ! memcmp(str+start, sub, slen);
2203 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204}
2205
2206
2207PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002208"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209\n\
2210Return True if B starts with the specified prefix, False otherwise.\n\
2211With optional start, test B beginning at that position.\n\
2212With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002213prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214
2215static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002216bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002217{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002218 Py_ssize_t start = 0;
2219 Py_ssize_t end = PY_SSIZE_T_MAX;
2220 PyObject *subobj;
2221 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
Jesus Ceaac451502011-04-20 17:09:23 +02002223 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 return NULL;
2225 if (PyTuple_Check(subobj)) {
2226 Py_ssize_t i;
2227 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2228 result = _bytes_tailmatch(self,
2229 PyTuple_GET_ITEM(subobj, i),
2230 start, end, -1);
2231 if (result == -1)
2232 return NULL;
2233 else if (result) {
2234 Py_RETURN_TRUE;
2235 }
2236 }
2237 Py_RETURN_FALSE;
2238 }
2239 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002240 if (result == -1) {
2241 if (PyErr_ExceptionMatches(PyExc_TypeError))
2242 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2243 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002244 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002245 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002246 else
2247 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248}
2249
2250
2251PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002252"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253\n\
2254Return True if B ends with the specified suffix, False otherwise.\n\
2255With optional start, test B beginning at that position.\n\
2256With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002257suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002258
2259static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002260bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002261{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002262 Py_ssize_t start = 0;
2263 Py_ssize_t end = PY_SSIZE_T_MAX;
2264 PyObject *subobj;
2265 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
Jesus Ceaac451502011-04-20 17:09:23 +02002267 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002268 return NULL;
2269 if (PyTuple_Check(subobj)) {
2270 Py_ssize_t i;
2271 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2272 result = _bytes_tailmatch(self,
2273 PyTuple_GET_ITEM(subobj, i),
2274 start, end, +1);
2275 if (result == -1)
2276 return NULL;
2277 else if (result) {
2278 Py_RETURN_TRUE;
2279 }
2280 }
2281 Py_RETURN_FALSE;
2282 }
2283 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002284 if (result == -1) {
2285 if (PyErr_ExceptionMatches(PyExc_TypeError))
2286 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2287 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002288 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002289 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 else
2291 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002292}
2293
2294
2295PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002296"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002298Decode B using the codec registered for encoding. Default encoding\n\
2299is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002300handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2301a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002303able to handle UnicodeDecodeErrors.");
2304
2305static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002306bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002307{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 const char *encoding = NULL;
2309 const char *errors = NULL;
2310 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002312 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2313 return NULL;
2314 if (encoding == NULL)
2315 encoding = PyUnicode_GetDefaultEncoding();
2316 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002317}
2318
Guido van Rossum20188312006-05-05 15:15:40 +00002319
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002320PyDoc_STRVAR(splitlines__doc__,
2321"B.splitlines([keepends]) -> list of lines\n\
2322\n\
2323Return a list of the lines in B, breaking at line boundaries.\n\
2324Line breaks are not included in the resulting list unless keepends\n\
2325is given and true.");
2326
2327static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002328bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002329{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002330 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002331 int keepends = 0;
2332
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002333 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2334 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002335 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002336
2337 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002338 (PyObject*) self, PyBytes_AS_STRING(self),
2339 PyBytes_GET_SIZE(self), keepends
2340 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002341}
2342
2343
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002344PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002345"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002346\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002348Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002349Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350
2351static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002352hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002354 if (c >= 128)
2355 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002356 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 return c - '0';
2358 else {
David Malcolm96960882010-11-05 17:23:41 +00002359 if (Py_ISUPPER(c))
2360 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 if (c >= 'a' && c <= 'f')
2362 return c - 'a' + 10;
2363 }
2364 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002365}
2366
2367static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002368bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002370 PyObject *newstring, *hexobj;
2371 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002372 Py_ssize_t hexlen, byteslen, i, j;
2373 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002374 void *data;
2375 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2378 return NULL;
2379 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002380 if (PyUnicode_READY(hexobj))
2381 return NULL;
2382 kind = PyUnicode_KIND(hexobj);
2383 data = PyUnicode_DATA(hexobj);
2384 hexlen = PyUnicode_GET_LENGTH(hexobj);
2385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 byteslen = hexlen/2; /* This overestimates if there are spaces */
2387 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2388 if (!newstring)
2389 return NULL;
2390 buf = PyBytes_AS_STRING(newstring);
2391 for (i = j = 0; i < hexlen; i += 2) {
2392 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002393 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002394 i++;
2395 if (i >= hexlen)
2396 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002397 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2398 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002399 if (top == -1 || bot == -1) {
2400 PyErr_Format(PyExc_ValueError,
2401 "non-hexadecimal number found in "
2402 "fromhex() arg at position %zd", i);
2403 goto error;
2404 }
2405 buf[j++] = (top << 4) + bot;
2406 }
2407 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2408 goto error;
2409 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002410
2411 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 Py_XDECREF(newstring);
2413 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002414}
2415
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002416PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002417"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002418
2419static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002420bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002421{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 Py_ssize_t res;
2423 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2424 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002425}
2426
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002427
2428static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002429bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002430{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002432}
2433
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002434
2435static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002436bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2438 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2439 _Py_capitalize__doc__},
2440 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2441 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2442 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2443 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2444 endswith__doc__},
2445 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2446 expandtabs__doc__},
2447 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2448 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2449 fromhex_doc},
2450 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2451 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2452 _Py_isalnum__doc__},
2453 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2454 _Py_isalpha__doc__},
2455 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2456 _Py_isdigit__doc__},
2457 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2458 _Py_islower__doc__},
2459 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2460 _Py_isspace__doc__},
2461 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2462 _Py_istitle__doc__},
2463 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2464 _Py_isupper__doc__},
2465 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2466 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2467 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2468 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2469 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2470 _Py_maketrans__doc__},
2471 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2472 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2473 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2474 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2475 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2476 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2477 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002478 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002480 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002481 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002482 splitlines__doc__},
2483 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2484 startswith__doc__},
2485 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2486 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2487 _Py_swapcase__doc__},
2488 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2489 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2490 translate__doc__},
2491 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2492 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2493 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2494 sizeof__doc__},
2495 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002496};
2497
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498static PyObject *
2499str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2500
2501static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002502bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 PyObject *x = NULL;
2505 const char *encoding = NULL;
2506 const char *errors = NULL;
2507 PyObject *new = NULL;
2508 Py_ssize_t size;
2509 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 if (type != &PyBytes_Type)
2512 return str_subtype_new(type, args, kwds);
2513 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2514 &encoding, &errors))
2515 return NULL;
2516 if (x == NULL) {
2517 if (encoding != NULL || errors != NULL) {
2518 PyErr_SetString(PyExc_TypeError,
2519 "encoding or errors without sequence "
2520 "argument");
2521 return NULL;
2522 }
2523 return PyBytes_FromString("");
2524 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 if (PyUnicode_Check(x)) {
2527 /* Encode via the codec registry */
2528 if (encoding == NULL) {
2529 PyErr_SetString(PyExc_TypeError,
2530 "string argument without an encoding");
2531 return NULL;
2532 }
2533 new = PyUnicode_AsEncodedString(x, encoding, errors);
2534 if (new == NULL)
2535 return NULL;
2536 assert(PyBytes_Check(new));
2537 return new;
2538 }
2539 /* Is it an integer? */
2540 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2541 if (size == -1 && PyErr_Occurred()) {
2542 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2543 return NULL;
2544 PyErr_Clear();
2545 }
2546 else if (size < 0) {
2547 PyErr_SetString(PyExc_ValueError, "negative count");
2548 return NULL;
2549 }
2550 else {
2551 new = PyBytes_FromStringAndSize(NULL, size);
2552 if (new == NULL) {
2553 return NULL;
2554 }
2555 if (size > 0) {
2556 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2557 }
2558 return new;
2559 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 /* If it's not unicode, there can't be encoding or errors */
2562 if (encoding != NULL || errors != NULL) {
2563 PyErr_SetString(PyExc_TypeError,
2564 "encoding or errors without a string argument");
2565 return NULL;
2566 }
2567 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002568}
2569
2570PyObject *
2571PyBytes_FromObject(PyObject *x)
2572{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 PyObject *new, *it;
2574 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002576 if (x == NULL) {
2577 PyErr_BadInternalCall();
2578 return NULL;
2579 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002580
2581 if (PyBytes_CheckExact(x)) {
2582 Py_INCREF(x);
2583 return x;
2584 }
2585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 /* Use the modern buffer interface */
2587 if (PyObject_CheckBuffer(x)) {
2588 Py_buffer view;
2589 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2590 return NULL;
2591 new = PyBytes_FromStringAndSize(NULL, view.len);
2592 if (!new)
2593 goto fail;
2594 /* XXX(brett.cannon): Better way to get to internal buffer? */
2595 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2596 &view, view.len, 'C') < 0)
2597 goto fail;
2598 PyBuffer_Release(&view);
2599 return new;
2600 fail:
2601 Py_XDECREF(new);
2602 PyBuffer_Release(&view);
2603 return NULL;
2604 }
2605 if (PyUnicode_Check(x)) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "cannot convert unicode object to bytes");
2608 return NULL;
2609 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 if (PyList_CheckExact(x)) {
2612 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2613 if (new == NULL)
2614 return NULL;
2615 for (i = 0; i < Py_SIZE(x); i++) {
2616 Py_ssize_t value = PyNumber_AsSsize_t(
2617 PyList_GET_ITEM(x, i), PyExc_ValueError);
2618 if (value == -1 && PyErr_Occurred()) {
2619 Py_DECREF(new);
2620 return NULL;
2621 }
2622 if (value < 0 || value >= 256) {
2623 PyErr_SetString(PyExc_ValueError,
2624 "bytes must be in range(0, 256)");
2625 Py_DECREF(new);
2626 return NULL;
2627 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002628 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 }
2630 return new;
2631 }
2632 if (PyTuple_CheckExact(x)) {
2633 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2634 if (new == NULL)
2635 return NULL;
2636 for (i = 0; i < Py_SIZE(x); i++) {
2637 Py_ssize_t value = PyNumber_AsSsize_t(
2638 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2639 if (value == -1 && PyErr_Occurred()) {
2640 Py_DECREF(new);
2641 return NULL;
2642 }
2643 if (value < 0 || value >= 256) {
2644 PyErr_SetString(PyExc_ValueError,
2645 "bytes must be in range(0, 256)");
2646 Py_DECREF(new);
2647 return NULL;
2648 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002649 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 }
2651 return new;
2652 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 /* For iterator version, create a string object and resize as needed */
2655 size = _PyObject_LengthHint(x, 64);
2656 if (size == -1 && PyErr_Occurred())
2657 return NULL;
2658 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2659 returning a shared empty bytes string. This required because we
2660 want to call _PyBytes_Resize() the returned object, which we can
2661 only do on bytes objects with refcount == 1. */
2662 size += 1;
2663 new = PyBytes_FromStringAndSize(NULL, size);
2664 if (new == NULL)
2665 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 /* Get the iterator */
2668 it = PyObject_GetIter(x);
2669 if (it == NULL)
2670 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* Run the iterator to exhaustion */
2673 for (i = 0; ; i++) {
2674 PyObject *item;
2675 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Get the next item */
2678 item = PyIter_Next(it);
2679 if (item == NULL) {
2680 if (PyErr_Occurred())
2681 goto error;
2682 break;
2683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 /* Interpret it as an int (__index__) */
2686 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2687 Py_DECREF(item);
2688 if (value == -1 && PyErr_Occurred())
2689 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* Range check */
2692 if (value < 0 || value >= 256) {
2693 PyErr_SetString(PyExc_ValueError,
2694 "bytes must be in range(0, 256)");
2695 goto error;
2696 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 /* Append the byte */
2699 if (i >= size) {
2700 size = 2 * size + 1;
2701 if (_PyBytes_Resize(&new, size) < 0)
2702 goto error;
2703 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002704 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 }
2706 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 /* Clean up and return success */
2709 Py_DECREF(it);
2710 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
2712 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 /* Error handling when new != NULL */
2714 Py_XDECREF(it);
2715 Py_DECREF(new);
2716 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717}
2718
2719static PyObject *
2720str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2721{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 PyObject *tmp, *pnew;
2723 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 assert(PyType_IsSubtype(type, &PyBytes_Type));
2726 tmp = bytes_new(&PyBytes_Type, args, kwds);
2727 if (tmp == NULL)
2728 return NULL;
2729 assert(PyBytes_CheckExact(tmp));
2730 n = PyBytes_GET_SIZE(tmp);
2731 pnew = type->tp_alloc(type, n);
2732 if (pnew != NULL) {
2733 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2734 PyBytes_AS_STRING(tmp), n+1);
2735 ((PyBytesObject *)pnew)->ob_shash =
2736 ((PyBytesObject *)tmp)->ob_shash;
2737 }
2738 Py_DECREF(tmp);
2739 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740}
2741
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002742PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002743"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002745bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002746bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2747bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002748\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002750 - an iterable yielding integers in range(256)\n\
2751 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002752 - any object implementing the buffer API.\n\
2753 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002754
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002755static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002756
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002758 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2759 "bytes",
2760 PyBytesObject_SIZE,
2761 sizeof(char),
2762 bytes_dealloc, /* tp_dealloc */
2763 0, /* tp_print */
2764 0, /* tp_getattr */
2765 0, /* tp_setattr */
2766 0, /* tp_reserved */
2767 (reprfunc)bytes_repr, /* tp_repr */
2768 0, /* tp_as_number */
2769 &bytes_as_sequence, /* tp_as_sequence */
2770 &bytes_as_mapping, /* tp_as_mapping */
2771 (hashfunc)bytes_hash, /* tp_hash */
2772 0, /* tp_call */
2773 bytes_str, /* tp_str */
2774 PyObject_GenericGetAttr, /* tp_getattro */
2775 0, /* tp_setattro */
2776 &bytes_as_buffer, /* tp_as_buffer */
2777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2778 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2779 bytes_doc, /* tp_doc */
2780 0, /* tp_traverse */
2781 0, /* tp_clear */
2782 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2783 0, /* tp_weaklistoffset */
2784 bytes_iter, /* tp_iter */
2785 0, /* tp_iternext */
2786 bytes_methods, /* tp_methods */
2787 0, /* tp_members */
2788 0, /* tp_getset */
2789 &PyBaseObject_Type, /* tp_base */
2790 0, /* tp_dict */
2791 0, /* tp_descr_get */
2792 0, /* tp_descr_set */
2793 0, /* tp_dictoffset */
2794 0, /* tp_init */
2795 0, /* tp_alloc */
2796 bytes_new, /* tp_new */
2797 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002798};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002799
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002800void
2801PyBytes_Concat(register PyObject **pv, register PyObject *w)
2802{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002803 register PyObject *v;
2804 assert(pv != NULL);
2805 if (*pv == NULL)
2806 return;
2807 if (w == NULL) {
2808 Py_DECREF(*pv);
2809 *pv = NULL;
2810 return;
2811 }
2812 v = bytes_concat(*pv, w);
2813 Py_DECREF(*pv);
2814 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815}
2816
2817void
2818PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2819{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 PyBytes_Concat(pv, w);
2821 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002822}
2823
2824
2825/* The following function breaks the notion that strings are immutable:
2826 it changes the size of a string. We get away with this only if there
2827 is only one module referencing the object. You can also think of it
2828 as creating a new string object and destroying the old one, only
2829 more efficiently. In any case, don't use this if the string may
2830 already be known to some other part of the code...
2831 Note that if there's not enough memory to resize the string, the original
2832 string object at *pv is deallocated, *pv is set to NULL, an "out of
2833 memory" exception is set, and -1 is returned. Else (on success) 0 is
2834 returned, and the value in *pv may or may not be the same as on input.
2835 As always, an extra byte is allocated for a trailing \0 byte (newsize
2836 does *not* include that), and a trailing \0 byte is stored.
2837*/
2838
2839int
2840_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2841{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002842 register PyObject *v;
2843 register PyBytesObject *sv;
2844 v = *pv;
2845 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2846 *pv = 0;
2847 Py_DECREF(v);
2848 PyErr_BadInternalCall();
2849 return -1;
2850 }
2851 /* XXX UNREF/NEWREF interface should be more symmetrical */
2852 _Py_DEC_REFTOTAL;
2853 _Py_ForgetReference(v);
2854 *pv = (PyObject *)
2855 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2856 if (*pv == NULL) {
2857 PyObject_Del(v);
2858 PyErr_NoMemory();
2859 return -1;
2860 }
2861 _Py_NewReference(*pv);
2862 sv = (PyBytesObject *) *pv;
2863 Py_SIZE(sv) = newsize;
2864 sv->ob_sval[newsize] = '\0';
2865 sv->ob_shash = -1; /* invalidate cached hash value */
2866 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867}
2868
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869void
2870PyBytes_Fini(void)
2871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 int i;
2873 for (i = 0; i < UCHAR_MAX + 1; i++) {
2874 Py_XDECREF(characters[i]);
2875 characters[i] = NULL;
2876 }
2877 Py_XDECREF(nullstring);
2878 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879}
2880
Benjamin Peterson4116f362008-05-27 00:36:20 +00002881/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002882
2883typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 PyObject_HEAD
2885 Py_ssize_t it_index;
2886 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002888
2889static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002891{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002892 _PyObject_GC_UNTRACK(it);
2893 Py_XDECREF(it->it_seq);
2894 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002895}
2896
2897static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002898striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002900 Py_VISIT(it->it_seq);
2901 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002902}
2903
2904static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002905striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002906{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002907 PyBytesObject *seq;
2908 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002910 assert(it != NULL);
2911 seq = it->it_seq;
2912 if (seq == NULL)
2913 return NULL;
2914 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002916 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2917 item = PyLong_FromLong(
2918 (unsigned char)seq->ob_sval[it->it_index]);
2919 if (item != NULL)
2920 ++it->it_index;
2921 return item;
2922 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 Py_DECREF(seq);
2925 it->it_seq = NULL;
2926 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002927}
2928
2929static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002931{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002932 Py_ssize_t len = 0;
2933 if (it->it_seq)
2934 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2935 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002936}
2937
2938PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002939 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002940
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002941static PyObject *
2942striter_reduce(striterobject *it)
2943{
2944 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002945 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002946 it->it_seq, it->it_index);
2947 } else {
2948 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2949 if (u == NULL)
2950 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002951 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002952 }
2953}
2954
2955PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2956
2957static PyObject *
2958striter_setstate(striterobject *it, PyObject *state)
2959{
2960 Py_ssize_t index = PyLong_AsSsize_t(state);
2961 if (index == -1 && PyErr_Occurred())
2962 return NULL;
2963 if (index < 0)
2964 index = 0;
2965 it->it_index = index;
2966 Py_RETURN_NONE;
2967}
2968
2969PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2970
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002971static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2973 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002974 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2975 reduce_doc},
2976 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2977 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002978 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002979};
2980
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002981PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002982 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2983 "bytes_iterator", /* tp_name */
2984 sizeof(striterobject), /* tp_basicsize */
2985 0, /* tp_itemsize */
2986 /* methods */
2987 (destructor)striter_dealloc, /* tp_dealloc */
2988 0, /* tp_print */
2989 0, /* tp_getattr */
2990 0, /* tp_setattr */
2991 0, /* tp_reserved */
2992 0, /* tp_repr */
2993 0, /* tp_as_number */
2994 0, /* tp_as_sequence */
2995 0, /* tp_as_mapping */
2996 0, /* tp_hash */
2997 0, /* tp_call */
2998 0, /* tp_str */
2999 PyObject_GenericGetAttr, /* tp_getattro */
3000 0, /* tp_setattro */
3001 0, /* tp_as_buffer */
3002 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3003 0, /* tp_doc */
3004 (traverseproc)striter_traverse, /* tp_traverse */
3005 0, /* tp_clear */
3006 0, /* tp_richcompare */
3007 0, /* tp_weaklistoffset */
3008 PyObject_SelfIter, /* tp_iter */
3009 (iternextfunc)striter_next, /* tp_iternext */
3010 striter_methods, /* tp_methods */
3011 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012};
3013
3014static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003015bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003019 if (!PyBytes_Check(seq)) {
3020 PyErr_BadInternalCall();
3021 return NULL;
3022 }
3023 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3024 if (it == NULL)
3025 return NULL;
3026 it->it_index = 0;
3027 Py_INCREF(seq);
3028 it->it_seq = (PyBytesObject *)seq;
3029 _PyObject_GC_TRACK(it);
3030 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003031}