blob: 614978b395506edebd86b1444065abdbdd499a27 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020013 PyBufferProcs *bufferprocs;
14 if (PyBytes_CheckExact(obj)) {
15 /* Fast path, e.g. for .join() of many bytes objects */
16 Py_INCREF(obj);
17 view->obj = obj;
18 view->buf = PyBytes_AS_STRING(obj);
19 view->len = PyBytes_GET_SIZE(obj);
20 return view->len;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Antoine Pitroucfc22b42012-10-16 21:07:23 +020023 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
24 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000025 {
Antoine Pitroud1188562010-06-09 16:38:55 +000026 PyErr_Format(PyExc_TypeError,
27 "Type %.100s doesn't support the buffer API",
28 Py_TYPE(obj)->tp_name);
29 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000031
Antoine Pitroucfc22b42012-10-16 21:07:23 +020032 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000034 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035}
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000038Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000040
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041static PyBytesObject *characters[UCHAR_MAX + 1];
42static PyBytesObject *nullstring;
43
Mark Dickinsonfd24b322008-12-06 15:33:31 +000044/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
45 for a string of length n should request PyBytesObject_SIZE + n bytes.
46
47 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
48 3 bytes per string allocation on a typical system.
49*/
50#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
51
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 For PyBytes_FromString(), the parameter `str' points to a null-terminated
54 string containing exactly `size' bytes.
55
   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
58 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
59 not have to be null-terminated. (Therefore it is safe to construct a
60 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
61 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
62 bytes (setting the last byte to the null terminating character) and you can
63 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000064 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065 alter the data yourself, since the strings may be shared.
66
67 The PyObject member `op->ob_size', which denotes the number of "extra
68 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020069 allocated for string data, not counting the null terminating character.
70 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071 PyBytes_FromStringAndSize()) or the length of the string in the `str'
72 parameter (for PyBytes_FromString()).
73*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000074PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000076{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020077 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (size < 0) {
79 PyErr_SetString(PyExc_SystemError,
80 "Negative size passed to PyBytes_FromStringAndSize");
81 return NULL;
82 }
83 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
90 if (size == 1 && str != NULL &&
91 (op = characters[*str & UCHAR_MAX]) != NULL)
92 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000093#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000095#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 Py_INCREF(op);
97 return (PyObject *)op;
98 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
101 PyErr_SetString(PyExc_OverflowError,
102 "byte string is too large");
103 return NULL;
104 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 /* Inline PyObject_NewVar */
107 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
108 if (op == NULL)
109 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100110 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 op->ob_shash = -1;
112 if (str != NULL)
113 Py_MEMCPY(op->ob_sval, str, size);
114 op->ob_sval[size] = '\0';
115 /* share short strings */
116 if (size == 0) {
117 nullstring = op;
118 Py_INCREF(op);
119 } else if (size == 1 && str != NULL) {
120 characters[*str & UCHAR_MAX] = op;
121 Py_INCREF(op);
122 }
123 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000124}
125
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000126PyObject *
127PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000128{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200129 size_t size;
130 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 assert(str != NULL);
133 size = strlen(str);
134 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
135 PyErr_SetString(PyExc_OverflowError,
136 "byte string is too long");
137 return NULL;
138 }
139 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 Py_INCREF(op);
144 return (PyObject *)op;
145 }
146 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000147#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 /* Inline PyObject_NewVar */
155 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
156 if (op == NULL)
157 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100158 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 op->ob_shash = -1;
160 Py_MEMCPY(op->ob_sval, str, size+1);
161 /* share short strings */
162 if (size == 0) {
163 nullstring = op;
164 Py_INCREF(op);
165 } else if (size == 1) {
166 characters[*str & UCHAR_MAX] = op;
167 Py_INCREF(op);
168 }
169 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000170}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000171
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172PyObject *
173PyBytes_FromFormatV(const char *format, va_list vargs)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 va_list count;
176 Py_ssize_t n = 0;
177 const char* f;
178 char *s;
179 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000181 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 /* step 1: figure out how large a buffer we need */
183 for (f = format; *f; f++) {
184 if (*f == '%') {
185 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000186 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
190 * they don't affect the amount of space we reserve.
191 */
192 if ((*f == 'l' || *f == 'z') &&
193 (f[1] == 'd' || f[1] == 'u'))
194 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 switch (*f) {
197 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100198 {
199 int c = va_arg(count, int);
200 if (c < 0 || c > 255) {
201 PyErr_SetString(PyExc_OverflowError,
202 "PyBytes_FromFormatV(): %c format "
203 "expects an integer in range [0; 255]");
204 return NULL;
205 }
206 n++;
207 break;
208 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 case '%':
210 n++;
211 break;
212 case 'd': case 'u': case 'i': case 'x':
213 (void) va_arg(count, int);
214 /* 20 bytes is enough to hold a 64-bit
215 integer. Decimal takes the most space.
216 This isn't enough for octal. */
217 n += 20;
218 break;
219 case 's':
220 s = va_arg(count, char*);
221 n += strlen(s);
222 break;
223 case 'p':
224 (void) va_arg(count, int);
225 /* maximum 64-bit pointer representation:
226 * 0xffffffffffffffff
227 * so 19 characters is enough.
228 * XXX I count 18 -- what's the extra for?
229 */
230 n += 19;
231 break;
232 default:
233 /* if we stumble upon an unknown
234 formatting code, copy the rest of
235 the format string to the output
236 string. (we cannot just skip the
237 code, since there's no way to know
238 what's in the argument list) */
239 n += strlen(p);
240 goto expand;
241 }
242 } else
243 n++;
244 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000245 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 /* step 2: fill the buffer */
247 /* Since we've analyzed how much space we need for the worst case,
248 use sprintf directly instead of the slower PyOS_snprintf. */
249 string = PyBytes_FromStringAndSize(NULL, n);
250 if (!string)
251 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 for (f = format; *f; f++) {
256 if (*f == '%') {
257 const char* p = f++;
258 Py_ssize_t i;
259 int longflag = 0;
260 int size_tflag = 0;
261 /* parse the width.precision part (we're only
262 interested in the precision value, if any) */
263 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000264 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 n = (n*10) + *f++ - '0';
266 if (*f == '.') {
267 f++;
268 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000269 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 n = (n*10) + *f++ - '0';
271 }
David Malcolm96960882010-11-05 17:23:41 +0000272 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 f++;
274 /* handle the long flag, but only for %ld and %lu.
275 others can be added when necessary. */
276 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
277 longflag = 1;
278 ++f;
279 }
280 /* handle the size_t flag. */
281 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
282 size_tflag = 1;
283 ++f;
284 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 switch (*f) {
287 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100288 {
289 int c = va_arg(vargs, int);
290 /* c has been checked for overflow in the first step */
291 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100293 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 case 'd':
295 if (longflag)
296 sprintf(s, "%ld", va_arg(vargs, long));
297 else if (size_tflag)
298 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
299 va_arg(vargs, Py_ssize_t));
300 else
301 sprintf(s, "%d", va_arg(vargs, int));
302 s += strlen(s);
303 break;
304 case 'u':
305 if (longflag)
306 sprintf(s, "%lu",
307 va_arg(vargs, unsigned long));
308 else if (size_tflag)
309 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
310 va_arg(vargs, size_t));
311 else
312 sprintf(s, "%u",
313 va_arg(vargs, unsigned int));
314 s += strlen(s);
315 break;
316 case 'i':
317 sprintf(s, "%i", va_arg(vargs, int));
318 s += strlen(s);
319 break;
320 case 'x':
321 sprintf(s, "%x", va_arg(vargs, int));
322 s += strlen(s);
323 break;
324 case 's':
325 p = va_arg(vargs, char*);
326 i = strlen(p);
327 if (n > 0 && i > n)
328 i = n;
329 Py_MEMCPY(s, p, i);
330 s += i;
331 break;
332 case 'p':
333 sprintf(s, "%p", va_arg(vargs, void*));
334 /* %p is ill-defined: ensure leading 0x. */
335 if (s[1] == 'X')
336 s[1] = 'x';
337 else if (s[1] != 'x') {
338 memmove(s+2, s, strlen(s)+1);
339 s[0] = '0';
340 s[1] = 'x';
341 }
342 s += strlen(s);
343 break;
344 case '%':
345 *s++ = '%';
346 break;
347 default:
348 strcpy(s, p);
349 s += strlen(s);
350 goto end;
351 }
352 } else
353 *s++ = *f;
354 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355
356 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
358 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
377static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000378bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000379{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000381}
382
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383/* Unescape a backslash-escaped string. If unicode is non-zero,
384 the string is a u-literal. If recode_encoding is non-zero,
385 the string is UTF-8 encoded and should be re-encoded in the
386 specified encoding. */
387
388PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 Py_ssize_t len,
390 const char *errors,
391 Py_ssize_t unicode,
392 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 int c;
395 char *p, *buf;
396 const char *end;
397 PyObject *v;
398 Py_ssize_t newlen = recode_encoding ? 4*len:len;
399 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
400 if (v == NULL)
401 return NULL;
402 p = buf = PyBytes_AsString(v);
403 end = s + len;
404 while (s < end) {
405 if (*s != '\\') {
406 non_esc:
407 if (recode_encoding && (*s & 0x80)) {
408 PyObject *u, *w;
409 char *r;
410 const char* t;
411 Py_ssize_t rn;
412 t = s;
413 /* Decode non-ASCII bytes as UTF-8. */
414 while (t < end && (*t & 0x80)) t++;
415 u = PyUnicode_DecodeUTF8(s, t - s, errors);
416 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 /* Recode them in target encoding. */
419 w = PyUnicode_AsEncodedString(
420 u, recode_encoding, errors);
421 Py_DECREF(u);
422 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 /* Append bytes to output buffer. */
425 assert(PyBytes_Check(w));
426 r = PyBytes_AS_STRING(w);
427 rn = PyBytes_GET_SIZE(w);
428 Py_MEMCPY(p, r, rn);
429 p += rn;
430 Py_DECREF(w);
431 s = t;
432 } else {
433 *p++ = *s++;
434 }
435 continue;
436 }
437 s++;
438 if (s==end) {
439 PyErr_SetString(PyExc_ValueError,
440 "Trailing \\ in string");
441 goto failed;
442 }
443 switch (*s++) {
444 /* XXX This assumes ASCII! */
445 case '\n': break;
446 case '\\': *p++ = '\\'; break;
447 case '\'': *p++ = '\''; break;
448 case '\"': *p++ = '\"'; break;
449 case 'b': *p++ = '\b'; break;
450 case 'f': *p++ = '\014'; break; /* FF */
451 case 't': *p++ = '\t'; break;
452 case 'n': *p++ = '\n'; break;
453 case 'r': *p++ = '\r'; break;
454 case 'v': *p++ = '\013'; break; /* VT */
455 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
456 case '0': case '1': case '2': case '3':
457 case '4': case '5': case '6': case '7':
458 c = s[-1] - '0';
459 if (s < end && '0' <= *s && *s <= '7') {
460 c = (c<<3) + *s++ - '0';
461 if (s < end && '0' <= *s && *s <= '7')
462 c = (c<<3) + *s++ - '0';
463 }
464 *p++ = c;
465 break;
466 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000467 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 unsigned int x = 0;
469 c = Py_CHARMASK(*s);
470 s++;
David Malcolm96960882010-11-05 17:23:41 +0000471 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000473 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 x = 10 + c - 'a';
475 else
476 x = 10 + c - 'A';
477 x = x << 4;
478 c = Py_CHARMASK(*s);
479 s++;
David Malcolm96960882010-11-05 17:23:41 +0000480 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000482 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 x += 10 + c - 'a';
484 else
485 x += 10 + c - 'A';
486 *p++ = x;
487 break;
488 }
489 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200490 PyErr_Format(PyExc_ValueError,
491 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200492 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000493 goto failed;
494 }
495 if (strcmp(errors, "replace") == 0) {
496 *p++ = '?';
497 } else if (strcmp(errors, "ignore") == 0)
498 /* do nothing */;
499 else {
500 PyErr_Format(PyExc_ValueError,
501 "decoding error; unknown "
502 "error handling code: %.400s",
503 errors);
504 goto failed;
505 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200506 /* skip \x */
507 if (s < end && Py_ISXDIGIT(s[0]))
508 s++; /* and a hexdigit */
509 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 default:
511 *p++ = '\\';
512 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200513 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 UTF-8 bytes may follow. */
515 }
516 }
517 if (p-buf < newlen)
518 _PyBytes_Resize(&v, p - buf);
519 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000520 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 Py_DECREF(v);
522 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000523}
524
525/* -------------------------------------------------------------------- */
526/* object api */
527
528Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200529PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000530{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 if (!PyBytes_Check(op)) {
532 PyErr_Format(PyExc_TypeError,
533 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
534 return -1;
535 }
536 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000537}
538
539char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200540PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000541{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (!PyBytes_Check(op)) {
543 PyErr_Format(PyExc_TypeError,
544 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
545 return NULL;
546 }
547 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
549
550int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200551PyBytes_AsStringAndSize(PyObject *obj,
552 char **s,
553 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000554{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 if (s == NULL) {
556 PyErr_BadInternalCall();
557 return -1;
558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 if (!PyBytes_Check(obj)) {
561 PyErr_Format(PyExc_TypeError,
562 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
563 return -1;
564 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 *s = PyBytes_AS_STRING(obj);
567 if (len != NULL)
568 *len = PyBytes_GET_SIZE(obj);
569 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
570 PyErr_SetString(PyExc_TypeError,
571 "expected bytes with no null");
572 return -1;
573 }
574 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000575}
Neal Norwitz6968b052007-02-27 19:02:19 +0000576
577/* -------------------------------------------------------------------- */
578/* Methods */
579
Eric Smith0923d1d2009-04-16 20:16:10 +0000580#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000581
582#include "stringlib/fastsearch.h"
583#include "stringlib/count.h"
584#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200585#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000586#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000587#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000588#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000589
Eric Smith0f78bff2009-11-30 01:01:42 +0000590#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000591
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000592PyObject *
593PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000594{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200595 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 Py_ssize_t i, length = Py_SIZE(op);
597 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200599 unsigned char quote, *s, *p;
600
601 /* Compute size of output string */
602 squotes = dquotes = 0;
603 newsize = 3; /* b'' */
604 s = (unsigned char*)op->ob_sval;
605 for (i = 0; i < length; i++) {
606 switch(s[i]) {
607 case '\'': squotes++; newsize++; break;
608 case '"': dquotes++; newsize++; break;
609 case '\\': case '\t': case '\n': case '\r':
610 newsize += 2; break; /* \C */
611 default:
612 if (s[i] < ' ' || s[i] >= 0x7f)
613 newsize += 4; /* \xHH */
614 else
615 newsize++;
616 }
617 }
618 quote = '\'';
619 if (smartquotes && squotes && !dquotes)
620 quote = '"';
621 if (squotes && quote == '\'')
622 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200623
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200624 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 PyErr_SetString(PyExc_OverflowError,
626 "bytes object is too large to make repr");
627 return NULL;
628 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629
630 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 if (v == NULL) {
632 return NULL;
633 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200634 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000635
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200636 *p++ = 'b', *p++ = quote;
637 for (i = 0; i < length; i++) {
638 unsigned char c = op->ob_sval[i];
639 if (c == quote || c == '\\')
640 *p++ = '\\', *p++ = c;
641 else if (c == '\t')
642 *p++ = '\\', *p++ = 't';
643 else if (c == '\n')
644 *p++ = '\\', *p++ = 'n';
645 else if (c == '\r')
646 *p++ = '\\', *p++ = 'r';
647 else if (c < ' ' || c >= 0x7f) {
648 *p++ = '\\';
649 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200650 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
651 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200653 else
654 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200656 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200657 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200658 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000659}
660
Neal Norwitz6968b052007-02-27 19:02:19 +0000661static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000662bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000663{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000665}
666
Neal Norwitz6968b052007-02-27 19:02:19 +0000667static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 if (Py_BytesWarningFlag) {
671 if (PyErr_WarnEx(PyExc_BytesWarning,
672 "str() on a bytes instance", 1))
673 return NULL;
674 }
675 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000676}
677
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000679bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000680{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000682}
Neal Norwitz6968b052007-02-27 19:02:19 +0000683
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000684/* This is also used by PyBytes_Concat() */
685static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000686bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 Py_ssize_t size;
689 Py_buffer va, vb;
690 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000692 va.len = -1;
693 vb.len = -1;
694 if (_getbuffer(a, &va) < 0 ||
695 _getbuffer(b, &vb) < 0) {
696 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
697 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
698 goto done;
699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 /* Optimize end cases */
702 if (va.len == 0 && PyBytes_CheckExact(b)) {
703 result = b;
704 Py_INCREF(result);
705 goto done;
706 }
707 if (vb.len == 0 && PyBytes_CheckExact(a)) {
708 result = a;
709 Py_INCREF(result);
710 goto done;
711 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 size = va.len + vb.len;
714 if (size < 0) {
715 PyErr_NoMemory();
716 goto done;
717 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 result = PyBytes_FromStringAndSize(NULL, size);
720 if (result != NULL) {
721 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
722 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
723 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000724
725 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 if (va.len != -1)
727 PyBuffer_Release(&va);
728 if (vb.len != -1)
729 PyBuffer_Release(&vb);
730 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000731}
Neal Norwitz6968b052007-02-27 19:02:19 +0000732
733static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200734bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000735{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200736 Py_ssize_t i;
737 Py_ssize_t j;
738 Py_ssize_t size;
739 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 size_t nbytes;
741 if (n < 0)
742 n = 0;
743 /* watch out for overflows: the size can overflow int,
744 * and the # of bytes needed can overflow size_t
745 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000746 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 PyErr_SetString(PyExc_OverflowError,
748 "repeated bytes are too long");
749 return NULL;
750 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000751 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
753 Py_INCREF(a);
754 return (PyObject *)a;
755 }
756 nbytes = (size_t)size;
757 if (nbytes + PyBytesObject_SIZE <= nbytes) {
758 PyErr_SetString(PyExc_OverflowError,
759 "repeated bytes are too long");
760 return NULL;
761 }
762 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
763 if (op == NULL)
764 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100765 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 op->ob_shash = -1;
767 op->ob_sval[size] = '\0';
768 if (Py_SIZE(a) == 1 && n > 0) {
769 memset(op->ob_sval, a->ob_sval[0] , n);
770 return (PyObject *) op;
771 }
772 i = 0;
773 if (i < size) {
774 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
775 i = Py_SIZE(a);
776 }
777 while (i < size) {
778 j = (i <= size-i) ? i : size-i;
779 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
780 i += j;
781 }
782 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000783}
784
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000786bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000787{
788 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
789 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000790 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000791 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000792 PyErr_Clear();
793 if (_getbuffer(arg, &varg) < 0)
794 return -1;
795 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
796 varg.buf, varg.len, 0);
797 PyBuffer_Release(&varg);
798 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000799 }
800 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000801 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
802 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000803 }
804
Antoine Pitrou0010d372010-08-15 17:12:55 +0000805 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000806}
807
Neal Norwitz6968b052007-02-27 19:02:19 +0000808static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200809bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 if (i < 0 || i >= Py_SIZE(a)) {
812 PyErr_SetString(PyExc_IndexError, "index out of range");
813 return NULL;
814 }
815 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000816}
817
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100818Py_LOCAL(int)
819bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
820{
821 int cmp;
822 Py_ssize_t len;
823
824 len = Py_SIZE(a);
825 if (Py_SIZE(b) != len)
826 return 0;
827
828 if (a->ob_sval[0] != b->ob_sval[0])
829 return 0;
830
831 cmp = memcmp(a->ob_sval, b->ob_sval, len);
832 return (cmp == 0);
833}
834
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000835static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000836bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000837{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 int c;
839 Py_ssize_t len_a, len_b;
840 Py_ssize_t min_len;
841 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 /* Make sure both arguments are strings. */
844 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
845 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
846 (PyObject_IsInstance((PyObject*)a,
847 (PyObject*)&PyUnicode_Type) ||
848 PyObject_IsInstance((PyObject*)b,
849 (PyObject*)&PyUnicode_Type))) {
850 if (PyErr_WarnEx(PyExc_BytesWarning,
851 "Comparison between bytes and string", 1))
852 return NULL;
853 }
854 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100856 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100858 case Py_EQ:
859 case Py_LE:
860 case Py_GE:
861 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100863 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100864 case Py_NE:
865 case Py_LT:
866 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100868 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100869 default:
870 PyErr_BadArgument();
871 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 }
873 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100874 else if (op == Py_EQ || op == Py_NE) {
875 int eq = bytes_compare_eq(a, b);
876 eq ^= (op == Py_NE);
877 result = eq ? Py_True : Py_False;
878 }
879 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100880 len_a = Py_SIZE(a);
881 len_b = Py_SIZE(b);
882 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100883 if (min_len > 0) {
884 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100885 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100886 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100888 else
889 c = 0;
890 if (c == 0)
891 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
892 switch (op) {
893 case Py_LT: c = c < 0; break;
894 case Py_LE: c = c <= 0; break;
895 case Py_GT: c = c > 0; break;
896 case Py_GE: c = c >= 0; break;
897 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100898 PyErr_BadArgument();
899 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100900 }
901 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 Py_INCREF(result);
905 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000906}
907
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000908static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000909bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000910{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100911 if (a->ob_shash == -1) {
912 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100913 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100914 }
915 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000916}
917
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000918static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000919bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000920{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 if (PyIndex_Check(item)) {
922 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
923 if (i == -1 && PyErr_Occurred())
924 return NULL;
925 if (i < 0)
926 i += PyBytes_GET_SIZE(self);
927 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
928 PyErr_SetString(PyExc_IndexError,
929 "index out of range");
930 return NULL;
931 }
932 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
933 }
934 else if (PySlice_Check(item)) {
935 Py_ssize_t start, stop, step, slicelength, cur, i;
936 char* source_buf;
937 char* result_buf;
938 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000939
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000940 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 PyBytes_GET_SIZE(self),
942 &start, &stop, &step, &slicelength) < 0) {
943 return NULL;
944 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 if (slicelength <= 0) {
947 return PyBytes_FromStringAndSize("", 0);
948 }
949 else if (start == 0 && step == 1 &&
950 slicelength == PyBytes_GET_SIZE(self) &&
951 PyBytes_CheckExact(self)) {
952 Py_INCREF(self);
953 return (PyObject *)self;
954 }
955 else if (step == 1) {
956 return PyBytes_FromStringAndSize(
957 PyBytes_AS_STRING(self) + start,
958 slicelength);
959 }
960 else {
961 source_buf = PyBytes_AS_STRING(self);
962 result = PyBytes_FromStringAndSize(NULL, slicelength);
963 if (result == NULL)
964 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 result_buf = PyBytes_AS_STRING(result);
967 for (cur = start, i = 0; i < slicelength;
968 cur += step, i++) {
969 result_buf[i] = source_buf[cur];
970 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 return result;
973 }
974 }
975 else {
976 PyErr_Format(PyExc_TypeError,
977 "byte indices must be integers, not %.200s",
978 Py_TYPE(item)->tp_name);
979 return NULL;
980 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000981}
982
983static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000984bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000985{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
987 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000988}
989
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000990static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 (lenfunc)bytes_length, /*sq_length*/
992 (binaryfunc)bytes_concat, /*sq_concat*/
993 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
994 (ssizeargfunc)bytes_item, /*sq_item*/
995 0, /*sq_slice*/
996 0, /*sq_ass_item*/
997 0, /*sq_ass_slice*/
998 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000999};
1000
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001001static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 (lenfunc)bytes_length,
1003 (binaryfunc)bytes_subscript,
1004 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001005};
1006
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001007static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 (getbufferproc)bytes_buffer_getbuffer,
1009 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001010};
1011
1012
/* Which end(s) a strip operation trims.  The values double as indexes
   into stripformat[] below. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2

/* Argument-parsing format strings, indexed by the constants above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip type i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i] + 3)
1021
Neal Norwitz6968b052007-02-27 19:02:19 +00001022PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001023"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001024\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001025Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001026If sep is not specified or is None, B is split on ASCII whitespace\n\
1027characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001028If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001029
1030static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001031bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001032{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001033 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1035 Py_ssize_t maxsplit = -1;
1036 const char *s = PyBytes_AS_STRING(self), *sub;
1037 Py_buffer vsub;
1038 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001039
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001040 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1041 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 return NULL;
1043 if (maxsplit < 0)
1044 maxsplit = PY_SSIZE_T_MAX;
1045 if (subobj == Py_None)
1046 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1047 if (_getbuffer(subobj, &vsub) < 0)
1048 return NULL;
1049 sub = vsub.buf;
1050 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1053 PyBuffer_Release(&vsub);
1054 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001055}
1056
Neal Norwitz6968b052007-02-27 19:02:19 +00001057PyDoc_STRVAR(partition__doc__,
1058"B.partition(sep) -> (head, sep, tail)\n\
1059\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001060Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001061the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001063
1064static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001065bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 const char *sep;
1068 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 if (PyBytes_Check(sep_obj)) {
1071 sep = PyBytes_AS_STRING(sep_obj);
1072 sep_len = PyBytes_GET_SIZE(sep_obj);
1073 }
1074 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1075 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return stringlib_partition(
1078 (PyObject*) self,
1079 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1080 sep_obj, sep, sep_len
1081 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001082}
1083
1084PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001085"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001086\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001087Search for the separator sep in B, starting at the end of B,\n\
1088and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001089part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001090bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001091
1092static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001093bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001094{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 const char *sep;
1096 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 if (PyBytes_Check(sep_obj)) {
1099 sep = PyBytes_AS_STRING(sep_obj);
1100 sep_len = PyBytes_GET_SIZE(sep_obj);
1101 }
1102 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1103 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 return stringlib_rpartition(
1106 (PyObject*) self,
1107 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1108 sep_obj, sep, sep_len
1109 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001110}
1111
Neal Norwitz6968b052007-02-27 19:02:19 +00001112PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001113"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001114\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001115Return a list of the sections in B, using sep as the delimiter,\n\
1116starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001117If sep is not given, B is split on ASCII whitespace characters\n\
1118(space, tab, return, newline, formfeed, vertical tab).\n\
1119If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001120
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001121
Neal Norwitz6968b052007-02-27 19:02:19 +00001122static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001123bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001124{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001125 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1127 Py_ssize_t maxsplit = -1;
1128 const char *s = PyBytes_AS_STRING(self), *sub;
1129 Py_buffer vsub;
1130 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001131
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001132 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1133 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 return NULL;
1135 if (maxsplit < 0)
1136 maxsplit = PY_SSIZE_T_MAX;
1137 if (subobj == Py_None)
1138 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1139 if (_getbuffer(subobj, &vsub) < 0)
1140 return NULL;
1141 sub = vsub.buf;
1142 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1145 PyBuffer_Release(&vsub);
1146 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001147}
1148
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001149
1150PyDoc_STRVAR(join__doc__,
1151"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001152\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001153Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001154Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1155
Neal Norwitz6968b052007-02-27 19:02:19 +00001156static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001157bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001158{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001159 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001160}
1161
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001162PyObject *
1163_PyBytes_Join(PyObject *sep, PyObject *x)
1164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 assert(sep != NULL && PyBytes_Check(sep));
1166 assert(x != NULL);
1167 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168}
1169
/* Helper macro to fix up start/end slice values in place.
 *
 * end is clipped to len when too large; a negative start or end is taken
 * relative to len and then clipped to 0.  start is NOT clipped against
 * len or end, so callers must cope with start > end.
 *
 * The body is wrapped in do { ... } while (0) so the macro behaves as a
 * single statement (safe under an unbraced if/else); invoke it with a
 * trailing semicolon.  start and end must be modifiable lvalues; every
 * argument is evaluated more than once, so pass side-effect-free
 * expressions only.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184
1185Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001186bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001187{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001189 char byte;
1190 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 const char *sub;
1192 Py_ssize_t sub_len;
1193 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001194 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001195
Antoine Pitrouac65d962011-10-20 23:54:17 +02001196 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1197 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199
Antoine Pitrouac65d962011-10-20 23:54:17 +02001200 if (subobj) {
1201 if (_getbuffer(subobj, &subbuf) < 0)
1202 return -2;
1203
1204 sub = subbuf.buf;
1205 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001207 else {
1208 sub = &byte;
1209 sub_len = 1;
1210 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001213 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1215 sub, sub_len, start, end);
1216 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001217 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1219 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001220
1221 if (subobj)
1222 PyBuffer_Release(&subbuf);
1223
1224 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225}
1226
1227
1228PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001229"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001230\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001231Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001232such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001234\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235Return -1 on failure.");
1236
Neal Norwitz6968b052007-02-27 19:02:19 +00001237static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001238bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 Py_ssize_t result = bytes_find_internal(self, args, +1);
1241 if (result == -2)
1242 return NULL;
1243 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001244}
1245
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246
1247PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001248"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001249\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250Like B.find() but raise ValueError when the substring is not found.");
1251
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001252static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001253bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001254{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 Py_ssize_t result = bytes_find_internal(self, args, +1);
1256 if (result == -2)
1257 return NULL;
1258 if (result == -1) {
1259 PyErr_SetString(PyExc_ValueError,
1260 "substring not found");
1261 return NULL;
1262 }
1263 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001264}
1265
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266
1267PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001268"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001269\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001271such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001273\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274Return -1 on failure.");
1275
Neal Norwitz6968b052007-02-27 19:02:19 +00001276static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001277bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001278{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 Py_ssize_t result = bytes_find_internal(self, args, -1);
1280 if (result == -2)
1281 return NULL;
1282 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001283}
1284
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001285
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001287"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001288\n\
1289Like B.rfind() but raise ValueError when the substring is not found.");
1290
1291static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001292bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001293{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 Py_ssize_t result = bytes_find_internal(self, args, -1);
1295 if (result == -2)
1296 return NULL;
1297 if (result == -1) {
1298 PyErr_SetString(PyExc_ValueError,
1299 "substring not found");
1300 return NULL;
1301 }
1302 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001303}
1304
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001305
1306Py_LOCAL_INLINE(PyObject *)
1307do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001308{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 Py_buffer vsep;
1310 char *s = PyBytes_AS_STRING(self);
1311 Py_ssize_t len = PyBytes_GET_SIZE(self);
1312 char *sep;
1313 Py_ssize_t seplen;
1314 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 if (_getbuffer(sepobj, &vsep) < 0)
1317 return NULL;
1318 sep = vsep.buf;
1319 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 i = 0;
1322 if (striptype != RIGHTSTRIP) {
1323 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1324 i++;
1325 }
1326 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 j = len;
1329 if (striptype != LEFTSTRIP) {
1330 do {
1331 j--;
1332 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1333 j++;
1334 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001337
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1339 Py_INCREF(self);
1340 return (PyObject*)self;
1341 }
1342 else
1343 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001344}
1345
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001346
1347Py_LOCAL_INLINE(PyObject *)
1348do_strip(PyBytesObject *self, int striptype)
1349{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 char *s = PyBytes_AS_STRING(self);
1351 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 i = 0;
1354 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001355 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 i++;
1357 }
1358 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 j = len;
1361 if (striptype != LEFTSTRIP) {
1362 do {
1363 j--;
David Malcolm96960882010-11-05 17:23:41 +00001364 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 j++;
1366 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1369 Py_INCREF(self);
1370 return (PyObject*)self;
1371 }
1372 else
1373 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374}
1375
1376
1377Py_LOCAL_INLINE(PyObject *)
1378do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1379{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381
Serhiy Storchakac6792272013-10-19 21:03:34 +03001382 if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 if (sep != NULL && sep != Py_None) {
1386 return do_xstrip(self, striptype, sep);
1387 }
1388 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389}
1390
1391
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001392PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001394\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001395Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001396If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001397static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001398bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001399{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 if (PyTuple_GET_SIZE(args) == 0)
1401 return do_strip(self, BOTHSTRIP); /* Common case */
1402 else
1403 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001404}
1405
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001407PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001409\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001410Strip leading bytes contained in the argument.\n\
1411If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001412static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001413bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001414{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (PyTuple_GET_SIZE(args) == 0)
1416 return do_strip(self, LEFTSTRIP); /* Common case */
1417 else
1418 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001419}
1420
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001422PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001424\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001425Strip trailing bytes contained in the argument.\n\
1426If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001427static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001428bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001429{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 if (PyTuple_GET_SIZE(args) == 0)
1431 return do_strip(self, RIGHTSTRIP); /* Common case */
1432 else
1433 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001434}
Neal Norwitz6968b052007-02-27 19:02:19 +00001435
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436
1437PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001438"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001439\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001441string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001442as in slice notation.");
1443
1444static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001445bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 PyObject *sub_obj;
1448 const char *str = PyBytes_AS_STRING(self), *sub;
1449 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001450 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001452
Antoine Pitrouac65d962011-10-20 23:54:17 +02001453 Py_buffer vsub;
1454 PyObject *count_obj;
1455
1456 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1457 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001459
Antoine Pitrouac65d962011-10-20 23:54:17 +02001460 if (sub_obj) {
1461 if (_getbuffer(sub_obj, &vsub) < 0)
1462 return NULL;
1463
1464 sub = vsub.buf;
1465 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001467 else {
1468 sub = &byte;
1469 sub_len = 1;
1470 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001473
Antoine Pitrouac65d962011-10-20 23:54:17 +02001474 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1476 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001477
1478 if (sub_obj)
1479 PyBuffer_Release(&vsub);
1480
1481 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482}
1483
1484
1485PyDoc_STRVAR(translate__doc__,
1486"B.translate(table[, deletechars]) -> bytes\n\
1487\n\
1488Return a copy of B, where all characters occurring in the\n\
1489optional argument deletechars are removed, and the remaining\n\
1490characters have been mapped through the given translation\n\
1491table, which must be a bytes object of length 256.");
1492
1493static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001494bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001495{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001496 char *input, *output;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 const char *table;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001498 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 PyObject *input_obj = (PyObject*)self;
1500 const char *output_start, *del_table=NULL;
1501 Py_ssize_t inlen, tablen, dellen = 0;
1502 PyObject *result;
1503 int trans_table[256];
1504 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1507 &tableobj, &delobj))
1508 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 if (PyBytes_Check(tableobj)) {
1511 table = PyBytes_AS_STRING(tableobj);
1512 tablen = PyBytes_GET_SIZE(tableobj);
1513 }
1514 else if (tableobj == Py_None) {
1515 table = NULL;
1516 tablen = 256;
1517 }
1518 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1519 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 if (tablen != 256) {
1522 PyErr_SetString(PyExc_ValueError,
1523 "translation table must be 256 characters long");
1524 return NULL;
1525 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 if (delobj != NULL) {
1528 if (PyBytes_Check(delobj)) {
1529 del_table = PyBytes_AS_STRING(delobj);
1530 dellen = PyBytes_GET_SIZE(delobj);
1531 }
1532 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1533 return NULL;
1534 }
1535 else {
1536 del_table = NULL;
1537 dellen = 0;
1538 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 inlen = PyBytes_GET_SIZE(input_obj);
1541 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1542 if (result == NULL)
1543 return NULL;
1544 output_start = output = PyBytes_AsString(result);
1545 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 if (dellen == 0 && table != NULL) {
1548 /* If no deletions are required, use faster code */
1549 for (i = inlen; --i >= 0; ) {
1550 c = Py_CHARMASK(*input++);
1551 if (Py_CHARMASK((*output++ = table[c])) != c)
1552 changed = 1;
1553 }
1554 if (changed || !PyBytes_CheckExact(input_obj))
1555 return result;
1556 Py_DECREF(result);
1557 Py_INCREF(input_obj);
1558 return input_obj;
1559 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 if (table == NULL) {
1562 for (i = 0; i < 256; i++)
1563 trans_table[i] = Py_CHARMASK(i);
1564 } else {
1565 for (i = 0; i < 256; i++)
1566 trans_table[i] = Py_CHARMASK(table[i]);
1567 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 for (i = 0; i < dellen; i++)
1570 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 for (i = inlen; --i >= 0; ) {
1573 c = Py_CHARMASK(*input++);
1574 if (trans_table[c] != -1)
1575 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1576 continue;
1577 changed = 1;
1578 }
1579 if (!changed && PyBytes_CheckExact(input_obj)) {
1580 Py_DECREF(result);
1581 Py_INCREF(input_obj);
1582 return input_obj;
1583 }
1584 /* Fix the size of the resulting string */
1585 if (inlen > 0)
1586 _PyBytes_Resize(&result, output - output_start);
1587 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588}
1589
1590
Georg Brandlabc38772009-04-12 15:51:51 +00001591static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001592bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001593{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001595}
1596
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597/* find and count characters and substrings */
1598
/* Pointer to the first occurrence of byte c within the first target_len
   bytes of target, or NULL when it does not occur (memchr semantics). */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1601
1602/* String ops must return a string. */
1603/* If the object is subclass of string, create a copy */
1604Py_LOCAL(PyBytesObject *)
1605return_self(PyBytesObject *self)
1606{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 if (PyBytes_CheckExact(self)) {
1608 Py_INCREF(self);
1609 return self;
1610 }
1611 return (PyBytesObject *)PyBytes_FromStringAndSize(
1612 PyBytes_AS_STRING(self),
1613 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001614}
1615
1616Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001617countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001618{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 Py_ssize_t count=0;
1620 const char *start=target;
1621 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 while ( (start=findchar(start, end-start, c)) != NULL ) {
1624 count++;
1625 if (count >= maxcount)
1626 break;
1627 start += 1;
1628 }
1629 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630}
1631
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632
1633/* Algorithms for different cases of string replacement */
1634
1635/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1636Py_LOCAL(PyBytesObject *)
1637replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 const char *to_s, Py_ssize_t to_len,
1639 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 char *self_s, *result_s;
1642 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001643 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001648 /* 1 at the end plus 1 after every character;
1649 count = min(maxcount, self_len + 1) */
1650 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001652 else
1653 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1654 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 /* Check for overflow */
1657 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001658 assert(count > 0);
1659 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 PyErr_SetString(PyExc_OverflowError,
1661 "replacement bytes are too long");
1662 return NULL;
1663 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001664 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 if (! (result = (PyBytesObject *)
1667 PyBytes_FromStringAndSize(NULL, result_len)) )
1668 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 self_s = PyBytes_AS_STRING(self);
1671 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 /* Lay the first one down (guaranteed this will occur) */
1676 Py_MEMCPY(result_s, to_s, to_len);
1677 result_s += to_len;
1678 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 for (i=0; i<count; i++) {
1681 *result_s++ = *self_s++;
1682 Py_MEMCPY(result_s, to_s, to_len);
1683 result_s += to_len;
1684 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 /* Copy the rest of the original string */
1687 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690}
1691
1692/* Special case for deleting a single character */
1693/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1694Py_LOCAL(PyBytesObject *)
1695replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 char *self_s, *result_s;
1699 char *start, *next, *end;
1700 Py_ssize_t self_len, result_len;
1701 Py_ssize_t count;
1702 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 self_len = PyBytes_GET_SIZE(self);
1705 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 count = countchar(self_s, self_len, from_c, maxcount);
1708 if (count == 0) {
1709 return return_self(self);
1710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 result_len = self_len - count; /* from_len == 1 */
1713 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 if ( (result = (PyBytesObject *)
1716 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1717 return NULL;
1718 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 start = self_s;
1721 end = self_s + self_len;
1722 while (count-- > 0) {
1723 next = findchar(start, end-start, from_c);
1724 if (next == NULL)
1725 break;
1726 Py_MEMCPY(result_s, start, next-start);
1727 result_s += (next-start);
1728 start = next+1;
1729 }
1730 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733}
1734
1735/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1736
1737Py_LOCAL(PyBytesObject *)
1738replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 const char *from_s, Py_ssize_t from_len,
1740 Py_ssize_t maxcount) {
1741 char *self_s, *result_s;
1742 char *start, *next, *end;
1743 Py_ssize_t self_len, result_len;
1744 Py_ssize_t count, offset;
1745 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 self_len = PyBytes_GET_SIZE(self);
1748 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 count = stringlib_count(self_s, self_len,
1751 from_s, from_len,
1752 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 if (count == 0) {
1755 /* no matches */
1756 return return_self(self);
1757 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 result_len = self_len - (count * from_len);
1760 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 if ( (result = (PyBytesObject *)
1763 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1764 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 start = self_s;
1769 end = self_s + self_len;
1770 while (count-- > 0) {
1771 offset = stringlib_find(start, end-start,
1772 from_s, from_len,
1773 0);
1774 if (offset == -1)
1775 break;
1776 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 result_s += (next-start);
1781 start = next+from_len;
1782 }
1783 Py_MEMCPY(result_s, start, end-start);
1784 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785}
1786
1787/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1788Py_LOCAL(PyBytesObject *)
1789replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 char from_c, char to_c,
1791 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 char *self_s, *result_s, *start, *end, *next;
1794 Py_ssize_t self_len;
1795 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 /* The result string will be the same size */
1798 self_s = PyBytes_AS_STRING(self);
1799 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 if (next == NULL) {
1804 /* No matches; return the original string */
1805 return return_self(self);
1806 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 /* Need to make a new string */
1809 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1810 if (result == NULL)
1811 return NULL;
1812 result_s = PyBytes_AS_STRING(result);
1813 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 /* change everything in-place, starting with this one */
1816 start = result_s + (next-self_s);
1817 *start = to_c;
1818 start++;
1819 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001820
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821 while (--maxcount > 0) {
1822 next = findchar(start, end-start, from_c);
1823 if (next == NULL)
1824 break;
1825 *next = to_c;
1826 start = next+1;
1827 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830}
1831
1832/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1833Py_LOCAL(PyBytesObject *)
1834replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 const char *from_s, Py_ssize_t from_len,
1836 const char *to_s, Py_ssize_t to_len,
1837 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 char *result_s, *start, *end;
1840 char *self_s;
1841 Py_ssize_t self_len, offset;
1842 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 self_s = PyBytes_AS_STRING(self);
1847 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 offset = stringlib_find(self_s, self_len,
1850 from_s, from_len,
1851 0);
1852 if (offset == -1) {
1853 /* No matches; return the original string */
1854 return return_self(self);
1855 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 /* Need to make a new string */
1858 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1859 if (result == NULL)
1860 return NULL;
1861 result_s = PyBytes_AS_STRING(result);
1862 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 /* change everything in-place, starting with this one */
1865 start = result_s + offset;
1866 Py_MEMCPY(start, to_s, from_len);
1867 start += from_len;
1868 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 while ( --maxcount > 0) {
1871 offset = stringlib_find(start, end-start,
1872 from_s, from_len,
1873 0);
1874 if (offset==-1)
1875 break;
1876 Py_MEMCPY(start+offset, to_s, from_len);
1877 start += offset+from_len;
1878 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881}
1882
1883/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1884Py_LOCAL(PyBytesObject *)
1885replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 char from_c,
1887 const char *to_s, Py_ssize_t to_len,
1888 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 char *self_s, *result_s;
1891 char *start, *next, *end;
1892 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001893 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 self_s = PyBytes_AS_STRING(self);
1897 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 count = countchar(self_s, self_len, from_c, maxcount);
1900 if (count == 0) {
1901 /* no matches, return unchanged */
1902 return return_self(self);
1903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 /* use the difference between current and new, hence the "-1" */
1906 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001907 assert(count > 0);
1908 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 PyErr_SetString(PyExc_OverflowError,
1910 "replacement bytes are too long");
1911 return NULL;
1912 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001913 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 if ( (result = (PyBytesObject *)
1916 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1917 return NULL;
1918 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 start = self_s;
1921 end = self_s + self_len;
1922 while (count-- > 0) {
1923 next = findchar(start, end-start, from_c);
1924 if (next == NULL)
1925 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 if (next == start) {
1928 /* replace with the 'to' */
1929 Py_MEMCPY(result_s, to_s, to_len);
1930 result_s += to_len;
1931 start += 1;
1932 } else {
1933 /* copy the unchanged old then the 'to' */
1934 Py_MEMCPY(result_s, start, next-start);
1935 result_s += (next-start);
1936 Py_MEMCPY(result_s, to_s, to_len);
1937 result_s += to_len;
1938 start = next+1;
1939 }
1940 }
1941 /* Copy the remainder of the remaining string */
1942 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945}
1946
1947/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1948Py_LOCAL(PyBytesObject *)
1949replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 const char *from_s, Py_ssize_t from_len,
1951 const char *to_s, Py_ssize_t to_len,
1952 Py_ssize_t maxcount) {
1953 char *self_s, *result_s;
1954 char *start, *next, *end;
1955 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001956 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 self_s = PyBytes_AS_STRING(self);
1960 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 count = stringlib_count(self_s, self_len,
1963 from_s, from_len,
1964 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 if (count == 0) {
1967 /* no matches, return unchanged */
1968 return return_self(self);
1969 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 /* Check for overflow */
1972 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001973 assert(count > 0);
1974 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 PyErr_SetString(PyExc_OverflowError,
1976 "replacement bytes are too long");
1977 return NULL;
1978 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001979 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 if ( (result = (PyBytesObject *)
1982 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1983 return NULL;
1984 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 start = self_s;
1987 end = self_s + self_len;
1988 while (count-- > 0) {
1989 offset = stringlib_find(start, end-start,
1990 from_s, from_len,
1991 0);
1992 if (offset == -1)
1993 break;
1994 next = start+offset;
1995 if (next == start) {
1996 /* replace with the 'to' */
1997 Py_MEMCPY(result_s, to_s, to_len);
1998 result_s += to_len;
1999 start += from_len;
2000 } else {
2001 /* copy the unchanged old then the 'to' */
2002 Py_MEMCPY(result_s, start, next-start);
2003 result_s += (next-start);
2004 Py_MEMCPY(result_s, to_s, to_len);
2005 result_s += to_len;
2006 start = next+from_len;
2007 }
2008 }
2009 /* Copy the remainder of the remaining string */
2010 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002012 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013}
2014
2015
2016Py_LOCAL(PyBytesObject *)
2017replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 const char *from_s, Py_ssize_t from_len,
2019 const char *to_s, Py_ssize_t to_len,
2020 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 if (maxcount < 0) {
2023 maxcount = PY_SSIZE_T_MAX;
2024 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2025 /* nothing to do; return the original string */
2026 return return_self(self);
2027 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 if (maxcount == 0 ||
2030 (from_len == 0 && to_len == 0)) {
2031 /* nothing to do; return the original string */
2032 return return_self(self);
2033 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 if (from_len == 0) {
2038 /* insert the 'to' string everywhere. */
2039 /* >>> "Python".replace("", ".") */
2040 /* '.P.y.t.h.o.n.' */
2041 return replace_interleave(self, to_s, to_len, maxcount);
2042 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002044 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2045 /* point for an empty self string to generate a non-empty string */
2046 /* Special case so the remaining code always gets a non-empty string */
2047 if (PyBytes_GET_SIZE(self) == 0) {
2048 return return_self(self);
2049 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 if (to_len == 0) {
2052 /* delete all occurrences of 'from' string */
2053 if (from_len == 1) {
2054 return replace_delete_single_character(
2055 self, from_s[0], maxcount);
2056 } else {
2057 return replace_delete_substring(self, from_s,
2058 from_len, maxcount);
2059 }
2060 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 if (from_len == to_len) {
2065 if (from_len == 1) {
2066 return replace_single_character_in_place(
2067 self,
2068 from_s[0],
2069 to_s[0],
2070 maxcount);
2071 } else {
2072 return replace_substring_in_place(
2073 self, from_s, from_len, to_s, to_len,
2074 maxcount);
2075 }
2076 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 /* Otherwise use the more generic algorithms */
2079 if (from_len == 1) {
2080 return replace_single_character(self, from_s[0],
2081 to_s, to_len, maxcount);
2082 } else {
2083 /* len('from')>=2, len('to')>=1 */
2084 return replace_substring(self, from_s, from_len, to_s, to_len,
2085 maxcount);
2086 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087}
2088
2089PyDoc_STRVAR(replace__doc__,
2090"B.replace(old, new[, count]) -> bytes\n\
2091\n\
2092Return a copy of B with all occurrences of subsection\n\
2093old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002094given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
2096static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002097bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 Py_ssize_t count = -1;
2100 PyObject *from, *to;
2101 const char *from_s, *to_s;
2102 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2105 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 if (PyBytes_Check(from)) {
2108 from_s = PyBytes_AS_STRING(from);
2109 from_len = PyBytes_GET_SIZE(from);
2110 }
2111 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2112 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002114 if (PyBytes_Check(to)) {
2115 to_s = PyBytes_AS_STRING(to);
2116 to_len = PyBytes_GET_SIZE(to);
2117 }
2118 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2119 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 return (PyObject *)replace((PyBytesObject *) self,
2122 from_s, from_len,
2123 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124}
2125
2126/** End DALKE **/
2127
2128/* Matches the end (direction >= 0) or start (direction < 0) of self
2129 * against substr, using the start and end arguments. Returns
2130 * -1 on error, 0 if not found and 1 if found.
2131 */
2132Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002133_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 Py_ssize_t len = PyBytes_GET_SIZE(self);
2137 Py_ssize_t slen;
2138 const char* sub;
2139 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 if (PyBytes_Check(substr)) {
2142 sub = PyBytes_AS_STRING(substr);
2143 slen = PyBytes_GET_SIZE(substr);
2144 }
2145 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2146 return -1;
2147 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 if (direction < 0) {
2152 /* startswith */
2153 if (start+slen > len)
2154 return 0;
2155 } else {
2156 /* endswith */
2157 if (end-start < slen || start > len)
2158 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 if (end-slen > start)
2161 start = end - slen;
2162 }
2163 if (end-start >= slen)
2164 return ! memcmp(str+start, sub, slen);
2165 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166}
2167
2168
2169PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002170"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171\n\
2172Return True if B starts with the specified prefix, False otherwise.\n\
2173With optional start, test B beginning at that position.\n\
2174With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002175prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002176
2177static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002178bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 Py_ssize_t start = 0;
2181 Py_ssize_t end = PY_SSIZE_T_MAX;
2182 PyObject *subobj;
2183 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Jesus Ceaac451502011-04-20 17:09:23 +02002185 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 return NULL;
2187 if (PyTuple_Check(subobj)) {
2188 Py_ssize_t i;
2189 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2190 result = _bytes_tailmatch(self,
2191 PyTuple_GET_ITEM(subobj, i),
2192 start, end, -1);
2193 if (result == -1)
2194 return NULL;
2195 else if (result) {
2196 Py_RETURN_TRUE;
2197 }
2198 }
2199 Py_RETURN_FALSE;
2200 }
2201 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002202 if (result == -1) {
2203 if (PyErr_ExceptionMatches(PyExc_TypeError))
2204 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2205 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002206 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002207 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002208 else
2209 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210}
2211
2212
2213PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002214"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215\n\
2216Return True if B ends with the specified suffix, False otherwise.\n\
2217With optional start, test B beginning at that position.\n\
2218With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002219suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
2221static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002222bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 Py_ssize_t start = 0;
2225 Py_ssize_t end = PY_SSIZE_T_MAX;
2226 PyObject *subobj;
2227 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002228
Jesus Ceaac451502011-04-20 17:09:23 +02002229 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002230 return NULL;
2231 if (PyTuple_Check(subobj)) {
2232 Py_ssize_t i;
2233 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2234 result = _bytes_tailmatch(self,
2235 PyTuple_GET_ITEM(subobj, i),
2236 start, end, +1);
2237 if (result == -1)
2238 return NULL;
2239 else if (result) {
2240 Py_RETURN_TRUE;
2241 }
2242 }
2243 Py_RETURN_FALSE;
2244 }
2245 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002246 if (result == -1) {
2247 if (PyErr_ExceptionMatches(PyExc_TypeError))
2248 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2249 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002251 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002252 else
2253 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002254}
2255
2256
2257PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002258"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002260Decode B using the codec registered for encoding. Default encoding\n\
2261is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002262handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2263a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002264as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002265able to handle UnicodeDecodeErrors.");
2266
2267static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002268bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 const char *encoding = NULL;
2271 const char *errors = NULL;
2272 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002274 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2275 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002276 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002277}
2278
Guido van Rossum20188312006-05-05 15:15:40 +00002279
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002280PyDoc_STRVAR(splitlines__doc__,
2281"B.splitlines([keepends]) -> list of lines\n\
2282\n\
2283Return a list of the lines in B, breaking at line boundaries.\n\
2284Line breaks are not included in the resulting list unless keepends\n\
2285is given and true.");
2286
2287static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002288bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002289{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002290 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002291 int keepends = 0;
2292
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002293 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2294 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002295 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002296
2297 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002298 (PyObject*) self, PyBytes_AS_STRING(self),
2299 PyBytes_GET_SIZE(self), keepends
2300 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002301}
2302
2303
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002304PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002305"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002306\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002308Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002309Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002310
2311static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002312hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 if (c >= 128)
2315 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002316 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 return c - '0';
2318 else {
David Malcolm96960882010-11-05 17:23:41 +00002319 if (Py_ISUPPER(c))
2320 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 if (c >= 'a' && c <= 'f')
2322 return c - 'a' + 10;
2323 }
2324 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002325}
2326
2327static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002328bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002329{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 PyObject *newstring, *hexobj;
2331 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002332 Py_ssize_t hexlen, byteslen, i, j;
2333 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002334 void *data;
2335 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2338 return NULL;
2339 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002340 if (PyUnicode_READY(hexobj))
2341 return NULL;
2342 kind = PyUnicode_KIND(hexobj);
2343 data = PyUnicode_DATA(hexobj);
2344 hexlen = PyUnicode_GET_LENGTH(hexobj);
2345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 byteslen = hexlen/2; /* This overestimates if there are spaces */
2347 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2348 if (!newstring)
2349 return NULL;
2350 buf = PyBytes_AS_STRING(newstring);
2351 for (i = j = 0; i < hexlen; i += 2) {
2352 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002353 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002354 i++;
2355 if (i >= hexlen)
2356 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002357 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2358 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 if (top == -1 || bot == -1) {
2360 PyErr_Format(PyExc_ValueError,
2361 "non-hexadecimal number found in "
2362 "fromhex() arg at position %zd", i);
2363 goto error;
2364 }
2365 buf[j++] = (top << 4) + bot;
2366 }
2367 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2368 goto error;
2369 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002370
2371 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002372 Py_XDECREF(newstring);
2373 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002374}
2375
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002376PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002377"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002378
2379static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002380bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002381{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 Py_ssize_t res;
2383 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2384 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002385}
2386
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002387
2388static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002389bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002392}
2393
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002394
2395static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002396bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002397 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2398 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2399 _Py_capitalize__doc__},
2400 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2401 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2402 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2403 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2404 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002405 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 expandtabs__doc__},
2407 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2408 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2409 fromhex_doc},
2410 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2411 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2412 _Py_isalnum__doc__},
2413 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2414 _Py_isalpha__doc__},
2415 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2416 _Py_isdigit__doc__},
2417 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2418 _Py_islower__doc__},
2419 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2420 _Py_isspace__doc__},
2421 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2422 _Py_istitle__doc__},
2423 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2424 _Py_isupper__doc__},
2425 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2426 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2427 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2428 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2429 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2430 _Py_maketrans__doc__},
2431 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2432 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2433 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2434 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2435 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2436 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2437 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002438 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002440 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002441 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002442 splitlines__doc__},
2443 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2444 startswith__doc__},
2445 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2446 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2447 _Py_swapcase__doc__},
2448 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2449 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2450 translate__doc__},
2451 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2452 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2453 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2454 sizeof__doc__},
2455 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002456};
2457
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002458static PyObject *
2459str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2460
2461static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002462bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002463{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 PyObject *x = NULL;
2465 const char *encoding = NULL;
2466 const char *errors = NULL;
2467 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002468 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 Py_ssize_t size;
2470 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002471 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 if (type != &PyBytes_Type)
2474 return str_subtype_new(type, args, kwds);
2475 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2476 &encoding, &errors))
2477 return NULL;
2478 if (x == NULL) {
2479 if (encoding != NULL || errors != NULL) {
2480 PyErr_SetString(PyExc_TypeError,
2481 "encoding or errors without sequence "
2482 "argument");
2483 return NULL;
2484 }
2485 return PyBytes_FromString("");
2486 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 if (PyUnicode_Check(x)) {
2489 /* Encode via the codec registry */
2490 if (encoding == NULL) {
2491 PyErr_SetString(PyExc_TypeError,
2492 "string argument without an encoding");
2493 return NULL;
2494 }
2495 new = PyUnicode_AsEncodedString(x, encoding, errors);
2496 if (new == NULL)
2497 return NULL;
2498 assert(PyBytes_Check(new));
2499 return new;
2500 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002501
2502 /* We'd like to call PyObject_Bytes here, but we need to check for an
2503 integer argument before deferring to PyBytes_FromObject, something
2504 PyObject_Bytes doesn't do. */
2505 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2506 if (func != NULL) {
2507 new = PyObject_CallFunctionObjArgs(func, NULL);
2508 Py_DECREF(func);
2509 if (new == NULL)
2510 return NULL;
2511 if (!PyBytes_Check(new)) {
2512 PyErr_Format(PyExc_TypeError,
2513 "__bytes__ returned non-bytes (type %.200s)",
2514 Py_TYPE(new)->tp_name);
2515 Py_DECREF(new);
2516 return NULL;
2517 }
2518 return new;
2519 }
2520 else if (PyErr_Occurred())
2521 return NULL;
2522
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 /* Is it an integer? */
2524 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2525 if (size == -1 && PyErr_Occurred()) {
2526 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2527 return NULL;
2528 PyErr_Clear();
2529 }
2530 else if (size < 0) {
2531 PyErr_SetString(PyExc_ValueError, "negative count");
2532 return NULL;
2533 }
2534 else {
2535 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002536 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002537 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002538 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002539 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002540 return new;
2541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 /* If it's not unicode, there can't be encoding or errors */
2544 if (encoding != NULL || errors != NULL) {
2545 PyErr_SetString(PyExc_TypeError,
2546 "encoding or errors without a string argument");
2547 return NULL;
2548 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002549
2550 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002551}
2552
2553PyObject *
2554PyBytes_FromObject(PyObject *x)
2555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 PyObject *new, *it;
2557 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 if (x == NULL) {
2560 PyErr_BadInternalCall();
2561 return NULL;
2562 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002563
2564 if (PyBytes_CheckExact(x)) {
2565 Py_INCREF(x);
2566 return x;
2567 }
2568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 /* Use the modern buffer interface */
2570 if (PyObject_CheckBuffer(x)) {
2571 Py_buffer view;
2572 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2573 return NULL;
2574 new = PyBytes_FromStringAndSize(NULL, view.len);
2575 if (!new)
2576 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2578 &view, view.len, 'C') < 0)
2579 goto fail;
2580 PyBuffer_Release(&view);
2581 return new;
2582 fail:
2583 Py_XDECREF(new);
2584 PyBuffer_Release(&view);
2585 return NULL;
2586 }
2587 if (PyUnicode_Check(x)) {
2588 PyErr_SetString(PyExc_TypeError,
2589 "cannot convert unicode object to bytes");
2590 return NULL;
2591 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002593 if (PyList_CheckExact(x)) {
2594 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2595 if (new == NULL)
2596 return NULL;
2597 for (i = 0; i < Py_SIZE(x); i++) {
2598 Py_ssize_t value = PyNumber_AsSsize_t(
2599 PyList_GET_ITEM(x, i), PyExc_ValueError);
2600 if (value == -1 && PyErr_Occurred()) {
2601 Py_DECREF(new);
2602 return NULL;
2603 }
2604 if (value < 0 || value >= 256) {
2605 PyErr_SetString(PyExc_ValueError,
2606 "bytes must be in range(0, 256)");
2607 Py_DECREF(new);
2608 return NULL;
2609 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002610 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 }
2612 return new;
2613 }
2614 if (PyTuple_CheckExact(x)) {
2615 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2616 if (new == NULL)
2617 return NULL;
2618 for (i = 0; i < Py_SIZE(x); i++) {
2619 Py_ssize_t value = PyNumber_AsSsize_t(
2620 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2621 if (value == -1 && PyErr_Occurred()) {
2622 Py_DECREF(new);
2623 return NULL;
2624 }
2625 if (value < 0 || value >= 256) {
2626 PyErr_SetString(PyExc_ValueError,
2627 "bytes must be in range(0, 256)");
2628 Py_DECREF(new);
2629 return NULL;
2630 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002631 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 }
2633 return new;
2634 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002637 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 if (size == -1 && PyErr_Occurred())
2639 return NULL;
2640 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2641 returning a shared empty bytes string. This required because we
2642 want to call _PyBytes_Resize() the returned object, which we can
2643 only do on bytes objects with refcount == 1. */
2644 size += 1;
2645 new = PyBytes_FromStringAndSize(NULL, size);
2646 if (new == NULL)
2647 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002649 /* Get the iterator */
2650 it = PyObject_GetIter(x);
2651 if (it == NULL)
2652 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 /* Run the iterator to exhaustion */
2655 for (i = 0; ; i++) {
2656 PyObject *item;
2657 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 /* Get the next item */
2660 item = PyIter_Next(it);
2661 if (item == NULL) {
2662 if (PyErr_Occurred())
2663 goto error;
2664 break;
2665 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 /* Interpret it as an int (__index__) */
2668 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2669 Py_DECREF(item);
2670 if (value == -1 && PyErr_Occurred())
2671 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 /* Range check */
2674 if (value < 0 || value >= 256) {
2675 PyErr_SetString(PyExc_ValueError,
2676 "bytes must be in range(0, 256)");
2677 goto error;
2678 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 /* Append the byte */
2681 if (i >= size) {
2682 size = 2 * size + 1;
2683 if (_PyBytes_Resize(&new, size) < 0)
2684 goto error;
2685 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002686 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 }
2688 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 /* Clean up and return success */
2691 Py_DECREF(it);
2692 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
2694 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01002696 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698}
2699
2700static PyObject *
2701str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2702{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 PyObject *tmp, *pnew;
2704 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 assert(PyType_IsSubtype(type, &PyBytes_Type));
2707 tmp = bytes_new(&PyBytes_Type, args, kwds);
2708 if (tmp == NULL)
2709 return NULL;
2710 assert(PyBytes_CheckExact(tmp));
2711 n = PyBytes_GET_SIZE(tmp);
2712 pnew = type->tp_alloc(type, n);
2713 if (pnew != NULL) {
2714 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2715 PyBytes_AS_STRING(tmp), n+1);
2716 ((PyBytesObject *)pnew)->ob_shash =
2717 ((PyBytesObject *)tmp)->ob_shash;
2718 }
2719 Py_DECREF(tmp);
2720 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721}
2722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002723PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002724"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002726bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002727bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2728bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002729\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002731 - an iterable yielding integers in range(256)\n\
2732 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002733 - any object implementing the buffer API.\n\
2734 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002735
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002736static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002737
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002738PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2740 "bytes",
2741 PyBytesObject_SIZE,
2742 sizeof(char),
2743 bytes_dealloc, /* tp_dealloc */
2744 0, /* tp_print */
2745 0, /* tp_getattr */
2746 0, /* tp_setattr */
2747 0, /* tp_reserved */
2748 (reprfunc)bytes_repr, /* tp_repr */
2749 0, /* tp_as_number */
2750 &bytes_as_sequence, /* tp_as_sequence */
2751 &bytes_as_mapping, /* tp_as_mapping */
2752 (hashfunc)bytes_hash, /* tp_hash */
2753 0, /* tp_call */
2754 bytes_str, /* tp_str */
2755 PyObject_GenericGetAttr, /* tp_getattro */
2756 0, /* tp_setattro */
2757 &bytes_as_buffer, /* tp_as_buffer */
2758 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2759 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2760 bytes_doc, /* tp_doc */
2761 0, /* tp_traverse */
2762 0, /* tp_clear */
2763 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2764 0, /* tp_weaklistoffset */
2765 bytes_iter, /* tp_iter */
2766 0, /* tp_iternext */
2767 bytes_methods, /* tp_methods */
2768 0, /* tp_members */
2769 0, /* tp_getset */
2770 &PyBaseObject_Type, /* tp_base */
2771 0, /* tp_dict */
2772 0, /* tp_descr_get */
2773 0, /* tp_descr_set */
2774 0, /* tp_dictoffset */
2775 0, /* tp_init */
2776 0, /* tp_alloc */
2777 bytes_new, /* tp_new */
2778 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002779};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002780
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002781void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002782PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002784 PyObject *v;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 assert(pv != NULL);
2786 if (*pv == NULL)
2787 return;
2788 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002789 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 return;
2791 }
2792 v = bytes_concat(*pv, w);
2793 Py_DECREF(*pv);
2794 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795}
2796
2797void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002798PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 PyBytes_Concat(pv, w);
2801 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802}
2803
2804
2805/* The following function breaks the notion that strings are immutable:
2806 it changes the size of a string. We get away with this only if there
2807 is only one module referencing the object. You can also think of it
2808 as creating a new string object and destroying the old one, only
2809 more efficiently. In any case, don't use this if the string may
2810 already be known to some other part of the code...
2811 Note that if there's not enough memory to resize the string, the original
2812 string object at *pv is deallocated, *pv is set to NULL, an "out of
2813 memory" exception is set, and -1 is returned. Else (on success) 0 is
2814 returned, and the value in *pv may or may not be the same as on input.
2815 As always, an extra byte is allocated for a trailing \0 byte (newsize
2816 does *not* include that), and a trailing \0 byte is stored.
2817*/
2818
2819int
2820_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2821{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002822 PyObject *v;
2823 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 v = *pv;
2825 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2826 *pv = 0;
2827 Py_DECREF(v);
2828 PyErr_BadInternalCall();
2829 return -1;
2830 }
2831 /* XXX UNREF/NEWREF interface should be more symmetrical */
2832 _Py_DEC_REFTOTAL;
2833 _Py_ForgetReference(v);
2834 *pv = (PyObject *)
2835 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2836 if (*pv == NULL) {
2837 PyObject_Del(v);
2838 PyErr_NoMemory();
2839 return -1;
2840 }
2841 _Py_NewReference(*pv);
2842 sv = (PyBytesObject *) *pv;
2843 Py_SIZE(sv) = newsize;
2844 sv->ob_sval[newsize] = '\0';
2845 sv->ob_shash = -1; /* invalidate cached hash value */
2846 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002847}
2848
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002849void
2850PyBytes_Fini(void)
2851{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002853 for (i = 0; i < UCHAR_MAX + 1; i++)
2854 Py_CLEAR(characters[i]);
2855 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856}
2857
Benjamin Peterson4116f362008-05-27 00:36:20 +00002858/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002859
2860typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002861 PyObject_HEAD
2862 Py_ssize_t it_index;
2863 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002864} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002865
2866static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002868{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 _PyObject_GC_UNTRACK(it);
2870 Py_XDECREF(it->it_seq);
2871 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002872}
2873
2874static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 Py_VISIT(it->it_seq);
2878 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002879}
2880
2881static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002883{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 PyBytesObject *seq;
2885 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 assert(it != NULL);
2888 seq = it->it_seq;
2889 if (seq == NULL)
2890 return NULL;
2891 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002893 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2894 item = PyLong_FromLong(
2895 (unsigned char)seq->ob_sval[it->it_index]);
2896 if (item != NULL)
2897 ++it->it_index;
2898 return item;
2899 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002901 Py_DECREF(seq);
2902 it->it_seq = NULL;
2903 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002904}
2905
2906static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002907striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002908{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002909 Py_ssize_t len = 0;
2910 if (it->it_seq)
2911 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2912 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002913}
2914
2915PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002916 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002917
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002918static PyObject *
2919striter_reduce(striterobject *it)
2920{
2921 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002922 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002923 it->it_seq, it->it_index);
2924 } else {
2925 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2926 if (u == NULL)
2927 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002928 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002929 }
2930}
2931
2932PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2933
2934static PyObject *
2935striter_setstate(striterobject *it, PyObject *state)
2936{
2937 Py_ssize_t index = PyLong_AsSsize_t(state);
2938 if (index == -1 && PyErr_Occurred())
2939 return NULL;
2940 if (index < 0)
2941 index = 0;
2942 it->it_index = index;
2943 Py_RETURN_NONE;
2944}
2945
2946PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2947
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2950 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002951 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2952 reduce_doc},
2953 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2954 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002955 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002956};
2957
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002958PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2960 "bytes_iterator", /* tp_name */
2961 sizeof(striterobject), /* tp_basicsize */
2962 0, /* tp_itemsize */
2963 /* methods */
2964 (destructor)striter_dealloc, /* tp_dealloc */
2965 0, /* tp_print */
2966 0, /* tp_getattr */
2967 0, /* tp_setattr */
2968 0, /* tp_reserved */
2969 0, /* tp_repr */
2970 0, /* tp_as_number */
2971 0, /* tp_as_sequence */
2972 0, /* tp_as_mapping */
2973 0, /* tp_hash */
2974 0, /* tp_call */
2975 0, /* tp_str */
2976 PyObject_GenericGetAttr, /* tp_getattro */
2977 0, /* tp_setattro */
2978 0, /* tp_as_buffer */
2979 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2980 0, /* tp_doc */
2981 (traverseproc)striter_traverse, /* tp_traverse */
2982 0, /* tp_clear */
2983 0, /* tp_richcompare */
2984 0, /* tp_weaklistoffset */
2985 PyObject_SelfIter, /* tp_iter */
2986 (iternextfunc)striter_next, /* tp_iternext */
2987 striter_methods, /* tp_methods */
2988 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002989};
2990
2991static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002992bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002993{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002994 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002996 if (!PyBytes_Check(seq)) {
2997 PyErr_BadInternalCall();
2998 return NULL;
2999 }
3000 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3001 if (it == NULL)
3002 return NULL;
3003 it->it_index = 0;
3004 Py_INCREF(seq);
3005 it->it_seq = (PyBytesObject *)seq;
3006 _PyObject_GC_TRACK(it);
3007 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008}