blob: 8217b1eab30a47673f6dd51cd27f1aef9f1795ba [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020013 PyBufferProcs *bufferprocs;
14 if (PyBytes_CheckExact(obj)) {
15 /* Fast path, e.g. for .join() of many bytes objects */
16 Py_INCREF(obj);
17 view->obj = obj;
18 view->buf = PyBytes_AS_STRING(obj);
19 view->len = PyBytes_GET_SIZE(obj);
20 return view->len;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Antoine Pitroucfc22b42012-10-16 21:07:23 +020023 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
24 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000025 {
Antoine Pitroud1188562010-06-09 16:38:55 +000026 PyErr_Format(PyExc_TypeError,
27 "Type %.100s doesn't support the buffer API",
28 Py_TYPE(obj)->tp_name);
29 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000031
Antoine Pitroucfc22b42012-10-16 21:07:23 +020032 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000034 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035}
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000038Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000040
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041static PyBytesObject *characters[UCHAR_MAX + 1];
42static PyBytesObject *nullstring;
43
Mark Dickinsonfd24b322008-12-06 15:33:31 +000044/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
45 for a string of length n should request PyBytesObject_SIZE + n bytes.
46
47 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
48 3 bytes per string allocation on a typical system.
49*/
50#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
51
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 For PyBytes_FromString(), the parameter `str' points to a null-terminated
54 string containing exactly `size' bytes.
55
56 For PyBytes_FromStringAndSize(), the parameter `str' is
57 either NULL or else points to a string containing at least `size' bytes.
58 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
59 not have to be null-terminated. (Therefore it is safe to construct a
60 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
61 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
62 bytes (setting the last byte to the null terminating character) and you can
63 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000064 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065 alter the data yourself, since the strings may be shared.
66
67 The PyObject member `op->ob_size', which denotes the number of "extra
68 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020069 allocated for string data, not counting the null terminating character.
70 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071 PyBytes_FromStringAndSize()) or the length of the string in the `str'
72 parameter (for PyBytes_FromString()).
73*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000074PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000076{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020077 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (size < 0) {
79 PyErr_SetString(PyExc_SystemError,
80 "Negative size passed to PyBytes_FromStringAndSize");
81 return NULL;
82 }
83 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
90 if (size == 1 && str != NULL &&
91 (op = characters[*str & UCHAR_MAX]) != NULL)
92 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000093#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000095#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 Py_INCREF(op);
97 return (PyObject *)op;
98 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
101 PyErr_SetString(PyExc_OverflowError,
102 "byte string is too large");
103 return NULL;
104 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 /* Inline PyObject_NewVar */
107 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
108 if (op == NULL)
109 return PyErr_NoMemory();
110 PyObject_INIT_VAR(op, &PyBytes_Type, size);
111 op->ob_shash = -1;
112 if (str != NULL)
113 Py_MEMCPY(op->ob_sval, str, size);
114 op->ob_sval[size] = '\0';
115 /* share short strings */
116 if (size == 0) {
117 nullstring = op;
118 Py_INCREF(op);
119 } else if (size == 1 && str != NULL) {
120 characters[*str & UCHAR_MAX] = op;
121 Py_INCREF(op);
122 }
123 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000124}
125
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000126PyObject *
127PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000128{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200129 size_t size;
130 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 assert(str != NULL);
133 size = strlen(str);
134 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
135 PyErr_SetString(PyExc_OverflowError,
136 "byte string is too long");
137 return NULL;
138 }
139 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 Py_INCREF(op);
144 return (PyObject *)op;
145 }
146 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000147#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 /* Inline PyObject_NewVar */
155 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
156 if (op == NULL)
157 return PyErr_NoMemory();
158 PyObject_INIT_VAR(op, &PyBytes_Type, size);
159 op->ob_shash = -1;
160 Py_MEMCPY(op->ob_sval, str, size+1);
161 /* share short strings */
162 if (size == 0) {
163 nullstring = op;
164 Py_INCREF(op);
165 } else if (size == 1) {
166 characters[*str & UCHAR_MAX] = op;
167 Py_INCREF(op);
168 }
169 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000170}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000171
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172PyObject *
173PyBytes_FromFormatV(const char *format, va_list vargs)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 va_list count;
176 Py_ssize_t n = 0;
177 const char* f;
178 char *s;
179 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000181 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 /* step 1: figure out how large a buffer we need */
183 for (f = format; *f; f++) {
184 if (*f == '%') {
185 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000186 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
190 * they don't affect the amount of space we reserve.
191 */
192 if ((*f == 'l' || *f == 'z') &&
193 (f[1] == 'd' || f[1] == 'u'))
194 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 switch (*f) {
197 case 'c':
198 (void)va_arg(count, int);
199 /* fall through... */
200 case '%':
201 n++;
202 break;
203 case 'd': case 'u': case 'i': case 'x':
204 (void) va_arg(count, int);
205 /* 20 bytes is enough to hold a 64-bit
206 integer. Decimal takes the most space.
207 This isn't enough for octal. */
208 n += 20;
209 break;
210 case 's':
211 s = va_arg(count, char*);
212 n += strlen(s);
213 break;
214 case 'p':
215 (void) va_arg(count, int);
216 /* maximum 64-bit pointer representation:
217 * 0xffffffffffffffff
218 * so 19 characters is enough.
219 * XXX I count 18 -- what's the extra for?
220 */
221 n += 19;
222 break;
223 default:
224 /* if we stumble upon an unknown
225 formatting code, copy the rest of
226 the format string to the output
227 string. (we cannot just skip the
228 code, since there's no way to know
229 what's in the argument list) */
230 n += strlen(p);
231 goto expand;
232 }
233 } else
234 n++;
235 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000236 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 /* step 2: fill the buffer */
238 /* Since we've analyzed how much space we need for the worst case,
239 use sprintf directly instead of the slower PyOS_snprintf. */
240 string = PyBytes_FromStringAndSize(NULL, n);
241 if (!string)
242 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 for (f = format; *f; f++) {
247 if (*f == '%') {
248 const char* p = f++;
249 Py_ssize_t i;
250 int longflag = 0;
251 int size_tflag = 0;
252 /* parse the width.precision part (we're only
253 interested in the precision value, if any) */
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 if (*f == '.') {
258 f++;
259 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000260 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 n = (n*10) + *f++ - '0';
262 }
David Malcolm96960882010-11-05 17:23:41 +0000263 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 f++;
265 /* handle the long flag, but only for %ld and %lu.
266 others can be added when necessary. */
267 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
268 longflag = 1;
269 ++f;
270 }
271 /* handle the size_t flag. */
272 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
273 size_tflag = 1;
274 ++f;
275 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 switch (*f) {
278 case 'c':
279 *s++ = va_arg(vargs, int);
280 break;
281 case 'd':
282 if (longflag)
283 sprintf(s, "%ld", va_arg(vargs, long));
284 else if (size_tflag)
285 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
286 va_arg(vargs, Py_ssize_t));
287 else
288 sprintf(s, "%d", va_arg(vargs, int));
289 s += strlen(s);
290 break;
291 case 'u':
292 if (longflag)
293 sprintf(s, "%lu",
294 va_arg(vargs, unsigned long));
295 else if (size_tflag)
296 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
297 va_arg(vargs, size_t));
298 else
299 sprintf(s, "%u",
300 va_arg(vargs, unsigned int));
301 s += strlen(s);
302 break;
303 case 'i':
304 sprintf(s, "%i", va_arg(vargs, int));
305 s += strlen(s);
306 break;
307 case 'x':
308 sprintf(s, "%x", va_arg(vargs, int));
309 s += strlen(s);
310 break;
311 case 's':
312 p = va_arg(vargs, char*);
313 i = strlen(p);
314 if (n > 0 && i > n)
315 i = n;
316 Py_MEMCPY(s, p, i);
317 s += i;
318 break;
319 case 'p':
320 sprintf(s, "%p", va_arg(vargs, void*));
321 /* %p is ill-defined: ensure leading 0x. */
322 if (s[1] == 'X')
323 s[1] = 'x';
324 else if (s[1] != 'x') {
325 memmove(s+2, s, strlen(s)+1);
326 s[0] = '0';
327 s[1] = 'x';
328 }
329 s += strlen(s);
330 break;
331 case '%':
332 *s++ = '%';
333 break;
334 default:
335 strcpy(s, p);
336 s += strlen(s);
337 goto end;
338 }
339 } else
340 *s++ = *f;
341 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000342
343 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
345 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000346}
347
348PyObject *
349PyBytes_FromFormat(const char *format, ...)
350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000351 PyObject* ret;
352 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353
354#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 ret = PyBytes_FromFormatV(format, vargs);
360 va_end(vargs);
361 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000362}
363
364static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000365bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000368}
369
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370/* Unescape a backslash-escaped string. If unicode is non-zero,
371 the string is a u-literal. If recode_encoding is non-zero,
372 the string is UTF-8 encoded and should be re-encoded in the
373 specified encoding. */
374
375PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 Py_ssize_t len,
377 const char *errors,
378 Py_ssize_t unicode,
379 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 int c;
382 char *p, *buf;
383 const char *end;
384 PyObject *v;
385 Py_ssize_t newlen = recode_encoding ? 4*len:len;
386 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
387 if (v == NULL)
388 return NULL;
389 p = buf = PyBytes_AsString(v);
390 end = s + len;
391 while (s < end) {
392 if (*s != '\\') {
393 non_esc:
394 if (recode_encoding && (*s & 0x80)) {
395 PyObject *u, *w;
396 char *r;
397 const char* t;
398 Py_ssize_t rn;
399 t = s;
400 /* Decode non-ASCII bytes as UTF-8. */
401 while (t < end && (*t & 0x80)) t++;
402 u = PyUnicode_DecodeUTF8(s, t - s, errors);
403 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 /* Recode them in target encoding. */
406 w = PyUnicode_AsEncodedString(
407 u, recode_encoding, errors);
408 Py_DECREF(u);
409 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 /* Append bytes to output buffer. */
412 assert(PyBytes_Check(w));
413 r = PyBytes_AS_STRING(w);
414 rn = PyBytes_GET_SIZE(w);
415 Py_MEMCPY(p, r, rn);
416 p += rn;
417 Py_DECREF(w);
418 s = t;
419 } else {
420 *p++ = *s++;
421 }
422 continue;
423 }
424 s++;
425 if (s==end) {
426 PyErr_SetString(PyExc_ValueError,
427 "Trailing \\ in string");
428 goto failed;
429 }
430 switch (*s++) {
431 /* XXX This assumes ASCII! */
432 case '\n': break;
433 case '\\': *p++ = '\\'; break;
434 case '\'': *p++ = '\''; break;
435 case '\"': *p++ = '\"'; break;
436 case 'b': *p++ = '\b'; break;
437 case 'f': *p++ = '\014'; break; /* FF */
438 case 't': *p++ = '\t'; break;
439 case 'n': *p++ = '\n'; break;
440 case 'r': *p++ = '\r'; break;
441 case 'v': *p++ = '\013'; break; /* VT */
442 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
443 case '0': case '1': case '2': case '3':
444 case '4': case '5': case '6': case '7':
445 c = s[-1] - '0';
446 if (s < end && '0' <= *s && *s <= '7') {
447 c = (c<<3) + *s++ - '0';
448 if (s < end && '0' <= *s && *s <= '7')
449 c = (c<<3) + *s++ - '0';
450 }
451 *p++ = c;
452 break;
453 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000454 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 unsigned int x = 0;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x = 10 + c - 'a';
462 else
463 x = 10 + c - 'A';
464 x = x << 4;
465 c = Py_CHARMASK(*s);
466 s++;
David Malcolm96960882010-11-05 17:23:41 +0000467 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000469 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 x += 10 + c - 'a';
471 else
472 x += 10 + c - 'A';
473 *p++ = x;
474 break;
475 }
476 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200477 PyErr_Format(PyExc_ValueError,
478 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200479 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 goto failed;
481 }
482 if (strcmp(errors, "replace") == 0) {
483 *p++ = '?';
484 } else if (strcmp(errors, "ignore") == 0)
485 /* do nothing */;
486 else {
487 PyErr_Format(PyExc_ValueError,
488 "decoding error; unknown "
489 "error handling code: %.400s",
490 errors);
491 goto failed;
492 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200493 /* skip \x */
494 if (s < end && Py_ISXDIGIT(s[0]))
495 s++; /* and a hexdigit */
496 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 default:
498 *p++ = '\\';
499 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200500 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 UTF-8 bytes may follow. */
502 }
503 }
504 if (p-buf < newlen)
505 _PyBytes_Resize(&v, p - buf);
506 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000507 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 Py_DECREF(v);
509 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512/* -------------------------------------------------------------------- */
513/* object api */
514
515Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200516PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000517{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 if (!PyBytes_Check(op)) {
519 PyErr_Format(PyExc_TypeError,
520 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
521 return -1;
522 }
523 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000524}
525
526char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200527PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 if (!PyBytes_Check(op)) {
530 PyErr_Format(PyExc_TypeError,
531 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
532 return NULL;
533 }
534 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000535}
536
537int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200538PyBytes_AsStringAndSize(PyObject *obj,
539 char **s,
540 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000541{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (s == NULL) {
543 PyErr_BadInternalCall();
544 return -1;
545 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 if (!PyBytes_Check(obj)) {
548 PyErr_Format(PyExc_TypeError,
549 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
550 return -1;
551 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 *s = PyBytes_AS_STRING(obj);
554 if (len != NULL)
555 *len = PyBytes_GET_SIZE(obj);
556 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
557 PyErr_SetString(PyExc_TypeError,
558 "expected bytes with no null");
559 return -1;
560 }
561 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000562}
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
564/* -------------------------------------------------------------------- */
565/* Methods */
566
Eric Smith0923d1d2009-04-16 20:16:10 +0000567#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000568
569#include "stringlib/fastsearch.h"
570#include "stringlib/count.h"
571#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200572#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000573#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000574#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000575#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000576
Eric Smith0f78bff2009-11-30 01:01:42 +0000577#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000578
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579PyObject *
580PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000581{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200582 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200583 Py_ssize_t i, length = Py_SIZE(op);
584 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 unsigned char quote, *s, *p;
587
588 /* Compute size of output string */
589 squotes = dquotes = 0;
590 newsize = 3; /* b'' */
591 s = (unsigned char*)op->ob_sval;
592 for (i = 0; i < length; i++) {
593 switch(s[i]) {
594 case '\'': squotes++; newsize++; break;
595 case '"': dquotes++; newsize++; break;
596 case '\\': case '\t': case '\n': case '\r':
597 newsize += 2; break; /* \C */
598 default:
599 if (s[i] < ' ' || s[i] >= 0x7f)
600 newsize += 4; /* \xHH */
601 else
602 newsize++;
603 }
604 }
605 quote = '\'';
606 if (smartquotes && squotes && !dquotes)
607 quote = '"';
608 if (squotes && quote == '\'')
609 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200610
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200611 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 PyErr_SetString(PyExc_OverflowError,
613 "bytes object is too large to make repr");
614 return NULL;
615 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200616
617 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 if (v == NULL) {
619 return NULL;
620 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200621 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000622
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200623 *p++ = 'b', *p++ = quote;
624 for (i = 0; i < length; i++) {
625 unsigned char c = op->ob_sval[i];
626 if (c == quote || c == '\\')
627 *p++ = '\\', *p++ = c;
628 else if (c == '\t')
629 *p++ = '\\', *p++ = 't';
630 else if (c == '\n')
631 *p++ = '\\', *p++ = 'n';
632 else if (c == '\r')
633 *p++ = '\\', *p++ = 'r';
634 else if (c < ' ' || c >= 0x7f) {
635 *p++ = '\\';
636 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200637 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
638 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200640 else
641 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200643 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200644 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200645 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000646}
647
Neal Norwitz6968b052007-02-27 19:02:19 +0000648static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000649bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000650{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000652}
653
Neal Norwitz6968b052007-02-27 19:02:19 +0000654static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000655bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000656{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 if (Py_BytesWarningFlag) {
658 if (PyErr_WarnEx(PyExc_BytesWarning,
659 "str() on a bytes instance", 1))
660 return NULL;
661 }
662 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000663}
664
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000666bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000667{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669}
Neal Norwitz6968b052007-02-27 19:02:19 +0000670
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671/* This is also used by PyBytes_Concat() */
672static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000673bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000674{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 Py_ssize_t size;
676 Py_buffer va, vb;
677 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000679 va.len = -1;
680 vb.len = -1;
681 if (_getbuffer(a, &va) < 0 ||
682 _getbuffer(b, &vb) < 0) {
683 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
684 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
685 goto done;
686 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 /* Optimize end cases */
689 if (va.len == 0 && PyBytes_CheckExact(b)) {
690 result = b;
691 Py_INCREF(result);
692 goto done;
693 }
694 if (vb.len == 0 && PyBytes_CheckExact(a)) {
695 result = a;
696 Py_INCREF(result);
697 goto done;
698 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 size = va.len + vb.len;
701 if (size < 0) {
702 PyErr_NoMemory();
703 goto done;
704 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 result = PyBytes_FromStringAndSize(NULL, size);
707 if (result != NULL) {
708 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
709 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000711
712 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 if (va.len != -1)
714 PyBuffer_Release(&va);
715 if (vb.len != -1)
716 PyBuffer_Release(&vb);
717 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000718}
Neal Norwitz6968b052007-02-27 19:02:19 +0000719
720static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200721bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000722{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200723 Py_ssize_t i;
724 Py_ssize_t j;
725 Py_ssize_t size;
726 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 size_t nbytes;
728 if (n < 0)
729 n = 0;
730 /* watch out for overflows: the size can overflow int,
731 * and the # of bytes needed can overflow size_t
732 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 PyErr_SetString(PyExc_OverflowError,
735 "repeated bytes are too long");
736 return NULL;
737 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000738 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
740 Py_INCREF(a);
741 return (PyObject *)a;
742 }
743 nbytes = (size_t)size;
744 if (nbytes + PyBytesObject_SIZE <= nbytes) {
745 PyErr_SetString(PyExc_OverflowError,
746 "repeated bytes are too long");
747 return NULL;
748 }
749 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
750 if (op == NULL)
751 return PyErr_NoMemory();
752 PyObject_INIT_VAR(op, &PyBytes_Type, size);
753 op->ob_shash = -1;
754 op->ob_sval[size] = '\0';
755 if (Py_SIZE(a) == 1 && n > 0) {
756 memset(op->ob_sval, a->ob_sval[0] , n);
757 return (PyObject *) op;
758 }
759 i = 0;
760 if (i < size) {
761 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
762 i = Py_SIZE(a);
763 }
764 while (i < size) {
765 j = (i <= size-i) ? i : size-i;
766 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
767 i += j;
768 }
769 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000770}
771
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000773bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774{
775 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
776 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000777 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000778 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000779 PyErr_Clear();
780 if (_getbuffer(arg, &varg) < 0)
781 return -1;
782 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
783 varg.buf, varg.len, 0);
784 PyBuffer_Release(&varg);
785 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 }
787 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000788 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
789 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790 }
791
Antoine Pitrou0010d372010-08-15 17:12:55 +0000792 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793}
794
Neal Norwitz6968b052007-02-27 19:02:19 +0000795static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200796bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 if (i < 0 || i >= Py_SIZE(a)) {
799 PyErr_SetString(PyExc_IndexError, "index out of range");
800 return NULL;
801 }
802 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000803}
804
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100805Py_LOCAL(int)
806bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
807{
808 int cmp;
809 Py_ssize_t len;
810
811 len = Py_SIZE(a);
812 if (Py_SIZE(b) != len)
813 return 0;
814
815 if (a->ob_sval[0] != b->ob_sval[0])
816 return 0;
817
818 cmp = memcmp(a->ob_sval, b->ob_sval, len);
819 return (cmp == 0);
820}
821
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000822static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000823bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000824{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 int c;
826 Py_ssize_t len_a, len_b;
827 Py_ssize_t min_len;
828 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 /* Make sure both arguments are strings. */
831 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
832 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
833 (PyObject_IsInstance((PyObject*)a,
834 (PyObject*)&PyUnicode_Type) ||
835 PyObject_IsInstance((PyObject*)b,
836 (PyObject*)&PyUnicode_Type))) {
837 if (PyErr_WarnEx(PyExc_BytesWarning,
838 "Comparison between bytes and string", 1))
839 return NULL;
840 }
841 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100843 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100845 case Py_EQ:
846 case Py_LE:
847 case Py_GE:
848 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100850 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100851 case Py_NE:
852 case Py_LT:
853 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100855 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100856 default:
857 PyErr_BadArgument();
858 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 }
860 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100861 else if (op == Py_EQ || op == Py_NE) {
862 int eq = bytes_compare_eq(a, b);
863 eq ^= (op == Py_NE);
864 result = eq ? Py_True : Py_False;
865 }
866 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100867 len_a = Py_SIZE(a);
868 len_b = Py_SIZE(b);
869 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100870 if (min_len > 0) {
871 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100872 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100873 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100875 else
876 c = 0;
877 if (c == 0)
878 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
879 switch (op) {
880 case Py_LT: c = c < 0; break;
881 case Py_LE: c = c <= 0; break;
882 case Py_GT: c = c > 0; break;
883 case Py_GE: c = c >= 0; break;
884 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100885 PyErr_BadArgument();
886 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100887 }
888 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 Py_INCREF(result);
892 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000893}
894
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000895static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000896bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000897{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100898 if (a->ob_shash == -1) {
899 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100900 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100901 }
902 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000903}
904
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000905static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000906bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000907{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 if (PyIndex_Check(item)) {
909 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
910 if (i == -1 && PyErr_Occurred())
911 return NULL;
912 if (i < 0)
913 i += PyBytes_GET_SIZE(self);
914 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
915 PyErr_SetString(PyExc_IndexError,
916 "index out of range");
917 return NULL;
918 }
919 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
920 }
921 else if (PySlice_Check(item)) {
922 Py_ssize_t start, stop, step, slicelength, cur, i;
923 char* source_buf;
924 char* result_buf;
925 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000926
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000927 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 PyBytes_GET_SIZE(self),
929 &start, &stop, &step, &slicelength) < 0) {
930 return NULL;
931 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 if (slicelength <= 0) {
934 return PyBytes_FromStringAndSize("", 0);
935 }
936 else if (start == 0 && step == 1 &&
937 slicelength == PyBytes_GET_SIZE(self) &&
938 PyBytes_CheckExact(self)) {
939 Py_INCREF(self);
940 return (PyObject *)self;
941 }
942 else if (step == 1) {
943 return PyBytes_FromStringAndSize(
944 PyBytes_AS_STRING(self) + start,
945 slicelength);
946 }
947 else {
948 source_buf = PyBytes_AS_STRING(self);
949 result = PyBytes_FromStringAndSize(NULL, slicelength);
950 if (result == NULL)
951 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 result_buf = PyBytes_AS_STRING(result);
954 for (cur = start, i = 0; i < slicelength;
955 cur += step, i++) {
956 result_buf[i] = source_buf[cur];
957 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 return result;
960 }
961 }
962 else {
963 PyErr_Format(PyExc_TypeError,
964 "byte indices must be integers, not %.200s",
965 Py_TYPE(item)->tp_name);
966 return NULL;
967 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968}
969
970static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000971bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000972{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
974 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975}
976
/* Sequence-protocol slot table for bytes.  The initialisers are positional,
   so their order must not change.  Entries that are 0 (the slice and
   assignment slots) are left unset. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
987
/* Mapping-protocol slot table for bytes: length and subscript handlers.
   The third, positional slot is 0 (presumably item assignment, which is
   not provided -- confirm against the PyMappingMethods layout). */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,
    (binaryfunc)bytes_subscript,
    0,
};
993
/* Buffer-protocol slot table for bytes: only the export hook is set.
   The second slot is NULL (presumably the release hook -- confirm against
   the PyBufferProcs layout). */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,
    NULL,
};
998
999
/* Selectors telling the strip helpers which side(s) of the bytes to trim.
   They also serve as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry is an argument-parsing format: one optional object, followed
   by ':' and the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip type i: skips the 3-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1008
Neal Norwitz6968b052007-02-27 19:02:19 +00001009PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001010"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001011\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001012Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001013If sep is not specified or is None, B is split on ASCII whitespace\n\
1014characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001015If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001016
1017static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001018bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001019{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001020 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1022 Py_ssize_t maxsplit = -1;
1023 const char *s = PyBytes_AS_STRING(self), *sub;
1024 Py_buffer vsub;
1025 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001026
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001027 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1028 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 return NULL;
1030 if (maxsplit < 0)
1031 maxsplit = PY_SSIZE_T_MAX;
1032 if (subobj == Py_None)
1033 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1034 if (_getbuffer(subobj, &vsub) < 0)
1035 return NULL;
1036 sub = vsub.buf;
1037 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001038
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1040 PyBuffer_Release(&vsub);
1041 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001042}
1043
Neal Norwitz6968b052007-02-27 19:02:19 +00001044PyDoc_STRVAR(partition__doc__,
1045"B.partition(sep) -> (head, sep, tail)\n\
1046\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001047Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001048the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001049found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001050
1051static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001052bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001053{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 const char *sep;
1055 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 if (PyBytes_Check(sep_obj)) {
1058 sep = PyBytes_AS_STRING(sep_obj);
1059 sep_len = PyBytes_GET_SIZE(sep_obj);
1060 }
1061 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1062 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 return stringlib_partition(
1065 (PyObject*) self,
1066 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1067 sep_obj, sep, sep_len
1068 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001069}
1070
1071PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001072"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001073\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001074Search for the separator sep in B, starting at the end of B,\n\
1075and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001076part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001077bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001078
1079static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001080bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001081{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 const char *sep;
1083 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 if (PyBytes_Check(sep_obj)) {
1086 sep = PyBytes_AS_STRING(sep_obj);
1087 sep_len = PyBytes_GET_SIZE(sep_obj);
1088 }
1089 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1090 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 return stringlib_rpartition(
1093 (PyObject*) self,
1094 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1095 sep_obj, sep, sep_len
1096 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001097}
1098
Neal Norwitz6968b052007-02-27 19:02:19 +00001099PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001100"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001101\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001102Return a list of the sections in B, using sep as the delimiter,\n\
1103starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001104If sep is not given, B is split on ASCII whitespace characters\n\
1105(space, tab, return, newline, formfeed, vertical tab).\n\
1106If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001107
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001108
Neal Norwitz6968b052007-02-27 19:02:19 +00001109static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001110bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001111{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001112 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1114 Py_ssize_t maxsplit = -1;
1115 const char *s = PyBytes_AS_STRING(self), *sub;
1116 Py_buffer vsub;
1117 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001118
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001119 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1120 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 return NULL;
1122 if (maxsplit < 0)
1123 maxsplit = PY_SSIZE_T_MAX;
1124 if (subobj == Py_None)
1125 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1126 if (_getbuffer(subobj, &vsub) < 0)
1127 return NULL;
1128 sub = vsub.buf;
1129 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1132 PyBuffer_Release(&vsub);
1133 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001134}
1135
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001136
1137PyDoc_STRVAR(join__doc__,
1138"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001139\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001140Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1142
Neal Norwitz6968b052007-02-27 19:02:19 +00001143static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001144bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001145{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001146 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001147}
1148
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001149PyObject *
1150_PyBytes_Join(PyObject *sep, PyObject *x)
1151{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001152 assert(sep != NULL && PyBytes_Check(sep));
1153 assert(x != NULL);
1154 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001155}
1156
/* helper macro to fixup start/end slice values
 *
 * `end` is capped at `len`.  A negative `end` or `start` has `len` added
 * to it and is then raised to 0 if it is still negative.  `start` is never
 * capped from above, so it can remain greater than `len` (and than `end`).
 *
 * NOTE(review): the expansion is a bare "if / else if" followed by a
 * second "if"; it is not wrapped in do { } while (0).  Use it only as a
 * complete statement, never as the unbraced body of an if/else.  Each
 * argument is expanded several times, so arguments must be free of side
 * effects; `start` and `end` must be assignable.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                              \
        end = len;                              \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0)                            \
            end = 0;                            \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0)                          \
            start = 0;                          \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001171
1172Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001173bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001176 char byte;
1177 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 const char *sub;
1179 Py_ssize_t sub_len;
1180 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001181 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001182
Antoine Pitrouac65d962011-10-20 23:54:17 +02001183 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1184 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001186
Antoine Pitrouac65d962011-10-20 23:54:17 +02001187 if (subobj) {
1188 if (_getbuffer(subobj, &subbuf) < 0)
1189 return -2;
1190
1191 sub = subbuf.buf;
1192 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001194 else {
1195 sub = &byte;
1196 sub_len = 1;
1197 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001200 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1202 sub, sub_len, start, end);
1203 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001204 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1206 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001207
1208 if (subobj)
1209 PyBuffer_Release(&subbuf);
1210
1211 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001212}
1213
1214
1215PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001216"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001217\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001218Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001219such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001221\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222Return -1 on failure.");
1223
Neal Norwitz6968b052007-02-27 19:02:19 +00001224static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001225bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001226{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 Py_ssize_t result = bytes_find_internal(self, args, +1);
1228 if (result == -2)
1229 return NULL;
1230 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001231}
1232
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233
1234PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001235"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001236\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237Like B.find() but raise ValueError when the substring is not found.");
1238
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001239static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001240bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001241{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 Py_ssize_t result = bytes_find_internal(self, args, +1);
1243 if (result == -2)
1244 return NULL;
1245 if (result == -1) {
1246 PyErr_SetString(PyExc_ValueError,
1247 "substring not found");
1248 return NULL;
1249 }
1250 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001251}
1252
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253
1254PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001255"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001256\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001258such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001260\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261Return -1 on failure.");
1262
Neal Norwitz6968b052007-02-27 19:02:19 +00001263static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001264bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 Py_ssize_t result = bytes_find_internal(self, args, -1);
1267 if (result == -2)
1268 return NULL;
1269 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001270}
1271
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001272
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001274"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275\n\
1276Like B.rfind() but raise ValueError when the substring is not found.");
1277
1278static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001279bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001280{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 Py_ssize_t result = bytes_find_internal(self, args, -1);
1282 if (result == -2)
1283 return NULL;
1284 if (result == -1) {
1285 PyErr_SetString(PyExc_ValueError,
1286 "substring not found");
1287 return NULL;
1288 }
1289 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001290}
1291
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292
1293Py_LOCAL_INLINE(PyObject *)
1294do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001295{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 Py_buffer vsep;
1297 char *s = PyBytes_AS_STRING(self);
1298 Py_ssize_t len = PyBytes_GET_SIZE(self);
1299 char *sep;
1300 Py_ssize_t seplen;
1301 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 if (_getbuffer(sepobj, &vsep) < 0)
1304 return NULL;
1305 sep = vsep.buf;
1306 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 i = 0;
1309 if (striptype != RIGHTSTRIP) {
1310 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1311 i++;
1312 }
1313 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 j = len;
1316 if (striptype != LEFTSTRIP) {
1317 do {
1318 j--;
1319 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1320 j++;
1321 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1326 Py_INCREF(self);
1327 return (PyObject*)self;
1328 }
1329 else
1330 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001331}
1332
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001333
1334Py_LOCAL_INLINE(PyObject *)
1335do_strip(PyBytesObject *self, int striptype)
1336{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 char *s = PyBytes_AS_STRING(self);
1338 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 i = 0;
1341 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001342 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 i++;
1344 }
1345 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 j = len;
1348 if (striptype != LEFTSTRIP) {
1349 do {
1350 j--;
David Malcolm96960882010-11-05 17:23:41 +00001351 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 j++;
1353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1356 Py_INCREF(self);
1357 return (PyObject*)self;
1358 }
1359 else
1360 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361}
1362
1363
1364Py_LOCAL_INLINE(PyObject *)
1365do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001368
Serhiy Storchakac6792272013-10-19 21:03:34 +03001369 if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (sep != NULL && sep != Py_None) {
1373 return do_xstrip(self, striptype, sep);
1374 }
1375 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376}
1377
1378
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001379PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001381\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001382Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001383If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001384static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001385bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001386{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 if (PyTuple_GET_SIZE(args) == 0)
1388 return do_strip(self, BOTHSTRIP); /* Common case */
1389 else
1390 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001391}
1392
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001394PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001396\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001397Strip leading bytes contained in the argument.\n\
1398If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001399static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001400bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 if (PyTuple_GET_SIZE(args) == 0)
1403 return do_strip(self, LEFTSTRIP); /* Common case */
1404 else
1405 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001406}
1407
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001409PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001411\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001412Strip trailing bytes contained in the argument.\n\
1413If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001414static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (PyTuple_GET_SIZE(args) == 0)
1418 return do_strip(self, RIGHTSTRIP); /* Common case */
1419 else
1420 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001421}
Neal Norwitz6968b052007-02-27 19:02:19 +00001422
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423
1424PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001425"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001426\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001428string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429as in slice notation.");
1430
1431static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001432bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 PyObject *sub_obj;
1435 const char *str = PyBytes_AS_STRING(self), *sub;
1436 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001437 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439
Antoine Pitrouac65d962011-10-20 23:54:17 +02001440 Py_buffer vsub;
1441 PyObject *count_obj;
1442
1443 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1444 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Antoine Pitrouac65d962011-10-20 23:54:17 +02001447 if (sub_obj) {
1448 if (_getbuffer(sub_obj, &vsub) < 0)
1449 return NULL;
1450
1451 sub = vsub.buf;
1452 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001454 else {
1455 sub = &byte;
1456 sub_len = 1;
1457 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460
Antoine Pitrouac65d962011-10-20 23:54:17 +02001461 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1463 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001464
1465 if (sub_obj)
1466 PyBuffer_Release(&vsub);
1467
1468 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469}
1470
1471
1472PyDoc_STRVAR(translate__doc__,
1473"B.translate(table[, deletechars]) -> bytes\n\
1474\n\
1475Return a copy of B, where all characters occurring in the\n\
1476optional argument deletechars are removed, and the remaining\n\
1477characters have been mapped through the given translation\n\
1478table, which must be a bytes object of length 256.");
1479
1480static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001481bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001483 char *input, *output;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 const char *table;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001485 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 PyObject *input_obj = (PyObject*)self;
1487 const char *output_start, *del_table=NULL;
1488 Py_ssize_t inlen, tablen, dellen = 0;
1489 PyObject *result;
1490 int trans_table[256];
1491 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1494 &tableobj, &delobj))
1495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 if (PyBytes_Check(tableobj)) {
1498 table = PyBytes_AS_STRING(tableobj);
1499 tablen = PyBytes_GET_SIZE(tableobj);
1500 }
1501 else if (tableobj == Py_None) {
1502 table = NULL;
1503 tablen = 256;
1504 }
1505 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1506 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 if (tablen != 256) {
1509 PyErr_SetString(PyExc_ValueError,
1510 "translation table must be 256 characters long");
1511 return NULL;
1512 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 if (delobj != NULL) {
1515 if (PyBytes_Check(delobj)) {
1516 del_table = PyBytes_AS_STRING(delobj);
1517 dellen = PyBytes_GET_SIZE(delobj);
1518 }
1519 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1520 return NULL;
1521 }
1522 else {
1523 del_table = NULL;
1524 dellen = 0;
1525 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 inlen = PyBytes_GET_SIZE(input_obj);
1528 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1529 if (result == NULL)
1530 return NULL;
1531 output_start = output = PyBytes_AsString(result);
1532 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 if (dellen == 0 && table != NULL) {
1535 /* If no deletions are required, use faster code */
1536 for (i = inlen; --i >= 0; ) {
1537 c = Py_CHARMASK(*input++);
1538 if (Py_CHARMASK((*output++ = table[c])) != c)
1539 changed = 1;
1540 }
1541 if (changed || !PyBytes_CheckExact(input_obj))
1542 return result;
1543 Py_DECREF(result);
1544 Py_INCREF(input_obj);
1545 return input_obj;
1546 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 if (table == NULL) {
1549 for (i = 0; i < 256; i++)
1550 trans_table[i] = Py_CHARMASK(i);
1551 } else {
1552 for (i = 0; i < 256; i++)
1553 trans_table[i] = Py_CHARMASK(table[i]);
1554 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 for (i = 0; i < dellen; i++)
1557 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 for (i = inlen; --i >= 0; ) {
1560 c = Py_CHARMASK(*input++);
1561 if (trans_table[c] != -1)
1562 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1563 continue;
1564 changed = 1;
1565 }
1566 if (!changed && PyBytes_CheckExact(input_obj)) {
1567 Py_DECREF(result);
1568 Py_INCREF(input_obj);
1569 return input_obj;
1570 }
1571 /* Fix the size of the resulting string */
1572 if (inlen > 0)
1573 _PyBytes_Resize(&result, output - output_start);
1574 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001575}
1576
1577
Georg Brandlabc38772009-04-12 15:51:51 +00001578static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001579bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001580{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001582}
1583
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001584/* find and count characters and substrings */
1585
/* Search the first target_len bytes of target for the byte c (via memchr).
   Evaluates to a char * pointing at the first match, or NULL if there is
   none. */
#define findchar(target, target_len, c)                              \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1588
1589/* String ops must return a string. */
1590/* If the object is subclass of string, create a copy */
1591Py_LOCAL(PyBytesObject *)
1592return_self(PyBytesObject *self)
1593{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 if (PyBytes_CheckExact(self)) {
1595 Py_INCREF(self);
1596 return self;
1597 }
1598 return (PyBytesObject *)PyBytes_FromStringAndSize(
1599 PyBytes_AS_STRING(self),
1600 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001601}
1602
1603Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001604countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001605{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 Py_ssize_t count=0;
1607 const char *start=target;
1608 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 while ( (start=findchar(start, end-start, c)) != NULL ) {
1611 count++;
1612 if (count >= maxcount)
1613 break;
1614 start += 1;
1615 }
1616 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001617}
1618
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001619
1620/* Algorithms for different cases of string replacement */
1621
1622/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1623Py_LOCAL(PyBytesObject *)
1624replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 const char *to_s, Py_ssize_t to_len,
1626 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 char *self_s, *result_s;
1629 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001630 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001635 /* 1 at the end plus 1 after every character;
1636 count = min(maxcount, self_len + 1) */
1637 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001639 else
1640 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1641 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 /* Check for overflow */
1644 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001645 assert(count > 0);
1646 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 PyErr_SetString(PyExc_OverflowError,
1648 "replacement bytes are too long");
1649 return NULL;
1650 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001651 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 if (! (result = (PyBytesObject *)
1654 PyBytes_FromStringAndSize(NULL, result_len)) )
1655 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 self_s = PyBytes_AS_STRING(self);
1658 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 /* Lay the first one down (guaranteed this will occur) */
1663 Py_MEMCPY(result_s, to_s, to_len);
1664 result_s += to_len;
1665 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 for (i=0; i<count; i++) {
1668 *result_s++ = *self_s++;
1669 Py_MEMCPY(result_s, to_s, to_len);
1670 result_s += to_len;
1671 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 /* Copy the rest of the original string */
1674 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677}
1678
1679/* Special case for deleting a single character */
1680/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1681Py_LOCAL(PyBytesObject *)
1682replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 char *self_s, *result_s;
1686 char *start, *next, *end;
1687 Py_ssize_t self_len, result_len;
1688 Py_ssize_t count;
1689 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 self_len = PyBytes_GET_SIZE(self);
1692 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 count = countchar(self_s, self_len, from_c, maxcount);
1695 if (count == 0) {
1696 return return_self(self);
1697 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 result_len = self_len - count; /* from_len == 1 */
1700 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 if ( (result = (PyBytesObject *)
1703 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1704 return NULL;
1705 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 start = self_s;
1708 end = self_s + self_len;
1709 while (count-- > 0) {
1710 next = findchar(start, end-start, from_c);
1711 if (next == NULL)
1712 break;
1713 Py_MEMCPY(result_s, start, next-start);
1714 result_s += (next-start);
1715 start = next+1;
1716 }
1717 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001720}
1721
1722/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1723
1724Py_LOCAL(PyBytesObject *)
1725replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 const char *from_s, Py_ssize_t from_len,
1727 Py_ssize_t maxcount) {
1728 char *self_s, *result_s;
1729 char *start, *next, *end;
1730 Py_ssize_t self_len, result_len;
1731 Py_ssize_t count, offset;
1732 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 self_len = PyBytes_GET_SIZE(self);
1735 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 count = stringlib_count(self_s, self_len,
1738 from_s, from_len,
1739 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 if (count == 0) {
1742 /* no matches */
1743 return return_self(self);
1744 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 result_len = self_len - (count * from_len);
1747 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 if ( (result = (PyBytesObject *)
1750 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1751 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 start = self_s;
1756 end = self_s + self_len;
1757 while (count-- > 0) {
1758 offset = stringlib_find(start, end-start,
1759 from_s, from_len,
1760 0);
1761 if (offset == -1)
1762 break;
1763 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 result_s += (next-start);
1768 start = next+from_len;
1769 }
1770 Py_MEMCPY(result_s, start, end-start);
1771 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001772}
1773
1774/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1775Py_LOCAL(PyBytesObject *)
1776replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001777 char from_c, char to_c,
1778 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 char *self_s, *result_s, *start, *end, *next;
1781 Py_ssize_t self_len;
1782 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 /* The result string will be the same size */
1785 self_s = PyBytes_AS_STRING(self);
1786 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 if (next == NULL) {
1791 /* No matches; return the original string */
1792 return return_self(self);
1793 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 /* Need to make a new string */
1796 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1797 if (result == NULL)
1798 return NULL;
1799 result_s = PyBytes_AS_STRING(result);
1800 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 /* change everything in-place, starting with this one */
1803 start = result_s + (next-self_s);
1804 *start = to_c;
1805 start++;
1806 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 while (--maxcount > 0) {
1809 next = findchar(start, end-start, from_c);
1810 if (next == NULL)
1811 break;
1812 *next = to_c;
1813 start = next+1;
1814 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817}
1818
1819/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1820Py_LOCAL(PyBytesObject *)
1821replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 const char *from_s, Py_ssize_t from_len,
1823 const char *to_s, Py_ssize_t to_len,
1824 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 char *result_s, *start, *end;
1827 char *self_s;
1828 Py_ssize_t self_len, offset;
1829 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 self_s = PyBytes_AS_STRING(self);
1834 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 offset = stringlib_find(self_s, self_len,
1837 from_s, from_len,
1838 0);
1839 if (offset == -1) {
1840 /* No matches; return the original string */
1841 return return_self(self);
1842 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 /* Need to make a new string */
1845 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1846 if (result == NULL)
1847 return NULL;
1848 result_s = PyBytes_AS_STRING(result);
1849 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 /* change everything in-place, starting with this one */
1852 start = result_s + offset;
1853 Py_MEMCPY(start, to_s, from_len);
1854 start += from_len;
1855 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 while ( --maxcount > 0) {
1858 offset = stringlib_find(start, end-start,
1859 from_s, from_len,
1860 0);
1861 if (offset==-1)
1862 break;
1863 Py_MEMCPY(start+offset, to_s, from_len);
1864 start += offset+from_len;
1865 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868}
1869
1870/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1871Py_LOCAL(PyBytesObject *)
1872replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 char from_c,
1874 const char *to_s, Py_ssize_t to_len,
1875 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 char *self_s, *result_s;
1878 char *start, *next, *end;
1879 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001880 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 self_s = PyBytes_AS_STRING(self);
1884 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 count = countchar(self_s, self_len, from_c, maxcount);
1887 if (count == 0) {
1888 /* no matches, return unchanged */
1889 return return_self(self);
1890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 /* use the difference between current and new, hence the "-1" */
1893 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001894 assert(count > 0);
1895 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 PyErr_SetString(PyExc_OverflowError,
1897 "replacement bytes are too long");
1898 return NULL;
1899 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001900 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001902 if ( (result = (PyBytesObject *)
1903 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1904 return NULL;
1905 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 start = self_s;
1908 end = self_s + self_len;
1909 while (count-- > 0) {
1910 next = findchar(start, end-start, from_c);
1911 if (next == NULL)
1912 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 if (next == start) {
1915 /* replace with the 'to' */
1916 Py_MEMCPY(result_s, to_s, to_len);
1917 result_s += to_len;
1918 start += 1;
1919 } else {
1920 /* copy the unchanged old then the 'to' */
1921 Py_MEMCPY(result_s, start, next-start);
1922 result_s += (next-start);
1923 Py_MEMCPY(result_s, to_s, to_len);
1924 result_s += to_len;
1925 start = next+1;
1926 }
1927 }
1928 /* Copy the remainder of the remaining string */
1929 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932}
1933
1934/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1935Py_LOCAL(PyBytesObject *)
1936replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 const char *from_s, Py_ssize_t from_len,
1938 const char *to_s, Py_ssize_t to_len,
1939 Py_ssize_t maxcount) {
1940 char *self_s, *result_s;
1941 char *start, *next, *end;
1942 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001943 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 self_s = PyBytes_AS_STRING(self);
1947 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 count = stringlib_count(self_s, self_len,
1950 from_s, from_len,
1951 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 if (count == 0) {
1954 /* no matches, return unchanged */
1955 return return_self(self);
1956 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 /* Check for overflow */
1959 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001960 assert(count > 0);
1961 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 PyErr_SetString(PyExc_OverflowError,
1963 "replacement bytes are too long");
1964 return NULL;
1965 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001966 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001968 if ( (result = (PyBytesObject *)
1969 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1970 return NULL;
1971 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973 start = self_s;
1974 end = self_s + self_len;
1975 while (count-- > 0) {
1976 offset = stringlib_find(start, end-start,
1977 from_s, from_len,
1978 0);
1979 if (offset == -1)
1980 break;
1981 next = start+offset;
1982 if (next == start) {
1983 /* replace with the 'to' */
1984 Py_MEMCPY(result_s, to_s, to_len);
1985 result_s += to_len;
1986 start += from_len;
1987 } else {
1988 /* copy the unchanged old then the 'to' */
1989 Py_MEMCPY(result_s, start, next-start);
1990 result_s += (next-start);
1991 Py_MEMCPY(result_s, to_s, to_len);
1992 result_s += to_len;
1993 start = next+from_len;
1994 }
1995 }
1996 /* Copy the remainder of the remaining string */
1997 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000}
2001
2002
2003Py_LOCAL(PyBytesObject *)
2004replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 const char *from_s, Py_ssize_t from_len,
2006 const char *to_s, Py_ssize_t to_len,
2007 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002009 if (maxcount < 0) {
2010 maxcount = PY_SSIZE_T_MAX;
2011 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2012 /* nothing to do; return the original string */
2013 return return_self(self);
2014 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002016 if (maxcount == 0 ||
2017 (from_len == 0 && to_len == 0)) {
2018 /* nothing to do; return the original string */
2019 return return_self(self);
2020 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002024 if (from_len == 0) {
2025 /* insert the 'to' string everywhere. */
2026 /* >>> "Python".replace("", ".") */
2027 /* '.P.y.t.h.o.n.' */
2028 return replace_interleave(self, to_s, to_len, maxcount);
2029 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2032 /* point for an empty self string to generate a non-empty string */
2033 /* Special case so the remaining code always gets a non-empty string */
2034 if (PyBytes_GET_SIZE(self) == 0) {
2035 return return_self(self);
2036 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 if (to_len == 0) {
2039 /* delete all occurrences of 'from' string */
2040 if (from_len == 1) {
2041 return replace_delete_single_character(
2042 self, from_s[0], maxcount);
2043 } else {
2044 return replace_delete_substring(self, from_s,
2045 from_len, maxcount);
2046 }
2047 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 if (from_len == to_len) {
2052 if (from_len == 1) {
2053 return replace_single_character_in_place(
2054 self,
2055 from_s[0],
2056 to_s[0],
2057 maxcount);
2058 } else {
2059 return replace_substring_in_place(
2060 self, from_s, from_len, to_s, to_len,
2061 maxcount);
2062 }
2063 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 /* Otherwise use the more generic algorithms */
2066 if (from_len == 1) {
2067 return replace_single_character(self, from_s[0],
2068 to_s, to_len, maxcount);
2069 } else {
2070 /* len('from')>=2, len('to')>=1 */
2071 return replace_substring(self, from_s, from_len, to_s, to_len,
2072 maxcount);
2073 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074}
2075
2076PyDoc_STRVAR(replace__doc__,
2077"B.replace(old, new[, count]) -> bytes\n\
2078\n\
2079Return a copy of B with all occurrences of subsection\n\
2080old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002081given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
2083static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002084bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002086 Py_ssize_t count = -1;
2087 PyObject *from, *to;
2088 const char *from_s, *to_s;
2089 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2092 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002094 if (PyBytes_Check(from)) {
2095 from_s = PyBytes_AS_STRING(from);
2096 from_len = PyBytes_GET_SIZE(from);
2097 }
2098 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2099 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 if (PyBytes_Check(to)) {
2102 to_s = PyBytes_AS_STRING(to);
2103 to_len = PyBytes_GET_SIZE(to);
2104 }
2105 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2106 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 return (PyObject *)replace((PyBytesObject *) self,
2109 from_s, from_len,
2110 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002111}
2112
2113/** End DALKE **/
2114
2115/* Matches the end (direction >= 0) or start (direction < 0) of self
2116 * against substr, using the start and end arguments. Returns
2117 * -1 on error, 0 if not found and 1 if found.
2118 */
2119Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002120_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002122{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 Py_ssize_t len = PyBytes_GET_SIZE(self);
2124 Py_ssize_t slen;
2125 const char* sub;
2126 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 if (PyBytes_Check(substr)) {
2129 sub = PyBytes_AS_STRING(substr);
2130 slen = PyBytes_GET_SIZE(substr);
2131 }
2132 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2133 return -1;
2134 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 if (direction < 0) {
2139 /* startswith */
2140 if (start+slen > len)
2141 return 0;
2142 } else {
2143 /* endswith */
2144 if (end-start < slen || start > len)
2145 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002147 if (end-slen > start)
2148 start = end - slen;
2149 }
2150 if (end-start >= slen)
2151 return ! memcmp(str+start, sub, slen);
2152 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153}
2154
2155
2156PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002157"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158\n\
2159Return True if B starts with the specified prefix, False otherwise.\n\
2160With optional start, test B beginning at that position.\n\
2161With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002162prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163
2164static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002165bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 Py_ssize_t start = 0;
2168 Py_ssize_t end = PY_SSIZE_T_MAX;
2169 PyObject *subobj;
2170 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Jesus Ceaac451502011-04-20 17:09:23 +02002172 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 return NULL;
2174 if (PyTuple_Check(subobj)) {
2175 Py_ssize_t i;
2176 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2177 result = _bytes_tailmatch(self,
2178 PyTuple_GET_ITEM(subobj, i),
2179 start, end, -1);
2180 if (result == -1)
2181 return NULL;
2182 else if (result) {
2183 Py_RETURN_TRUE;
2184 }
2185 }
2186 Py_RETURN_FALSE;
2187 }
2188 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002189 if (result == -1) {
2190 if (PyErr_ExceptionMatches(PyExc_TypeError))
2191 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2192 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002193 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002194 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002195 else
2196 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002197}
2198
2199
2200PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002201"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202\n\
2203Return True if B ends with the specified suffix, False otherwise.\n\
2204With optional start, test B beginning at that position.\n\
2205With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002206suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
2208static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002209bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 Py_ssize_t start = 0;
2212 Py_ssize_t end = PY_SSIZE_T_MAX;
2213 PyObject *subobj;
2214 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215
Jesus Ceaac451502011-04-20 17:09:23 +02002216 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002217 return NULL;
2218 if (PyTuple_Check(subobj)) {
2219 Py_ssize_t i;
2220 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2221 result = _bytes_tailmatch(self,
2222 PyTuple_GET_ITEM(subobj, i),
2223 start, end, +1);
2224 if (result == -1)
2225 return NULL;
2226 else if (result) {
2227 Py_RETURN_TRUE;
2228 }
2229 }
2230 Py_RETURN_FALSE;
2231 }
2232 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002233 if (result == -1) {
2234 if (PyErr_ExceptionMatches(PyExc_TypeError))
2235 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2236 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002237 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002238 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002239 else
2240 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002241}
2242
2243
2244PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002245"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002246\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002247Decode B using the codec registered for encoding. Default encoding\n\
2248is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002249handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2250a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002252able to handle UnicodeDecodeErrors.");
2253
2254static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002255bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002256{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002257 const char *encoding = NULL;
2258 const char *errors = NULL;
2259 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2262 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002263 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002264}
2265
Guido van Rossum20188312006-05-05 15:15:40 +00002266
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002267PyDoc_STRVAR(splitlines__doc__,
2268"B.splitlines([keepends]) -> list of lines\n\
2269\n\
2270Return a list of the lines in B, breaking at line boundaries.\n\
2271Line breaks are not included in the resulting list unless keepends\n\
2272is given and true.");
2273
2274static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002275bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002276{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002277 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002278 int keepends = 0;
2279
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002280 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2281 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002282 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002283
2284 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002285 (PyObject*) self, PyBytes_AS_STRING(self),
2286 PyBytes_GET_SIZE(self), keepends
2287 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002288}
2289
2290
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002291PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002292"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002293\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002294Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002295Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002296Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002297
2298static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002299hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002300{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002301 if (c >= 128)
2302 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002303 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 return c - '0';
2305 else {
David Malcolm96960882010-11-05 17:23:41 +00002306 if (Py_ISUPPER(c))
2307 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 if (c >= 'a' && c <= 'f')
2309 return c - 'a' + 10;
2310 }
2311 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002312}
2313
2314static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002315bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 PyObject *newstring, *hexobj;
2318 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 Py_ssize_t hexlen, byteslen, i, j;
2320 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002321 void *data;
2322 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2325 return NULL;
2326 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002327 if (PyUnicode_READY(hexobj))
2328 return NULL;
2329 kind = PyUnicode_KIND(hexobj);
2330 data = PyUnicode_DATA(hexobj);
2331 hexlen = PyUnicode_GET_LENGTH(hexobj);
2332
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 byteslen = hexlen/2; /* This overestimates if there are spaces */
2334 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2335 if (!newstring)
2336 return NULL;
2337 buf = PyBytes_AS_STRING(newstring);
2338 for (i = j = 0; i < hexlen; i += 2) {
2339 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002340 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 i++;
2342 if (i >= hexlen)
2343 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002344 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2345 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 if (top == -1 || bot == -1) {
2347 PyErr_Format(PyExc_ValueError,
2348 "non-hexadecimal number found in "
2349 "fromhex() arg at position %zd", i);
2350 goto error;
2351 }
2352 buf[j++] = (top << 4) + bot;
2353 }
2354 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2355 goto error;
2356 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002357
2358 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 Py_XDECREF(newstring);
2360 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002361}
2362
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002363PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002364"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002365
2366static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002367bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002369 Py_ssize_t res;
2370 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2371 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002372}
2373
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002374
2375static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002376bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002377{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002379}
2380
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002381
2382static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002383bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002384 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2385 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2386 _Py_capitalize__doc__},
2387 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2388 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2389 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2390 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2391 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002392 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 expandtabs__doc__},
2394 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2395 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2396 fromhex_doc},
2397 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2398 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2399 _Py_isalnum__doc__},
2400 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2401 _Py_isalpha__doc__},
2402 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2403 _Py_isdigit__doc__},
2404 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2405 _Py_islower__doc__},
2406 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2407 _Py_isspace__doc__},
2408 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2409 _Py_istitle__doc__},
2410 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2411 _Py_isupper__doc__},
2412 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2413 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2414 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2415 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2416 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2417 _Py_maketrans__doc__},
2418 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2419 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2420 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2421 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2422 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2423 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2424 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002425 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002427 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002428 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 splitlines__doc__},
2430 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2431 startswith__doc__},
2432 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2433 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2434 _Py_swapcase__doc__},
2435 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2436 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2437 translate__doc__},
2438 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2439 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2440 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2441 sizeof__doc__},
2442 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002443};
2444
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002445static PyObject *
2446str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2447
2448static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002449bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 PyObject *x = NULL;
2452 const char *encoding = NULL;
2453 const char *errors = NULL;
2454 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002455 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 Py_ssize_t size;
2457 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002458 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 if (type != &PyBytes_Type)
2461 return str_subtype_new(type, args, kwds);
2462 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2463 &encoding, &errors))
2464 return NULL;
2465 if (x == NULL) {
2466 if (encoding != NULL || errors != NULL) {
2467 PyErr_SetString(PyExc_TypeError,
2468 "encoding or errors without sequence "
2469 "argument");
2470 return NULL;
2471 }
2472 return PyBytes_FromString("");
2473 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002475 if (PyUnicode_Check(x)) {
2476 /* Encode via the codec registry */
2477 if (encoding == NULL) {
2478 PyErr_SetString(PyExc_TypeError,
2479 "string argument without an encoding");
2480 return NULL;
2481 }
2482 new = PyUnicode_AsEncodedString(x, encoding, errors);
2483 if (new == NULL)
2484 return NULL;
2485 assert(PyBytes_Check(new));
2486 return new;
2487 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002488
2489 /* We'd like to call PyObject_Bytes here, but we need to check for an
2490 integer argument before deferring to PyBytes_FromObject, something
2491 PyObject_Bytes doesn't do. */
2492 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2493 if (func != NULL) {
2494 new = PyObject_CallFunctionObjArgs(func, NULL);
2495 Py_DECREF(func);
2496 if (new == NULL)
2497 return NULL;
2498 if (!PyBytes_Check(new)) {
2499 PyErr_Format(PyExc_TypeError,
2500 "__bytes__ returned non-bytes (type %.200s)",
2501 Py_TYPE(new)->tp_name);
2502 Py_DECREF(new);
2503 return NULL;
2504 }
2505 return new;
2506 }
2507 else if (PyErr_Occurred())
2508 return NULL;
2509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 /* Is it an integer? */
2511 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2512 if (size == -1 && PyErr_Occurred()) {
2513 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2514 return NULL;
2515 PyErr_Clear();
2516 }
2517 else if (size < 0) {
2518 PyErr_SetString(PyExc_ValueError, "negative count");
2519 return NULL;
2520 }
2521 else {
2522 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002523 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002524 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002525 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 return new;
2528 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002530 /* If it's not unicode, there can't be encoding or errors */
2531 if (encoding != NULL || errors != NULL) {
2532 PyErr_SetString(PyExc_TypeError,
2533 "encoding or errors without a string argument");
2534 return NULL;
2535 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002536
2537 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002538}
2539
2540PyObject *
2541PyBytes_FromObject(PyObject *x)
2542{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 PyObject *new, *it;
2544 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 if (x == NULL) {
2547 PyErr_BadInternalCall();
2548 return NULL;
2549 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002550
2551 if (PyBytes_CheckExact(x)) {
2552 Py_INCREF(x);
2553 return x;
2554 }
2555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 /* Use the modern buffer interface */
2557 if (PyObject_CheckBuffer(x)) {
2558 Py_buffer view;
2559 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2560 return NULL;
2561 new = PyBytes_FromStringAndSize(NULL, view.len);
2562 if (!new)
2563 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2565 &view, view.len, 'C') < 0)
2566 goto fail;
2567 PyBuffer_Release(&view);
2568 return new;
2569 fail:
2570 Py_XDECREF(new);
2571 PyBuffer_Release(&view);
2572 return NULL;
2573 }
2574 if (PyUnicode_Check(x)) {
2575 PyErr_SetString(PyExc_TypeError,
2576 "cannot convert unicode object to bytes");
2577 return NULL;
2578 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002580 if (PyList_CheckExact(x)) {
2581 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2582 if (new == NULL)
2583 return NULL;
2584 for (i = 0; i < Py_SIZE(x); i++) {
2585 Py_ssize_t value = PyNumber_AsSsize_t(
2586 PyList_GET_ITEM(x, i), PyExc_ValueError);
2587 if (value == -1 && PyErr_Occurred()) {
2588 Py_DECREF(new);
2589 return NULL;
2590 }
2591 if (value < 0 || value >= 256) {
2592 PyErr_SetString(PyExc_ValueError,
2593 "bytes must be in range(0, 256)");
2594 Py_DECREF(new);
2595 return NULL;
2596 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002597 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002598 }
2599 return new;
2600 }
2601 if (PyTuple_CheckExact(x)) {
2602 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2603 if (new == NULL)
2604 return NULL;
2605 for (i = 0; i < Py_SIZE(x); i++) {
2606 Py_ssize_t value = PyNumber_AsSsize_t(
2607 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2608 if (value == -1 && PyErr_Occurred()) {
2609 Py_DECREF(new);
2610 return NULL;
2611 }
2612 if (value < 0 || value >= 256) {
2613 PyErr_SetString(PyExc_ValueError,
2614 "bytes must be in range(0, 256)");
2615 Py_DECREF(new);
2616 return NULL;
2617 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002618 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 }
2620 return new;
2621 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002623 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002624 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 if (size == -1 && PyErr_Occurred())
2626 return NULL;
2627 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2628 returning a shared empty bytes string. This required because we
2629 want to call _PyBytes_Resize() the returned object, which we can
2630 only do on bytes objects with refcount == 1. */
2631 size += 1;
2632 new = PyBytes_FromStringAndSize(NULL, size);
2633 if (new == NULL)
2634 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 /* Get the iterator */
2637 it = PyObject_GetIter(x);
2638 if (it == NULL)
2639 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 /* Run the iterator to exhaustion */
2642 for (i = 0; ; i++) {
2643 PyObject *item;
2644 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 /* Get the next item */
2647 item = PyIter_Next(it);
2648 if (item == NULL) {
2649 if (PyErr_Occurred())
2650 goto error;
2651 break;
2652 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 /* Interpret it as an int (__index__) */
2655 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2656 Py_DECREF(item);
2657 if (value == -1 && PyErr_Occurred())
2658 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002660 /* Range check */
2661 if (value < 0 || value >= 256) {
2662 PyErr_SetString(PyExc_ValueError,
2663 "bytes must be in range(0, 256)");
2664 goto error;
2665 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 /* Append the byte */
2668 if (i >= size) {
2669 size = 2 * size + 1;
2670 if (_PyBytes_Resize(&new, size) < 0)
2671 goto error;
2672 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002673 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 }
2675 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Clean up and return success */
2678 Py_DECREF(it);
2679 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002680
2681 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01002683 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685}
2686
2687static PyObject *
2688str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2689{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 PyObject *tmp, *pnew;
2691 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 assert(PyType_IsSubtype(type, &PyBytes_Type));
2694 tmp = bytes_new(&PyBytes_Type, args, kwds);
2695 if (tmp == NULL)
2696 return NULL;
2697 assert(PyBytes_CheckExact(tmp));
2698 n = PyBytes_GET_SIZE(tmp);
2699 pnew = type->tp_alloc(type, n);
2700 if (pnew != NULL) {
2701 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2702 PyBytes_AS_STRING(tmp), n+1);
2703 ((PyBytesObject *)pnew)->ob_shash =
2704 ((PyBytesObject *)tmp)->ob_shash;
2705 }
2706 Py_DECREF(tmp);
2707 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708}
2709
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002710PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002711"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002713bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002714bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2715bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002716\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002718 - an iterable yielding integers in range(256)\n\
2719 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002720 - any object implementing the buffer API.\n\
2721 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002723static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002724
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2727 "bytes",
2728 PyBytesObject_SIZE,
2729 sizeof(char),
2730 bytes_dealloc, /* tp_dealloc */
2731 0, /* tp_print */
2732 0, /* tp_getattr */
2733 0, /* tp_setattr */
2734 0, /* tp_reserved */
2735 (reprfunc)bytes_repr, /* tp_repr */
2736 0, /* tp_as_number */
2737 &bytes_as_sequence, /* tp_as_sequence */
2738 &bytes_as_mapping, /* tp_as_mapping */
2739 (hashfunc)bytes_hash, /* tp_hash */
2740 0, /* tp_call */
2741 bytes_str, /* tp_str */
2742 PyObject_GenericGetAttr, /* tp_getattro */
2743 0, /* tp_setattro */
2744 &bytes_as_buffer, /* tp_as_buffer */
2745 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2746 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2747 bytes_doc, /* tp_doc */
2748 0, /* tp_traverse */
2749 0, /* tp_clear */
2750 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2751 0, /* tp_weaklistoffset */
2752 bytes_iter, /* tp_iter */
2753 0, /* tp_iternext */
2754 bytes_methods, /* tp_methods */
2755 0, /* tp_members */
2756 0, /* tp_getset */
2757 &PyBaseObject_Type, /* tp_base */
2758 0, /* tp_dict */
2759 0, /* tp_descr_get */
2760 0, /* tp_descr_set */
2761 0, /* tp_dictoffset */
2762 0, /* tp_init */
2763 0, /* tp_alloc */
2764 bytes_new, /* tp_new */
2765 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002766};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002767
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002768void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002769PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002771 PyObject *v;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002772 assert(pv != NULL);
2773 if (*pv == NULL)
2774 return;
2775 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002776 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002777 return;
2778 }
2779 v = bytes_concat(*pv, w);
2780 Py_DECREF(*pv);
2781 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002782}
2783
2784void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002785PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002786{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002787 PyBytes_Concat(pv, w);
2788 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789}
2790
2791
2792/* The following function breaks the notion that strings are immutable:
2793 it changes the size of a string. We get away with this only if there
2794 is only one module referencing the object. You can also think of it
2795 as creating a new string object and destroying the old one, only
2796 more efficiently. In any case, don't use this if the string may
2797 already be known to some other part of the code...
2798 Note that if there's not enough memory to resize the string, the original
2799 string object at *pv is deallocated, *pv is set to NULL, an "out of
2800 memory" exception is set, and -1 is returned. Else (on success) 0 is
2801 returned, and the value in *pv may or may not be the same as on input.
2802 As always, an extra byte is allocated for a trailing \0 byte (newsize
2803 does *not* include that), and a trailing \0 byte is stored.
2804*/
2805
2806int
2807_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2808{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002809 PyObject *v;
2810 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 v = *pv;
2812 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2813 *pv = 0;
2814 Py_DECREF(v);
2815 PyErr_BadInternalCall();
2816 return -1;
2817 }
2818 /* XXX UNREF/NEWREF interface should be more symmetrical */
2819 _Py_DEC_REFTOTAL;
2820 _Py_ForgetReference(v);
2821 *pv = (PyObject *)
2822 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2823 if (*pv == NULL) {
2824 PyObject_Del(v);
2825 PyErr_NoMemory();
2826 return -1;
2827 }
2828 _Py_NewReference(*pv);
2829 sv = (PyBytesObject *) *pv;
2830 Py_SIZE(sv) = newsize;
2831 sv->ob_sval[newsize] = '\0';
2832 sv->ob_shash = -1; /* invalidate cached hash value */
2833 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002834}
2835
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002836void
2837PyBytes_Fini(void)
2838{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002840 for (i = 0; i < UCHAR_MAX + 1; i++)
2841 Py_CLEAR(characters[i]);
2842 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002843}
2844
Benjamin Peterson4116f362008-05-27 00:36:20 +00002845/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002846
2847typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002848 PyObject_HEAD
2849 Py_ssize_t it_index;
2850 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002852
2853static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 _PyObject_GC_UNTRACK(it);
2857 Py_XDECREF(it->it_seq);
2858 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002859}
2860
2861static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002862striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002863{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002864 Py_VISIT(it->it_seq);
2865 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002866}
2867
2868static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002870{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002871 PyBytesObject *seq;
2872 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002874 assert(it != NULL);
2875 seq = it->it_seq;
2876 if (seq == NULL)
2877 return NULL;
2878 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002880 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2881 item = PyLong_FromLong(
2882 (unsigned char)seq->ob_sval[it->it_index]);
2883 if (item != NULL)
2884 ++it->it_index;
2885 return item;
2886 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 Py_DECREF(seq);
2889 it->it_seq = NULL;
2890 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002891}
2892
2893static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002894striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002896 Py_ssize_t len = 0;
2897 if (it->it_seq)
2898 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2899 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002900}
2901
2902PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002903 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002904
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002905static PyObject *
2906striter_reduce(striterobject *it)
2907{
2908 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002909 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002910 it->it_seq, it->it_index);
2911 } else {
2912 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2913 if (u == NULL)
2914 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002915 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002916 }
2917}
2918
2919PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2920
2921static PyObject *
2922striter_setstate(striterobject *it, PyObject *state)
2923{
2924 Py_ssize_t index = PyLong_AsSsize_t(state);
2925 if (index == -1 && PyErr_Occurred())
2926 return NULL;
2927 if (index < 0)
2928 index = 0;
2929 it->it_index = index;
2930 Py_RETURN_NONE;
2931}
2932
2933PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2934
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2937 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002938 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2939 reduce_doc},
2940 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2941 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002942 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002943};
2944
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002945PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002946 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2947 "bytes_iterator", /* tp_name */
2948 sizeof(striterobject), /* tp_basicsize */
2949 0, /* tp_itemsize */
2950 /* methods */
2951 (destructor)striter_dealloc, /* tp_dealloc */
2952 0, /* tp_print */
2953 0, /* tp_getattr */
2954 0, /* tp_setattr */
2955 0, /* tp_reserved */
2956 0, /* tp_repr */
2957 0, /* tp_as_number */
2958 0, /* tp_as_sequence */
2959 0, /* tp_as_mapping */
2960 0, /* tp_hash */
2961 0, /* tp_call */
2962 0, /* tp_str */
2963 PyObject_GenericGetAttr, /* tp_getattro */
2964 0, /* tp_setattro */
2965 0, /* tp_as_buffer */
2966 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2967 0, /* tp_doc */
2968 (traverseproc)striter_traverse, /* tp_traverse */
2969 0, /* tp_clear */
2970 0, /* tp_richcompare */
2971 0, /* tp_weaklistoffset */
2972 PyObject_SelfIter, /* tp_iter */
2973 (iternextfunc)striter_next, /* tp_iternext */
2974 striter_methods, /* tp_methods */
2975 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002976};
2977
2978static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002979bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002980{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002981 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002982
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002983 if (!PyBytes_Check(seq)) {
2984 PyErr_BadInternalCall();
2985 return NULL;
2986 }
2987 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
2988 if (it == NULL)
2989 return NULL;
2990 it->it_index = 0;
2991 Py_INCREF(seq);
2992 it->it_seq = (PyBytesObject *)seq;
2993 _PyObject_GC_TRACK(it);
2994 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002995}