blob: cb5679bc38b6fa47fe3051657d3477bcbaf12cf2 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
/* Counters bumped each time the shared empty object (null_strings) or a
   shared one-byte object (one_strings) is returned instead of allocating;
   only compiled in when COUNT_ALLOCS is defined. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
/* Caches of shared short objects: characters[c] holds the one-byte object
   for byte value c, nullstring holds the empty object.  Entries start out
   NULL and are filled in by PyBytes_FromStringAndSize() and
   PyBytes_FromString() the first time each value is created. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000176 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000181 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
185 * they don't affect the amount of space we reserve.
186 */
187 if ((*f == 'l' || *f == 'z') &&
188 (f[1] == 'd' || f[1] == 'u'))
189 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
198 case 'd': case 'u': case 'i': case 'x':
199 (void) va_arg(count, int);
200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
214 * XXX I count 18 -- what's the extra for?
215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
224 what's in the argument list) */
225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000231 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 /* step 2: fill the buffer */
233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
235 string = PyBytes_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
244 Py_ssize_t i;
245 int longflag = 0;
246 int size_tflag = 0;
247 /* parse the width.precision part (we're only
248 interested in the precision value, if any) */
249 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000250 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 n = (n*10) + *f++ - '0';
252 if (*f == '.') {
253 f++;
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 }
David Malcolm96960882010-11-05 17:23:41 +0000258 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 f++;
260 /* handle the long flag, but only for %ld and %lu.
261 others can be added when necessary. */
262 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
263 longflag = 1;
264 ++f;
265 }
266 /* handle the size_t flag. */
267 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
268 size_tflag = 1;
269 ++f;
270 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 switch (*f) {
273 case 'c':
274 *s++ = va_arg(vargs, int);
275 break;
276 case 'd':
277 if (longflag)
278 sprintf(s, "%ld", va_arg(vargs, long));
279 else if (size_tflag)
280 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
281 va_arg(vargs, Py_ssize_t));
282 else
283 sprintf(s, "%d", va_arg(vargs, int));
284 s += strlen(s);
285 break;
286 case 'u':
287 if (longflag)
288 sprintf(s, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(s, "%u",
295 va_arg(vargs, unsigned int));
296 s += strlen(s);
297 break;
298 case 'i':
299 sprintf(s, "%i", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 'x':
303 sprintf(s, "%x", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 's':
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (n > 0 && i > n)
310 i = n;
311 Py_MEMCPY(s, p, i);
312 s += i;
313 break;
314 case 'p':
315 sprintf(s, "%p", va_arg(vargs, void*));
316 /* %p is ill-defined: ensure leading 0x. */
317 if (s[1] == 'X')
318 s[1] = 'x';
319 else if (s[1] != 'x') {
320 memmove(s+2, s, strlen(s)+1);
321 s[0] = '0';
322 s[1] = 'x';
323 }
324 s += strlen(s);
325 break;
326 case '%':
327 *s++ = '%';
328 break;
329 default:
330 strcpy(s, p);
331 s += strlen(s);
332 goto end;
333 }
334 } else
335 *s++ = *f;
336 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337
338 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
340 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000341}
342
343PyObject *
344PyBytes_FromFormat(const char *format, ...)
345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 PyObject* ret;
347 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000348
349#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 ret = PyBytes_FromFormatV(format, vargs);
355 va_end(vargs);
356 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357}
358
359static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000360bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000363}
364
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365/* Unescape a backslash-escaped string. If unicode is non-zero,
366 the string is a u-literal. If recode_encoding is non-zero,
367 the string is UTF-8 encoded and should be re-encoded in the
368 specified encoding. */
369
370PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_ssize_t len,
372 const char *errors,
373 Py_ssize_t unicode,
374 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 int c;
377 char *p, *buf;
378 const char *end;
379 PyObject *v;
380 Py_ssize_t newlen = recode_encoding ? 4*len:len;
381 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
382 if (v == NULL)
383 return NULL;
384 p = buf = PyBytes_AsString(v);
385 end = s + len;
386 while (s < end) {
387 if (*s != '\\') {
388 non_esc:
389 if (recode_encoding && (*s & 0x80)) {
390 PyObject *u, *w;
391 char *r;
392 const char* t;
393 Py_ssize_t rn;
394 t = s;
395 /* Decode non-ASCII bytes as UTF-8. */
396 while (t < end && (*t & 0x80)) t++;
397 u = PyUnicode_DecodeUTF8(s, t - s, errors);
398 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 /* Recode them in target encoding. */
401 w = PyUnicode_AsEncodedString(
402 u, recode_encoding, errors);
403 Py_DECREF(u);
404 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 /* Append bytes to output buffer. */
407 assert(PyBytes_Check(w));
408 r = PyBytes_AS_STRING(w);
409 rn = PyBytes_GET_SIZE(w);
410 Py_MEMCPY(p, r, rn);
411 p += rn;
412 Py_DECREF(w);
413 s = t;
414 } else {
415 *p++ = *s++;
416 }
417 continue;
418 }
419 s++;
420 if (s==end) {
421 PyErr_SetString(PyExc_ValueError,
422 "Trailing \\ in string");
423 goto failed;
424 }
425 switch (*s++) {
426 /* XXX This assumes ASCII! */
427 case '\n': break;
428 case '\\': *p++ = '\\'; break;
429 case '\'': *p++ = '\''; break;
430 case '\"': *p++ = '\"'; break;
431 case 'b': *p++ = '\b'; break;
432 case 'f': *p++ = '\014'; break; /* FF */
433 case 't': *p++ = '\t'; break;
434 case 'n': *p++ = '\n'; break;
435 case 'r': *p++ = '\r'; break;
436 case 'v': *p++ = '\013'; break; /* VT */
437 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
438 case '0': case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 c = s[-1] - '0';
441 if (s < end && '0' <= *s && *s <= '7') {
442 c = (c<<3) + *s++ - '0';
443 if (s < end && '0' <= *s && *s <= '7')
444 c = (c<<3) + *s++ - '0';
445 }
446 *p++ = c;
447 break;
448 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000449 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 unsigned int x = 0;
451 c = Py_CHARMASK(*s);
452 s++;
David Malcolm96960882010-11-05 17:23:41 +0000453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000455 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 x = 10 + c - 'a';
457 else
458 x = 10 + c - 'A';
459 x = x << 4;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x += 10 + c - 'a';
466 else
467 x += 10 + c - 'A';
468 *p++ = x;
469 break;
470 }
471 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200472 PyErr_Format(PyExc_ValueError,
473 "invalid \\x escape at position %d",
474 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 goto failed;
476 }
477 if (strcmp(errors, "replace") == 0) {
478 *p++ = '?';
479 } else if (strcmp(errors, "ignore") == 0)
480 /* do nothing */;
481 else {
482 PyErr_Format(PyExc_ValueError,
483 "decoding error; unknown "
484 "error handling code: %.400s",
485 errors);
486 goto failed;
487 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200488 /* skip \x */
489 if (s < end && Py_ISXDIGIT(s[0]))
490 s++; /* and a hexdigit */
491 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000492 default:
493 *p++ = '\\';
494 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200495 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 UTF-8 bytes may follow. */
497 }
498 }
499 if (p-buf < newlen)
500 _PyBytes_Resize(&v, p - buf);
501 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000502 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000503 Py_DECREF(v);
504 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000505}
506
507/* -------------------------------------------------------------------- */
508/* object api */
509
510Py_ssize_t
511PyBytes_Size(register PyObject *op)
512{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 if (!PyBytes_Check(op)) {
514 PyErr_Format(PyExc_TypeError,
515 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
516 return -1;
517 }
518 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000519}
520
521char *
522PyBytes_AsString(register PyObject *op)
523{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 if (!PyBytes_Check(op)) {
525 PyErr_Format(PyExc_TypeError,
526 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
527 return NULL;
528 }
529 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000530}
531
532int
533PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 register char **s,
535 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (s == NULL) {
538 PyErr_BadInternalCall();
539 return -1;
540 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (!PyBytes_Check(obj)) {
543 PyErr_Format(PyExc_TypeError,
544 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
545 return -1;
546 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 *s = PyBytes_AS_STRING(obj);
549 if (len != NULL)
550 *len = PyBytes_GET_SIZE(obj);
551 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
552 PyErr_SetString(PyExc_TypeError,
553 "expected bytes with no null");
554 return -1;
555 }
556 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000557}
Neal Norwitz6968b052007-02-27 19:02:19 +0000558
559/* -------------------------------------------------------------------- */
560/* Methods */
561
Eric Smith0923d1d2009-04-16 20:16:10 +0000562#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
564#include "stringlib/fastsearch.h"
565#include "stringlib/count.h"
566#include "stringlib/find.h"
567#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000568#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000569#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000570
Eric Smith0f78bff2009-11-30 01:01:42 +0000571#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000572
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000573PyObject *
574PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000575{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 static const char *hexdigits = "0123456789abcdef";
577 register PyBytesObject* op = (PyBytesObject*) obj;
578 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000579 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000581 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 PyErr_SetString(PyExc_OverflowError,
583 "bytes object is too large to make repr");
584 return NULL;
585 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000586 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 v = PyUnicode_FromUnicode(NULL, newsize);
588 if (v == NULL) {
589 return NULL;
590 }
591 else {
592 register Py_ssize_t i;
593 register Py_UNICODE c;
594 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
595 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 /* Figure out which quote to use; single is preferred */
598 quote = '\'';
599 if (smartquotes) {
600 char *test, *start;
601 start = PyBytes_AS_STRING(op);
602 for (test = start; test < start+length; ++test) {
603 if (*test == '"') {
604 quote = '\''; /* back to single */
605 goto decided;
606 }
607 else if (*test == '\'')
608 quote = '"';
609 }
610 decided:
611 ;
612 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 *p++ = 'b', *p++ = quote;
615 for (i = 0; i < length; i++) {
616 /* There's at least enough room for a hex escape
617 and a closing quote. */
618 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
619 c = op->ob_sval[i];
620 if (c == quote || c == '\\')
621 *p++ = '\\', *p++ = c;
622 else if (c == '\t')
623 *p++ = '\\', *p++ = 't';
624 else if (c == '\n')
625 *p++ = '\\', *p++ = 'n';
626 else if (c == '\r')
627 *p++ = '\\', *p++ = 'r';
628 else if (c < ' ' || c >= 0x7f) {
629 *p++ = '\\';
630 *p++ = 'x';
631 *p++ = hexdigits[(c & 0xf0) >> 4];
632 *p++ = hexdigits[c & 0xf];
633 }
634 else
635 *p++ = c;
636 }
637 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
638 *p++ = quote;
639 *p = '\0';
640 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
641 Py_DECREF(v);
642 return NULL;
643 }
644 return v;
645 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000646}
647
Neal Norwitz6968b052007-02-27 19:02:19 +0000648static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000649bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000650{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000652}
653
Neal Norwitz6968b052007-02-27 19:02:19 +0000654static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000655bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000656{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 if (Py_BytesWarningFlag) {
658 if (PyErr_WarnEx(PyExc_BytesWarning,
659 "str() on a bytes instance", 1))
660 return NULL;
661 }
662 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000663}
664
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000666bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000667{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669}
Neal Norwitz6968b052007-02-27 19:02:19 +0000670
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000671/* This is also used by PyBytes_Concat() */
672static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000673bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000674{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 Py_ssize_t size;
676 Py_buffer va, vb;
677 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000679 va.len = -1;
680 vb.len = -1;
681 if (_getbuffer(a, &va) < 0 ||
682 _getbuffer(b, &vb) < 0) {
683 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
684 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
685 goto done;
686 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 /* Optimize end cases */
689 if (va.len == 0 && PyBytes_CheckExact(b)) {
690 result = b;
691 Py_INCREF(result);
692 goto done;
693 }
694 if (vb.len == 0 && PyBytes_CheckExact(a)) {
695 result = a;
696 Py_INCREF(result);
697 goto done;
698 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 size = va.len + vb.len;
701 if (size < 0) {
702 PyErr_NoMemory();
703 goto done;
704 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 result = PyBytes_FromStringAndSize(NULL, size);
707 if (result != NULL) {
708 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
709 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000711
712 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 if (va.len != -1)
714 PyBuffer_Release(&va);
715 if (vb.len != -1)
716 PyBuffer_Release(&vb);
717 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000718}
Neal Norwitz6968b052007-02-27 19:02:19 +0000719
720static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000721bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000722{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 register Py_ssize_t i;
724 register Py_ssize_t j;
725 register Py_ssize_t size;
726 register PyBytesObject *op;
727 size_t nbytes;
728 if (n < 0)
729 n = 0;
730 /* watch out for overflows: the size can overflow int,
731 * and the # of bytes needed can overflow size_t
732 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 PyErr_SetString(PyExc_OverflowError,
735 "repeated bytes are too long");
736 return NULL;
737 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000738 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
740 Py_INCREF(a);
741 return (PyObject *)a;
742 }
743 nbytes = (size_t)size;
744 if (nbytes + PyBytesObject_SIZE <= nbytes) {
745 PyErr_SetString(PyExc_OverflowError,
746 "repeated bytes are too long");
747 return NULL;
748 }
749 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
750 if (op == NULL)
751 return PyErr_NoMemory();
752 PyObject_INIT_VAR(op, &PyBytes_Type, size);
753 op->ob_shash = -1;
754 op->ob_sval[size] = '\0';
755 if (Py_SIZE(a) == 1 && n > 0) {
756 memset(op->ob_sval, a->ob_sval[0] , n);
757 return (PyObject *) op;
758 }
759 i = 0;
760 if (i < size) {
761 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
762 i = Py_SIZE(a);
763 }
764 while (i < size) {
765 j = (i <= size-i) ? i : size-i;
766 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
767 i += j;
768 }
769 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000770}
771
Guido van Rossum98297ee2007-11-06 21:34:58 +0000772static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000773bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000774{
775 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
776 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000777 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000778 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000779 PyErr_Clear();
780 if (_getbuffer(arg, &varg) < 0)
781 return -1;
782 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
783 varg.buf, varg.len, 0);
784 PyBuffer_Release(&varg);
785 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000786 }
787 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000788 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
789 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000790 }
791
Antoine Pitrou0010d372010-08-15 17:12:55 +0000792 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000793}
794
Neal Norwitz6968b052007-02-27 19:02:19 +0000795static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000796bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 if (i < 0 || i >= Py_SIZE(a)) {
799 PyErr_SetString(PyExc_IndexError, "index out of range");
800 return NULL;
801 }
802 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000803}
804
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000805static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000806bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000807{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 int c;
809 Py_ssize_t len_a, len_b;
810 Py_ssize_t min_len;
811 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 /* Make sure both arguments are strings. */
814 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
815 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
816 (PyObject_IsInstance((PyObject*)a,
817 (PyObject*)&PyUnicode_Type) ||
818 PyObject_IsInstance((PyObject*)b,
819 (PyObject*)&PyUnicode_Type))) {
820 if (PyErr_WarnEx(PyExc_BytesWarning,
821 "Comparison between bytes and string", 1))
822 return NULL;
823 }
824 result = Py_NotImplemented;
825 goto out;
826 }
827 if (a == b) {
828 switch (op) {
829 case Py_EQ:case Py_LE:case Py_GE:
830 result = Py_True;
831 goto out;
832 case Py_NE:case Py_LT:case Py_GT:
833 result = Py_False;
834 goto out;
835 }
836 }
837 if (op == Py_EQ) {
838 /* Supporting Py_NE here as well does not save
839 much time, since Py_NE is rarely used. */
840 if (Py_SIZE(a) == Py_SIZE(b)
841 && (a->ob_sval[0] == b->ob_sval[0]
842 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
843 result = Py_True;
844 } else {
845 result = Py_False;
846 }
847 goto out;
848 }
849 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
850 min_len = (len_a < len_b) ? len_a : len_b;
851 if (min_len > 0) {
852 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
853 if (c==0)
854 c = memcmp(a->ob_sval, b->ob_sval, min_len);
855 } else
856 c = 0;
857 if (c == 0)
858 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
859 switch (op) {
860 case Py_LT: c = c < 0; break;
861 case Py_LE: c = c <= 0; break;
862 case Py_EQ: assert(0); break; /* unreachable */
863 case Py_NE: c = c != 0; break;
864 case Py_GT: c = c > 0; break;
865 case Py_GE: c = c >= 0; break;
866 default:
867 result = Py_NotImplemented;
868 goto out;
869 }
870 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000871 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 Py_INCREF(result);
873 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000874}
875
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000876static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000877bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000878{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 register Py_ssize_t len;
880 register unsigned char *p;
Gregory P. Smith27cbcd62012-12-10 18:15:46 -0800881 register Py_uhash_t x; /* Unsigned for defined overflow behavior. */
Neal Norwitz6968b052007-02-27 19:02:19 +0000882
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400883#ifdef Py_DEBUG
Benjamin Peterson69e97272012-02-21 11:08:50 -0500884 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400885#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000886 if (a->ob_shash != -1)
887 return a->ob_shash;
888 len = Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100889 /*
890 We make the hash of the empty string be 0, rather than using
891 (prefix ^ suffix), since this slightly obfuscates the hash secret
892 */
893 if (len == 0) {
894 a->ob_shash = 0;
895 return 0;
896 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 p = (unsigned char *) a->ob_sval;
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100898 x = _Py_HashSecret.prefix;
899 x ^= *p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 while (--len >= 0)
Gregory P. Smith63e6c322012-01-14 15:31:34 -0800901 x = (_PyHASH_MULTIPLIER*x) ^ *p++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 x ^= Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100903 x ^= _Py_HashSecret.suffix;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 if (x == -1)
905 x = -2;
906 a->ob_shash = x;
907 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000908}
909
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000910static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000911bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000912{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 if (PyIndex_Check(item)) {
914 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
915 if (i == -1 && PyErr_Occurred())
916 return NULL;
917 if (i < 0)
918 i += PyBytes_GET_SIZE(self);
919 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
920 PyErr_SetString(PyExc_IndexError,
921 "index out of range");
922 return NULL;
923 }
924 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
925 }
926 else if (PySlice_Check(item)) {
927 Py_ssize_t start, stop, step, slicelength, cur, i;
928 char* source_buf;
929 char* result_buf;
930 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000931
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000932 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 PyBytes_GET_SIZE(self),
934 &start, &stop, &step, &slicelength) < 0) {
935 return NULL;
936 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 if (slicelength <= 0) {
939 return PyBytes_FromStringAndSize("", 0);
940 }
941 else if (start == 0 && step == 1 &&
942 slicelength == PyBytes_GET_SIZE(self) &&
943 PyBytes_CheckExact(self)) {
944 Py_INCREF(self);
945 return (PyObject *)self;
946 }
947 else if (step == 1) {
948 return PyBytes_FromStringAndSize(
949 PyBytes_AS_STRING(self) + start,
950 slicelength);
951 }
952 else {
953 source_buf = PyBytes_AS_STRING(self);
954 result = PyBytes_FromStringAndSize(NULL, slicelength);
955 if (result == NULL)
956 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 result_buf = PyBytes_AS_STRING(result);
959 for (cur = start, i = 0; i < slicelength;
960 cur += step, i++) {
961 result_buf[i] = source_buf[cur];
962 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 return result;
965 }
966 }
967 else {
968 PyErr_Format(PyExc_TypeError,
969 "byte indices must be integers, not %.200s",
970 Py_TYPE(item)->tp_name);
971 return NULL;
972 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973}
974
975static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000976bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000977{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
979 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000980}
981
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000982static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 (lenfunc)bytes_length, /*sq_length*/
984 (binaryfunc)bytes_concat, /*sq_concat*/
985 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
986 (ssizeargfunc)bytes_item, /*sq_item*/
987 0, /*sq_slice*/
988 0, /*sq_ass_item*/
989 0, /*sq_ass_slice*/
990 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000991};
992
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000993static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 (lenfunc)bytes_length,
995 (binaryfunc)bytes_subscript,
996 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000997};
998
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000999static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 (getbufferproc)bytes_buffer_getbuffer,
1001 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001002};
1003
1004
/* Selectors telling the strip helpers which end(s) to trim. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1013
Neal Norwitz6968b052007-02-27 19:02:19 +00001014PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001015"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001016\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001017Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001018If sep is not specified or is None, B is split on ASCII whitespace\n\
1019characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001020If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001021
1022static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001023bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001024{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1026 Py_ssize_t maxsplit = -1;
1027 const char *s = PyBytes_AS_STRING(self), *sub;
1028 Py_buffer vsub;
1029 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1032 return NULL;
1033 if (maxsplit < 0)
1034 maxsplit = PY_SSIZE_T_MAX;
1035 if (subobj == Py_None)
1036 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1037 if (_getbuffer(subobj, &vsub) < 0)
1038 return NULL;
1039 sub = vsub.buf;
1040 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1043 PyBuffer_Release(&vsub);
1044 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001045}
1046
Neal Norwitz6968b052007-02-27 19:02:19 +00001047PyDoc_STRVAR(partition__doc__,
1048"B.partition(sep) -> (head, sep, tail)\n\
1049\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001050Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001051the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001053
1054static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001055bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001056{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 const char *sep;
1058 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 if (PyBytes_Check(sep_obj)) {
1061 sep = PyBytes_AS_STRING(sep_obj);
1062 sep_len = PyBytes_GET_SIZE(sep_obj);
1063 }
1064 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1065 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 return stringlib_partition(
1068 (PyObject*) self,
1069 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1070 sep_obj, sep, sep_len
1071 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001072}
1073
1074PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001075"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001076\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001077Search for the separator sep in B, starting at the end of B,\n\
1078and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001079part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001080bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001081
1082static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001083bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001084{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 const char *sep;
1086 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 if (PyBytes_Check(sep_obj)) {
1089 sep = PyBytes_AS_STRING(sep_obj);
1090 sep_len = PyBytes_GET_SIZE(sep_obj);
1091 }
1092 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1093 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 return stringlib_rpartition(
1096 (PyObject*) self,
1097 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1098 sep_obj, sep, sep_len
1099 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001100}
1101
Neal Norwitz6968b052007-02-27 19:02:19 +00001102PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001103"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001104\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001105Return a list of the sections in B, using sep as the delimiter,\n\
1106starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001107If sep is not given, B is split on ASCII whitespace characters\n\
1108(space, tab, return, newline, formfeed, vertical tab).\n\
1109If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001110
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001111
Neal Norwitz6968b052007-02-27 19:02:19 +00001112static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001113bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001114{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1116 Py_ssize_t maxsplit = -1;
1117 const char *s = PyBytes_AS_STRING(self), *sub;
1118 Py_buffer vsub;
1119 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1122 return NULL;
1123 if (maxsplit < 0)
1124 maxsplit = PY_SSIZE_T_MAX;
1125 if (subobj == Py_None)
1126 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1127 if (_getbuffer(subobj, &vsub) < 0)
1128 return NULL;
1129 sub = vsub.buf;
1130 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1133 PyBuffer_Release(&vsub);
1134 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001135}
1136
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001137
1138PyDoc_STRVAR(join__doc__,
1139"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001140\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001141Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001142Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1143
Neal Norwitz6968b052007-02-27 19:02:19 +00001144static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001145bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 char *sep = PyBytes_AS_STRING(self);
1148 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1149 PyObject *res = NULL;
1150 char *p;
1151 Py_ssize_t seqlen = 0;
1152 size_t sz = 0;
1153 Py_ssize_t i;
1154 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 seq = PySequence_Fast(orig, "");
1157 if (seq == NULL) {
1158 return NULL;
1159 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 seqlen = PySequence_Size(seq);
1162 if (seqlen == 0) {
1163 Py_DECREF(seq);
1164 return PyBytes_FromString("");
1165 }
1166 if (seqlen == 1) {
1167 item = PySequence_Fast_GET_ITEM(seq, 0);
1168 if (PyBytes_CheckExact(item)) {
1169 Py_INCREF(item);
1170 Py_DECREF(seq);
1171 return item;
1172 }
1173 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 /* There are at least two things to join, or else we have a subclass
1176 * of the builtin types in the sequence.
1177 * Do a pre-pass to figure out the total amount of space we'll
1178 * need (sz), and see whether all argument are bytes.
1179 */
1180 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1181 for (i = 0; i < seqlen; i++) {
1182 const size_t old_sz = sz;
1183 item = PySequence_Fast_GET_ITEM(seq, i);
1184 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1185 PyErr_Format(PyExc_TypeError,
1186 "sequence item %zd: expected bytes,"
1187 " %.80s found",
1188 i, Py_TYPE(item)->tp_name);
1189 Py_DECREF(seq);
1190 return NULL;
1191 }
1192 sz += Py_SIZE(item);
1193 if (i != 0)
1194 sz += seplen;
1195 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1196 PyErr_SetString(PyExc_OverflowError,
1197 "join() result is too long for bytes");
1198 Py_DECREF(seq);
1199 return NULL;
1200 }
1201 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 /* Allocate result space. */
1204 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1205 if (res == NULL) {
1206 Py_DECREF(seq);
1207 return NULL;
1208 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 /* Catenate everything. */
1211 /* I'm not worried about a PyByteArray item growing because there's
1212 nowhere in this function where we release the GIL. */
1213 p = PyBytes_AS_STRING(res);
1214 for (i = 0; i < seqlen; ++i) {
1215 size_t n;
1216 char *q;
1217 if (i) {
1218 Py_MEMCPY(p, sep, seplen);
1219 p += seplen;
1220 }
1221 item = PySequence_Fast_GET_ITEM(seq, i);
1222 n = Py_SIZE(item);
1223 if (PyBytes_Check(item))
1224 q = PyBytes_AS_STRING(item);
1225 else
1226 q = PyByteArray_AS_STRING(item);
1227 Py_MEMCPY(p, q, n);
1228 p += n;
1229 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 Py_DECREF(seq);
1232 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001233}
1234
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235PyObject *
1236_PyBytes_Join(PyObject *sep, PyObject *x)
1237{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 assert(sep != NULL && PyBytes_Check(sep));
1239 assert(x != NULL);
1240 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241}
1242
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len, negative start/end are taken relative to len,
   and anything still negative becomes 0.  start is not clamped from
   above.  start and end must be lvalues; every argument may be
   evaluated more than once.

   The body is wrapped in do { } while (0) and the arguments are
   parenthesized so that the macro behaves as one statement (e.g. as
   the body of an if/else) and accepts arbitrary expressions for len. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257
1258Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001259bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 PyObject *subobj;
1262 const char *sub;
1263 Py_ssize_t sub_len;
1264 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265
Jesus Ceaac451502011-04-20 17:09:23 +02001266 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1267 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 if (PyBytes_Check(subobj)) {
1271 sub = PyBytes_AS_STRING(subobj);
1272 sub_len = PyBytes_GET_SIZE(subobj);
1273 }
1274 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1275 /* XXX - the "expected a character buffer object" is pretty
1276 confusing for a non-expert. remap to something else ? */
1277 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 if (dir > 0)
1280 return stringlib_find_slice(
1281 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1282 sub, sub_len, start, end);
1283 else
1284 return stringlib_rfind_slice(
1285 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1286 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287}
1288
1289
1290PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001291"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001292\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001293Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001294such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001295arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001296\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297Return -1 on failure.");
1298
Neal Norwitz6968b052007-02-27 19:02:19 +00001299static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001300bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001301{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 Py_ssize_t result = bytes_find_internal(self, args, +1);
1303 if (result == -2)
1304 return NULL;
1305 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001306}
1307
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308
1309PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001310"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001311\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001312Like B.find() but raise ValueError when the substring is not found.");
1313
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001314static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001315bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 Py_ssize_t result = bytes_find_internal(self, args, +1);
1318 if (result == -2)
1319 return NULL;
1320 if (result == -1) {
1321 PyErr_SetString(PyExc_ValueError,
1322 "substring not found");
1323 return NULL;
1324 }
1325 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001326}
1327
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328
1329PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001330"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001331\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001333such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001335\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001336Return -1 on failure.");
1337
Neal Norwitz6968b052007-02-27 19:02:19 +00001338static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001339bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 Py_ssize_t result = bytes_find_internal(self, args, -1);
1342 if (result == -2)
1343 return NULL;
1344 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001345}
1346
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001347
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001348PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001349"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001350\n\
1351Like B.rfind() but raise ValueError when the substring is not found.");
1352
1353static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001354bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 Py_ssize_t result = bytes_find_internal(self, args, -1);
1357 if (result == -2)
1358 return NULL;
1359 if (result == -1) {
1360 PyErr_SetString(PyExc_ValueError,
1361 "substring not found");
1362 return NULL;
1363 }
1364 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001365}
1366
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
1368Py_LOCAL_INLINE(PyObject *)
1369do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 Py_buffer vsep;
1372 char *s = PyBytes_AS_STRING(self);
1373 Py_ssize_t len = PyBytes_GET_SIZE(self);
1374 char *sep;
1375 Py_ssize_t seplen;
1376 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 if (_getbuffer(sepobj, &vsep) < 0)
1379 return NULL;
1380 sep = vsep.buf;
1381 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 i = 0;
1384 if (striptype != RIGHTSTRIP) {
1385 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1386 i++;
1387 }
1388 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 j = len;
1391 if (striptype != LEFTSTRIP) {
1392 do {
1393 j--;
1394 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1395 j++;
1396 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1401 Py_INCREF(self);
1402 return (PyObject*)self;
1403 }
1404 else
1405 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001406}
1407
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408
1409Py_LOCAL_INLINE(PyObject *)
1410do_strip(PyBytesObject *self, int striptype)
1411{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 char *s = PyBytes_AS_STRING(self);
1413 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 i = 0;
1416 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001417 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 i++;
1419 }
1420 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 j = len;
1423 if (striptype != LEFTSTRIP) {
1424 do {
1425 j--;
David Malcolm96960882010-11-05 17:23:41 +00001426 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 j++;
1428 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1431 Py_INCREF(self);
1432 return (PyObject*)self;
1433 }
1434 else
1435 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001436}
1437
1438
1439Py_LOCAL_INLINE(PyObject *)
1440do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1441{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1445 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 if (sep != NULL && sep != Py_None) {
1448 return do_xstrip(self, striptype, sep);
1449 }
1450 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001451}
1452
1453
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001455"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001457Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001458If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001459static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001460bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 if (PyTuple_GET_SIZE(args) == 0)
1463 return do_strip(self, BOTHSTRIP); /* Common case */
1464 else
1465 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466}
1467
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001468
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001470"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001471\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001472Strip leading bytes contained in the argument.\n\
1473If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001474static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001475bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001476{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 if (PyTuple_GET_SIZE(args) == 0)
1478 return do_strip(self, LEFTSTRIP); /* Common case */
1479 else
1480 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001481}
1482
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001483
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001484PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001485"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001486\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001487Strip trailing bytes contained in the argument.\n\
1488If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001489static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001490bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001491{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 if (PyTuple_GET_SIZE(args) == 0)
1493 return do_strip(self, RIGHTSTRIP); /* Common case */
1494 else
1495 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001496}
Neal Norwitz6968b052007-02-27 19:02:19 +00001497
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001498
1499PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001500"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001501\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001503string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001504as in slice notation.");
1505
1506static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001507bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 PyObject *sub_obj;
1510 const char *str = PyBytes_AS_STRING(self), *sub;
1511 Py_ssize_t sub_len;
1512 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001513
Jesus Ceaac451502011-04-20 17:09:23 +02001514 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001517 if (PyBytes_Check(sub_obj)) {
1518 sub = PyBytes_AS_STRING(sub_obj);
1519 sub_len = PyBytes_GET_SIZE(sub_obj);
1520 }
1521 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1522 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 return PyLong_FromSsize_t(
1527 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1528 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001529}
1530
1531
1532PyDoc_STRVAR(translate__doc__,
1533"B.translate(table[, deletechars]) -> bytes\n\
1534\n\
1535Return a copy of B, where all characters occurring in the\n\
1536optional argument deletechars are removed, and the remaining\n\
1537characters have been mapped through the given translation\n\
1538table, which must be a bytes object of length 256.");
1539
1540static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001541bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001542{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 register char *input, *output;
1544 const char *table;
1545 register Py_ssize_t i, c, changed = 0;
1546 PyObject *input_obj = (PyObject*)self;
1547 const char *output_start, *del_table=NULL;
1548 Py_ssize_t inlen, tablen, dellen = 0;
1549 PyObject *result;
1550 int trans_table[256];
1551 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1554 &tableobj, &delobj))
1555 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 if (PyBytes_Check(tableobj)) {
1558 table = PyBytes_AS_STRING(tableobj);
1559 tablen = PyBytes_GET_SIZE(tableobj);
1560 }
1561 else if (tableobj == Py_None) {
1562 table = NULL;
1563 tablen = 256;
1564 }
1565 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1566 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 if (tablen != 256) {
1569 PyErr_SetString(PyExc_ValueError,
1570 "translation table must be 256 characters long");
1571 return NULL;
1572 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 if (delobj != NULL) {
1575 if (PyBytes_Check(delobj)) {
1576 del_table = PyBytes_AS_STRING(delobj);
1577 dellen = PyBytes_GET_SIZE(delobj);
1578 }
1579 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1580 return NULL;
1581 }
1582 else {
1583 del_table = NULL;
1584 dellen = 0;
1585 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 inlen = PyBytes_GET_SIZE(input_obj);
1588 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1589 if (result == NULL)
1590 return NULL;
1591 output_start = output = PyBytes_AsString(result);
1592 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 if (dellen == 0 && table != NULL) {
1595 /* If no deletions are required, use faster code */
1596 for (i = inlen; --i >= 0; ) {
1597 c = Py_CHARMASK(*input++);
1598 if (Py_CHARMASK((*output++ = table[c])) != c)
1599 changed = 1;
1600 }
1601 if (changed || !PyBytes_CheckExact(input_obj))
1602 return result;
1603 Py_DECREF(result);
1604 Py_INCREF(input_obj);
1605 return input_obj;
1606 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 if (table == NULL) {
1609 for (i = 0; i < 256; i++)
1610 trans_table[i] = Py_CHARMASK(i);
1611 } else {
1612 for (i = 0; i < 256; i++)
1613 trans_table[i] = Py_CHARMASK(table[i]);
1614 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 for (i = 0; i < dellen; i++)
1617 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 for (i = inlen; --i >= 0; ) {
1620 c = Py_CHARMASK(*input++);
1621 if (trans_table[c] != -1)
1622 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1623 continue;
1624 changed = 1;
1625 }
1626 if (!changed && PyBytes_CheckExact(input_obj)) {
1627 Py_DECREF(result);
1628 Py_INCREF(input_obj);
1629 return input_obj;
1630 }
1631 /* Fix the size of the resulting string */
1632 if (inlen > 0)
1633 _PyBytes_Resize(&result, output - output_start);
1634 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001635}
1636
1637
Georg Brandlabc38772009-04-12 15:51:51 +00001638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001639bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001642}
1643
/* find and count characters and substrings */

/* Look for the byte c within the first target_len bytes of target.
   Evaluates to a char * pointing at the first match, or NULL if there is
   none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1648
1649/* String ops must return a string. */
1650/* If the object is subclass of string, create a copy */
1651Py_LOCAL(PyBytesObject *)
1652return_self(PyBytesObject *self)
1653{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 if (PyBytes_CheckExact(self)) {
1655 Py_INCREF(self);
1656 return self;
1657 }
1658 return (PyBytesObject *)PyBytes_FromStringAndSize(
1659 PyBytes_AS_STRING(self),
1660 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661}
1662
1663Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001664countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 Py_ssize_t count=0;
1667 const char *start=target;
1668 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 while ( (start=findchar(start, end-start, c)) != NULL ) {
1671 count++;
1672 if (count >= maxcount)
1673 break;
1674 start += 1;
1675 }
1676 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677}
1678
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679
1680/* Algorithms for different cases of string replacement */
1681
1682/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1683Py_LOCAL(PyBytesObject *)
1684replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 const char *to_s, Py_ssize_t to_len,
1686 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 char *self_s, *result_s;
1689 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001690 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001695 /* 1 at the end plus 1 after every character;
1696 count = min(maxcount, self_len + 1) */
1697 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001699 else
1700 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1701 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 /* Check for overflow */
1704 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001705 assert(count > 0);
1706 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 PyErr_SetString(PyExc_OverflowError,
1708 "replacement bytes are too long");
1709 return NULL;
1710 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001711 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 if (! (result = (PyBytesObject *)
1714 PyBytes_FromStringAndSize(NULL, result_len)) )
1715 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 self_s = PyBytes_AS_STRING(self);
1718 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 /* Lay the first one down (guaranteed this will occur) */
1723 Py_MEMCPY(result_s, to_s, to_len);
1724 result_s += to_len;
1725 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 for (i=0; i<count; i++) {
1728 *result_s++ = *self_s++;
1729 Py_MEMCPY(result_s, to_s, to_len);
1730 result_s += to_len;
1731 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 /* Copy the rest of the original string */
1734 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001737}
1738
1739/* Special case for deleting a single character */
1740/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1741Py_LOCAL(PyBytesObject *)
1742replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001744{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 char *self_s, *result_s;
1746 char *start, *next, *end;
1747 Py_ssize_t self_len, result_len;
1748 Py_ssize_t count;
1749 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 self_len = PyBytes_GET_SIZE(self);
1752 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 count = countchar(self_s, self_len, from_c, maxcount);
1755 if (count == 0) {
1756 return return_self(self);
1757 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 result_len = self_len - count; /* from_len == 1 */
1760 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 if ( (result = (PyBytesObject *)
1763 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1764 return NULL;
1765 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 start = self_s;
1768 end = self_s + self_len;
1769 while (count-- > 0) {
1770 next = findchar(start, end-start, from_c);
1771 if (next == NULL)
1772 break;
1773 Py_MEMCPY(result_s, start, next-start);
1774 result_s += (next-start);
1775 start = next+1;
1776 }
1777 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001780}
1781
1782/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1783
1784Py_LOCAL(PyBytesObject *)
1785replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 const char *from_s, Py_ssize_t from_len,
1787 Py_ssize_t maxcount) {
1788 char *self_s, *result_s;
1789 char *start, *next, *end;
1790 Py_ssize_t self_len, result_len;
1791 Py_ssize_t count, offset;
1792 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001794 self_len = PyBytes_GET_SIZE(self);
1795 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 count = stringlib_count(self_s, self_len,
1798 from_s, from_len,
1799 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 if (count == 0) {
1802 /* no matches */
1803 return return_self(self);
1804 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 result_len = self_len - (count * from_len);
1807 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001808
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001809 if ( (result = (PyBytesObject *)
1810 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1811 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001813 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 start = self_s;
1816 end = self_s + self_len;
1817 while (count-- > 0) {
1818 offset = stringlib_find(start, end-start,
1819 from_s, from_len,
1820 0);
1821 if (offset == -1)
1822 break;
1823 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 result_s += (next-start);
1828 start = next+from_len;
1829 }
1830 Py_MEMCPY(result_s, start, end-start);
1831 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832}
1833
1834/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1835Py_LOCAL(PyBytesObject *)
1836replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 char from_c, char to_c,
1838 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 char *self_s, *result_s, *start, *end, *next;
1841 Py_ssize_t self_len;
1842 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 /* The result string will be the same size */
1845 self_s = PyBytes_AS_STRING(self);
1846 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001848 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 if (next == NULL) {
1851 /* No matches; return the original string */
1852 return return_self(self);
1853 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001855 /* Need to make a new string */
1856 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1857 if (result == NULL)
1858 return NULL;
1859 result_s = PyBytes_AS_STRING(result);
1860 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 /* change everything in-place, starting with this one */
1863 start = result_s + (next-self_s);
1864 *start = to_c;
1865 start++;
1866 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 while (--maxcount > 0) {
1869 next = findchar(start, end-start, from_c);
1870 if (next == NULL)
1871 break;
1872 *next = to_c;
1873 start = next+1;
1874 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001877}
1878
1879/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1880Py_LOCAL(PyBytesObject *)
1881replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 const char *from_s, Py_ssize_t from_len,
1883 const char *to_s, Py_ssize_t to_len,
1884 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 char *result_s, *start, *end;
1887 char *self_s;
1888 Py_ssize_t self_len, offset;
1889 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 self_s = PyBytes_AS_STRING(self);
1894 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 offset = stringlib_find(self_s, self_len,
1897 from_s, from_len,
1898 0);
1899 if (offset == -1) {
1900 /* No matches; return the original string */
1901 return return_self(self);
1902 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 /* Need to make a new string */
1905 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1906 if (result == NULL)
1907 return NULL;
1908 result_s = PyBytes_AS_STRING(result);
1909 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001910
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 /* change everything in-place, starting with this one */
1912 start = result_s + offset;
1913 Py_MEMCPY(start, to_s, from_len);
1914 start += from_len;
1915 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 while ( --maxcount > 0) {
1918 offset = stringlib_find(start, end-start,
1919 from_s, from_len,
1920 0);
1921 if (offset==-1)
1922 break;
1923 Py_MEMCPY(start+offset, to_s, from_len);
1924 start += offset+from_len;
1925 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928}
1929
1930/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1931Py_LOCAL(PyBytesObject *)
1932replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 char from_c,
1934 const char *to_s, Py_ssize_t to_len,
1935 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 char *self_s, *result_s;
1938 char *start, *next, *end;
1939 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001940 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 self_s = PyBytes_AS_STRING(self);
1944 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 count = countchar(self_s, self_len, from_c, maxcount);
1947 if (count == 0) {
1948 /* no matches, return unchanged */
1949 return return_self(self);
1950 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 /* use the difference between current and new, hence the "-1" */
1953 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001954 assert(count > 0);
1955 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 PyErr_SetString(PyExc_OverflowError,
1957 "replacement bytes are too long");
1958 return NULL;
1959 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001960 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 if ( (result = (PyBytesObject *)
1963 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1964 return NULL;
1965 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 start = self_s;
1968 end = self_s + self_len;
1969 while (count-- > 0) {
1970 next = findchar(start, end-start, from_c);
1971 if (next == NULL)
1972 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 if (next == start) {
1975 /* replace with the 'to' */
1976 Py_MEMCPY(result_s, to_s, to_len);
1977 result_s += to_len;
1978 start += 1;
1979 } else {
1980 /* copy the unchanged old then the 'to' */
1981 Py_MEMCPY(result_s, start, next-start);
1982 result_s += (next-start);
1983 Py_MEMCPY(result_s, to_s, to_len);
1984 result_s += to_len;
1985 start = next+1;
1986 }
1987 }
1988 /* Copy the remainder of the remaining string */
1989 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001992}
1993
1994/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1995Py_LOCAL(PyBytesObject *)
1996replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 const char *from_s, Py_ssize_t from_len,
1998 const char *to_s, Py_ssize_t to_len,
1999 Py_ssize_t maxcount) {
2000 char *self_s, *result_s;
2001 char *start, *next, *end;
2002 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002003 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 self_s = PyBytes_AS_STRING(self);
2007 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002009 count = stringlib_count(self_s, self_len,
2010 from_s, from_len,
2011 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 if (count == 0) {
2014 /* no matches, return unchanged */
2015 return return_self(self);
2016 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 /* Check for overflow */
2019 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002020 assert(count > 0);
2021 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 PyErr_SetString(PyExc_OverflowError,
2023 "replacement bytes are too long");
2024 return NULL;
2025 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002026 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 if ( (result = (PyBytesObject *)
2029 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2030 return NULL;
2031 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002033 start = self_s;
2034 end = self_s + self_len;
2035 while (count-- > 0) {
2036 offset = stringlib_find(start, end-start,
2037 from_s, from_len,
2038 0);
2039 if (offset == -1)
2040 break;
2041 next = start+offset;
2042 if (next == start) {
2043 /* replace with the 'to' */
2044 Py_MEMCPY(result_s, to_s, to_len);
2045 result_s += to_len;
2046 start += from_len;
2047 } else {
2048 /* copy the unchanged old then the 'to' */
2049 Py_MEMCPY(result_s, start, next-start);
2050 result_s += (next-start);
2051 Py_MEMCPY(result_s, to_s, to_len);
2052 result_s += to_len;
2053 start = next+from_len;
2054 }
2055 }
2056 /* Copy the remainder of the remaining string */
2057 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002060}
2061
2062
2063Py_LOCAL(PyBytesObject *)
2064replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 const char *from_s, Py_ssize_t from_len,
2066 const char *to_s, Py_ssize_t to_len,
2067 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 if (maxcount < 0) {
2070 maxcount = PY_SSIZE_T_MAX;
2071 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2072 /* nothing to do; return the original string */
2073 return return_self(self);
2074 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002076 if (maxcount == 0 ||
2077 (from_len == 0 && to_len == 0)) {
2078 /* nothing to do; return the original string */
2079 return return_self(self);
2080 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 if (from_len == 0) {
2085 /* insert the 'to' string everywhere. */
2086 /* >>> "Python".replace("", ".") */
2087 /* '.P.y.t.h.o.n.' */
2088 return replace_interleave(self, to_s, to_len, maxcount);
2089 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2092 /* point for an empty self string to generate a non-empty string */
2093 /* Special case so the remaining code always gets a non-empty string */
2094 if (PyBytes_GET_SIZE(self) == 0) {
2095 return return_self(self);
2096 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 if (to_len == 0) {
2099 /* delete all occurrences of 'from' string */
2100 if (from_len == 1) {
2101 return replace_delete_single_character(
2102 self, from_s[0], maxcount);
2103 } else {
2104 return replace_delete_substring(self, from_s,
2105 from_len, maxcount);
2106 }
2107 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 if (from_len == to_len) {
2112 if (from_len == 1) {
2113 return replace_single_character_in_place(
2114 self,
2115 from_s[0],
2116 to_s[0],
2117 maxcount);
2118 } else {
2119 return replace_substring_in_place(
2120 self, from_s, from_len, to_s, to_len,
2121 maxcount);
2122 }
2123 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 /* Otherwise use the more generic algorithms */
2126 if (from_len == 1) {
2127 return replace_single_character(self, from_s[0],
2128 to_s, to_len, maxcount);
2129 } else {
2130 /* len('from')>=2, len('to')>=1 */
2131 return replace_substring(self, from_s, from_len, to_s, to_len,
2132 maxcount);
2133 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002134}
2135
2136PyDoc_STRVAR(replace__doc__,
2137"B.replace(old, new[, count]) -> bytes\n\
2138\n\
2139Return a copy of B with all occurrences of subsection\n\
2140old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002141given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
2143static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002144bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 Py_ssize_t count = -1;
2147 PyObject *from, *to;
2148 const char *from_s, *to_s;
2149 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2152 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 if (PyBytes_Check(from)) {
2155 from_s = PyBytes_AS_STRING(from);
2156 from_len = PyBytes_GET_SIZE(from);
2157 }
2158 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2159 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 if (PyBytes_Check(to)) {
2162 to_s = PyBytes_AS_STRING(to);
2163 to_len = PyBytes_GET_SIZE(to);
2164 }
2165 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2166 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 return (PyObject *)replace((PyBytesObject *) self,
2169 from_s, from_len,
2170 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171}
2172
2173/** End DALKE **/
2174
2175/* Matches the end (direction >= 0) or start (direction < 0) of self
2176 * against substr, using the start and end arguments. Returns
2177 * -1 on error, 0 if not found and 1 if found.
2178 */
2179Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002180_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 Py_ssize_t len = PyBytes_GET_SIZE(self);
2184 Py_ssize_t slen;
2185 const char* sub;
2186 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002188 if (PyBytes_Check(substr)) {
2189 sub = PyBytes_AS_STRING(substr);
2190 slen = PyBytes_GET_SIZE(substr);
2191 }
2192 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2193 return -1;
2194 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 if (direction < 0) {
2199 /* startswith */
2200 if (start+slen > len)
2201 return 0;
2202 } else {
2203 /* endswith */
2204 if (end-start < slen || start > len)
2205 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002207 if (end-slen > start)
2208 start = end - slen;
2209 }
2210 if (end-start >= slen)
2211 return ! memcmp(str+start, sub, slen);
2212 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213}
2214
2215
2216PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002217"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218\n\
2219Return True if B starts with the specified prefix, False otherwise.\n\
2220With optional start, test B beginning at that position.\n\
2221With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002222prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
2224static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002225bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002227 Py_ssize_t start = 0;
2228 Py_ssize_t end = PY_SSIZE_T_MAX;
2229 PyObject *subobj;
2230 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231
Jesus Ceaac451502011-04-20 17:09:23 +02002232 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002233 return NULL;
2234 if (PyTuple_Check(subobj)) {
2235 Py_ssize_t i;
2236 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2237 result = _bytes_tailmatch(self,
2238 PyTuple_GET_ITEM(subobj, i),
2239 start, end, -1);
2240 if (result == -1)
2241 return NULL;
2242 else if (result) {
2243 Py_RETURN_TRUE;
2244 }
2245 }
2246 Py_RETURN_FALSE;
2247 }
2248 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002249 if (result == -1) {
2250 if (PyErr_ExceptionMatches(PyExc_TypeError))
2251 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2252 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002253 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002254 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 else
2256 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257}
2258
2259
2260PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002261"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002262\n\
2263Return True if B ends with the specified suffix, False otherwise.\n\
2264With optional start, test B beginning at that position.\n\
2265With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002266suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002267
2268static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002269bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 Py_ssize_t start = 0;
2272 Py_ssize_t end = PY_SSIZE_T_MAX;
2273 PyObject *subobj;
2274 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002275
Jesus Ceaac451502011-04-20 17:09:23 +02002276 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 return NULL;
2278 if (PyTuple_Check(subobj)) {
2279 Py_ssize_t i;
2280 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2281 result = _bytes_tailmatch(self,
2282 PyTuple_GET_ITEM(subobj, i),
2283 start, end, +1);
2284 if (result == -1)
2285 return NULL;
2286 else if (result) {
2287 Py_RETURN_TRUE;
2288 }
2289 }
2290 Py_RETURN_FALSE;
2291 }
2292 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002293 if (result == -1) {
2294 if (PyErr_ExceptionMatches(PyExc_TypeError))
2295 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2296 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002297 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002298 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 else
2300 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301}
2302
2303
2304PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002305"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002306\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002307Decode B using the codec registered for encoding. Default encoding\n\
2308is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002309handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2310a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002311as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002312able to handle UnicodeDecodeErrors.");
2313
2314static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002315bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 const char *encoding = NULL;
2318 const char *errors = NULL;
2319 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2322 return NULL;
2323 if (encoding == NULL)
2324 encoding = PyUnicode_GetDefaultEncoding();
2325 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002326}
2327
Guido van Rossum20188312006-05-05 15:15:40 +00002328
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002329PyDoc_STRVAR(splitlines__doc__,
2330"B.splitlines([keepends]) -> list of lines\n\
2331\n\
2332Return a list of the lines in B, breaking at line boundaries.\n\
2333Line breaks are not included in the resulting list unless keepends\n\
2334is given and true.");
2335
2336static PyObject*
2337bytes_splitlines(PyObject *self, PyObject *args)
2338{
2339 int keepends = 0;
2340
2341 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002342 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002343
2344 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002345 (PyObject*) self, PyBytes_AS_STRING(self),
2346 PyBytes_GET_SIZE(self), keepends
2347 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002348}
2349
2350
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002351PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002352"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002353\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002355Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002356Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002357
2358static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002359hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 if (c >= 128)
2362 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002363 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002364 return c - '0';
2365 else {
David Malcolm96960882010-11-05 17:23:41 +00002366 if (Py_ISUPPER(c))
2367 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002368 if (c >= 'a' && c <= 'f')
2369 return c - 'a' + 10;
2370 }
2371 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002372}
2373
2374static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002375bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002376{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 PyObject *newstring, *hexobj;
2378 char *buf;
2379 Py_UNICODE *hex;
2380 Py_ssize_t hexlen, byteslen, i, j;
2381 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2384 return NULL;
2385 assert(PyUnicode_Check(hexobj));
2386 hexlen = PyUnicode_GET_SIZE(hexobj);
2387 hex = PyUnicode_AS_UNICODE(hexobj);
2388 byteslen = hexlen/2; /* This overestimates if there are spaces */
2389 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2390 if (!newstring)
2391 return NULL;
2392 buf = PyBytes_AS_STRING(newstring);
2393 for (i = j = 0; i < hexlen; i += 2) {
2394 /* skip over spaces in the input */
2395 while (hex[i] == ' ')
2396 i++;
2397 if (i >= hexlen)
2398 break;
2399 top = hex_digit_to_int(hex[i]);
2400 bot = hex_digit_to_int(hex[i+1]);
2401 if (top == -1 || bot == -1) {
2402 PyErr_Format(PyExc_ValueError,
2403 "non-hexadecimal number found in "
2404 "fromhex() arg at position %zd", i);
2405 goto error;
2406 }
2407 buf[j++] = (top << 4) + bot;
2408 }
2409 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2410 goto error;
2411 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002412
2413 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 Py_XDECREF(newstring);
2415 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002416}
2417
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002418PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002419"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002420
2421static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002422bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002423{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 Py_ssize_t res;
2425 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2426 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002427}
2428
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002429
2430static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002431bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002433 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002434}
2435
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002436
2437static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002438bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2440 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2441 _Py_capitalize__doc__},
2442 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2443 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2444 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2445 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2446 endswith__doc__},
2447 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2448 expandtabs__doc__},
2449 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2450 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2451 fromhex_doc},
2452 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2453 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2454 _Py_isalnum__doc__},
2455 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2456 _Py_isalpha__doc__},
2457 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2458 _Py_isdigit__doc__},
2459 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2460 _Py_islower__doc__},
2461 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2462 _Py_isspace__doc__},
2463 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2464 _Py_istitle__doc__},
2465 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2466 _Py_isupper__doc__},
2467 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2468 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2469 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2470 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2471 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2472 _Py_maketrans__doc__},
2473 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2474 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2475 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2476 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2477 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2478 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2479 rpartition__doc__},
2480 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2481 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2482 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2483 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2484 splitlines__doc__},
2485 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2486 startswith__doc__},
2487 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2488 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2489 _Py_swapcase__doc__},
2490 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2491 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2492 translate__doc__},
2493 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2494 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2495 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2496 sizeof__doc__},
2497 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002498};
2499
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500static PyObject *
2501str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2502
2503static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002504bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 PyObject *x = NULL;
2507 const char *encoding = NULL;
2508 const char *errors = NULL;
2509 PyObject *new = NULL;
2510 Py_ssize_t size;
2511 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 if (type != &PyBytes_Type)
2514 return str_subtype_new(type, args, kwds);
2515 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2516 &encoding, &errors))
2517 return NULL;
2518 if (x == NULL) {
2519 if (encoding != NULL || errors != NULL) {
2520 PyErr_SetString(PyExc_TypeError,
2521 "encoding or errors without sequence "
2522 "argument");
2523 return NULL;
2524 }
2525 return PyBytes_FromString("");
2526 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 if (PyUnicode_Check(x)) {
2529 /* Encode via the codec registry */
2530 if (encoding == NULL) {
2531 PyErr_SetString(PyExc_TypeError,
2532 "string argument without an encoding");
2533 return NULL;
2534 }
2535 new = PyUnicode_AsEncodedString(x, encoding, errors);
2536 if (new == NULL)
2537 return NULL;
2538 assert(PyBytes_Check(new));
2539 return new;
2540 }
2541 /* Is it an integer? */
2542 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2543 if (size == -1 && PyErr_Occurred()) {
2544 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2545 return NULL;
2546 PyErr_Clear();
2547 }
2548 else if (size < 0) {
2549 PyErr_SetString(PyExc_ValueError, "negative count");
2550 return NULL;
2551 }
2552 else {
2553 new = PyBytes_FromStringAndSize(NULL, size);
2554 if (new == NULL) {
2555 return NULL;
2556 }
2557 if (size > 0) {
2558 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2559 }
2560 return new;
2561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 /* If it's not unicode, there can't be encoding or errors */
2564 if (encoding != NULL || errors != NULL) {
2565 PyErr_SetString(PyExc_TypeError,
2566 "encoding or errors without a string argument");
2567 return NULL;
2568 }
2569 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002570}
2571
2572PyObject *
2573PyBytes_FromObject(PyObject *x)
2574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 PyObject *new, *it;
2576 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 if (x == NULL) {
2579 PyErr_BadInternalCall();
2580 return NULL;
2581 }
2582 /* Use the modern buffer interface */
2583 if (PyObject_CheckBuffer(x)) {
2584 Py_buffer view;
2585 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2586 return NULL;
2587 new = PyBytes_FromStringAndSize(NULL, view.len);
2588 if (!new)
2589 goto fail;
2590 /* XXX(brett.cannon): Better way to get to internal buffer? */
2591 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2592 &view, view.len, 'C') < 0)
2593 goto fail;
2594 PyBuffer_Release(&view);
2595 return new;
2596 fail:
2597 Py_XDECREF(new);
2598 PyBuffer_Release(&view);
2599 return NULL;
2600 }
2601 if (PyUnicode_Check(x)) {
2602 PyErr_SetString(PyExc_TypeError,
2603 "cannot convert unicode object to bytes");
2604 return NULL;
2605 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002607 if (PyList_CheckExact(x)) {
2608 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2609 if (new == NULL)
2610 return NULL;
2611 for (i = 0; i < Py_SIZE(x); i++) {
2612 Py_ssize_t value = PyNumber_AsSsize_t(
2613 PyList_GET_ITEM(x, i), PyExc_ValueError);
2614 if (value == -1 && PyErr_Occurred()) {
2615 Py_DECREF(new);
2616 return NULL;
2617 }
2618 if (value < 0 || value >= 256) {
2619 PyErr_SetString(PyExc_ValueError,
2620 "bytes must be in range(0, 256)");
2621 Py_DECREF(new);
2622 return NULL;
2623 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002624 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 }
2626 return new;
2627 }
2628 if (PyTuple_CheckExact(x)) {
2629 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2630 if (new == NULL)
2631 return NULL;
2632 for (i = 0; i < Py_SIZE(x); i++) {
2633 Py_ssize_t value = PyNumber_AsSsize_t(
2634 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2635 if (value == -1 && PyErr_Occurred()) {
2636 Py_DECREF(new);
2637 return NULL;
2638 }
2639 if (value < 0 || value >= 256) {
2640 PyErr_SetString(PyExc_ValueError,
2641 "bytes must be in range(0, 256)");
2642 Py_DECREF(new);
2643 return NULL;
2644 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002645 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 }
2647 return new;
2648 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 /* For iterator version, create a string object and resize as needed */
2651 size = _PyObject_LengthHint(x, 64);
2652 if (size == -1 && PyErr_Occurred())
2653 return NULL;
2654 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2655 returning a shared empty bytes string. This required because we
2656 want to call _PyBytes_Resize() the returned object, which we can
2657 only do on bytes objects with refcount == 1. */
2658 size += 1;
2659 new = PyBytes_FromStringAndSize(NULL, size);
2660 if (new == NULL)
2661 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 /* Get the iterator */
2664 it = PyObject_GetIter(x);
2665 if (it == NULL)
2666 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 /* Run the iterator to exhaustion */
2669 for (i = 0; ; i++) {
2670 PyObject *item;
2671 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 /* Get the next item */
2674 item = PyIter_Next(it);
2675 if (item == NULL) {
2676 if (PyErr_Occurred())
2677 goto error;
2678 break;
2679 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 /* Interpret it as an int (__index__) */
2682 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2683 Py_DECREF(item);
2684 if (value == -1 && PyErr_Occurred())
2685 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* Range check */
2688 if (value < 0 || value >= 256) {
2689 PyErr_SetString(PyExc_ValueError,
2690 "bytes must be in range(0, 256)");
2691 goto error;
2692 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 /* Append the byte */
2695 if (i >= size) {
2696 size = 2 * size + 1;
2697 if (_PyBytes_Resize(&new, size) < 0)
2698 goto error;
2699 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002700 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 }
2702 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 /* Clean up and return success */
2705 Py_DECREF(it);
2706 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
2708 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002709 /* Error handling when new != NULL */
2710 Py_XDECREF(it);
2711 Py_DECREF(new);
2712 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713}
2714
2715static PyObject *
2716str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 PyObject *tmp, *pnew;
2719 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 assert(PyType_IsSubtype(type, &PyBytes_Type));
2722 tmp = bytes_new(&PyBytes_Type, args, kwds);
2723 if (tmp == NULL)
2724 return NULL;
2725 assert(PyBytes_CheckExact(tmp));
2726 n = PyBytes_GET_SIZE(tmp);
2727 pnew = type->tp_alloc(type, n);
2728 if (pnew != NULL) {
2729 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2730 PyBytes_AS_STRING(tmp), n+1);
2731 ((PyBytesObject *)pnew)->ob_shash =
2732 ((PyBytesObject *)tmp)->ob_shash;
2733 }
2734 Py_DECREF(tmp);
2735 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736}
2737
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002738PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002739"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002741bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002742bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2743bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002744\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002745Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002746 - an iterable yielding integers in range(256)\n\
2747 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002748 - any object implementing the buffer API.\n\
2749 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002750
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002751static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002752
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002753PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2755 "bytes",
2756 PyBytesObject_SIZE,
2757 sizeof(char),
2758 bytes_dealloc, /* tp_dealloc */
2759 0, /* tp_print */
2760 0, /* tp_getattr */
2761 0, /* tp_setattr */
2762 0, /* tp_reserved */
2763 (reprfunc)bytes_repr, /* tp_repr */
2764 0, /* tp_as_number */
2765 &bytes_as_sequence, /* tp_as_sequence */
2766 &bytes_as_mapping, /* tp_as_mapping */
2767 (hashfunc)bytes_hash, /* tp_hash */
2768 0, /* tp_call */
2769 bytes_str, /* tp_str */
2770 PyObject_GenericGetAttr, /* tp_getattro */
2771 0, /* tp_setattro */
2772 &bytes_as_buffer, /* tp_as_buffer */
2773 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2774 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2775 bytes_doc, /* tp_doc */
2776 0, /* tp_traverse */
2777 0, /* tp_clear */
2778 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2779 0, /* tp_weaklistoffset */
2780 bytes_iter, /* tp_iter */
2781 0, /* tp_iternext */
2782 bytes_methods, /* tp_methods */
2783 0, /* tp_members */
2784 0, /* tp_getset */
2785 &PyBaseObject_Type, /* tp_base */
2786 0, /* tp_dict */
2787 0, /* tp_descr_get */
2788 0, /* tp_descr_set */
2789 0, /* tp_dictoffset */
2790 0, /* tp_init */
2791 0, /* tp_alloc */
2792 bytes_new, /* tp_new */
2793 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002794};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002795
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002796void
2797PyBytes_Concat(register PyObject **pv, register PyObject *w)
2798{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 register PyObject *v;
2800 assert(pv != NULL);
2801 if (*pv == NULL)
2802 return;
2803 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002804 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002805 return;
2806 }
2807 v = bytes_concat(*pv, w);
2808 Py_DECREF(*pv);
2809 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810}
2811
2812void
2813PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2814{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002815 PyBytes_Concat(pv, w);
2816 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817}
2818
2819
2820/* The following function breaks the notion that strings are immutable:
2821 it changes the size of a string. We get away with this only if there
2822 is only one module referencing the object. You can also think of it
2823 as creating a new string object and destroying the old one, only
2824 more efficiently. In any case, don't use this if the string may
2825 already be known to some other part of the code...
2826 Note that if there's not enough memory to resize the string, the original
2827 string object at *pv is deallocated, *pv is set to NULL, an "out of
2828 memory" exception is set, and -1 is returned. Else (on success) 0 is
2829 returned, and the value in *pv may or may not be the same as on input.
2830 As always, an extra byte is allocated for a trailing \0 byte (newsize
2831 does *not* include that), and a trailing \0 byte is stored.
2832*/
2833
2834int
2835_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2836{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002837 register PyObject *v;
2838 register PyBytesObject *sv;
2839 v = *pv;
2840 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2841 *pv = 0;
2842 Py_DECREF(v);
2843 PyErr_BadInternalCall();
2844 return -1;
2845 }
2846 /* XXX UNREF/NEWREF interface should be more symmetrical */
2847 _Py_DEC_REFTOTAL;
2848 _Py_ForgetReference(v);
2849 *pv = (PyObject *)
2850 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2851 if (*pv == NULL) {
2852 PyObject_Del(v);
2853 PyErr_NoMemory();
2854 return -1;
2855 }
2856 _Py_NewReference(*pv);
2857 sv = (PyBytesObject *) *pv;
2858 Py_SIZE(sv) = newsize;
2859 sv->ob_sval[newsize] = '\0';
2860 sv->ob_shash = -1; /* invalidate cached hash value */
2861 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002862}
2863
2864/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2865 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2866 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002867 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868 * . *pbuf is set to point into it,
2869 * *plen set to the # of chars following that.
2870 * Caller must decref it when done using pbuf.
2871 * The string starting at *pbuf is of the form
2872 * "-"? ("0x" | "0X")? digit+
2873 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2874 * set in flags. The case of hex digits will be correct,
2875 * There will be at least prec digits, zero-filled on the left if
2876 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 * val object to be converted
2878 * flags bitmask of format flags; only F_ALT is looked at
2879 * prec minimum number of digits; 0-fill on left if needed
2880 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881 *
2882 * CAUTION: o, x and X conversions on regular ints can never
2883 * produce a '-' sign, but can for Python's unbounded ints.
2884 */
2885PyObject*
2886_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002889 PyObject *result = NULL;
2890 char *buf;
2891 Py_ssize_t i;
2892 int sign; /* 1 if '-', else 0 */
2893 int len; /* number of characters */
2894 Py_ssize_t llen;
2895 int numdigits; /* len == numnondigits + numdigits */
2896 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002898 /* Avoid exceeding SSIZE_T_MAX */
2899 if (prec > INT_MAX-3) {
2900 PyErr_SetString(PyExc_OverflowError,
2901 "precision too large");
2902 return NULL;
2903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 switch (type) {
2906 case 'd':
2907 case 'u':
2908 /* Special-case boolean: we want 0/1 */
2909 if (PyBool_Check(val))
2910 result = PyNumber_ToBase(val, 10);
2911 else
2912 result = Py_TYPE(val)->tp_str(val);
2913 break;
2914 case 'o':
2915 numnondigits = 2;
2916 result = PyNumber_ToBase(val, 8);
2917 break;
2918 case 'x':
2919 case 'X':
2920 numnondigits = 2;
2921 result = PyNumber_ToBase(val, 16);
2922 break;
2923 default:
2924 assert(!"'type' not in [duoxX]");
2925 }
2926 if (!result)
2927 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002929 buf = _PyUnicode_AsString(result);
2930 if (!buf) {
2931 Py_DECREF(result);
2932 return NULL;
2933 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002935 /* To modify the string in-place, there can only be one reference. */
2936 if (Py_REFCNT(result) != 1) {
2937 PyErr_BadInternalCall();
2938 return NULL;
2939 }
2940 llen = PyUnicode_GetSize(result);
2941 if (llen > INT_MAX) {
2942 PyErr_SetString(PyExc_ValueError,
2943 "string too large in _PyBytes_FormatLong");
2944 return NULL;
2945 }
2946 len = (int)llen;
2947 if (buf[len-1] == 'L') {
2948 --len;
2949 buf[len] = '\0';
2950 }
2951 sign = buf[0] == '-';
2952 numnondigits += sign;
2953 numdigits = len - numnondigits;
2954 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002956 /* Get rid of base marker unless F_ALT */
2957 if (((flags & F_ALT) == 0 &&
2958 (type == 'o' || type == 'x' || type == 'X'))) {
2959 assert(buf[sign] == '0');
2960 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2961 buf[sign+1] == 'o');
2962 numnondigits -= 2;
2963 buf += 2;
2964 len -= 2;
2965 if (sign)
2966 buf[0] = '-';
2967 assert(len == numnondigits + numdigits);
2968 assert(numdigits > 0);
2969 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002971 /* Fill with leading zeroes to meet minimum width. */
2972 if (prec > numdigits) {
2973 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2974 numnondigits + prec);
2975 char *b1;
2976 if (!r1) {
2977 Py_DECREF(result);
2978 return NULL;
2979 }
2980 b1 = PyBytes_AS_STRING(r1);
2981 for (i = 0; i < numnondigits; ++i)
2982 *b1++ = *buf++;
2983 for (i = 0; i < prec - numdigits; i++)
2984 *b1++ = '0';
2985 for (i = 0; i < numdigits; i++)
2986 *b1++ = *buf++;
2987 *b1 = '\0';
2988 Py_DECREF(result);
2989 result = r1;
2990 buf = PyBytes_AS_STRING(result);
2991 len = numnondigits + prec;
2992 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002994 /* Fix up case for hex conversions. */
2995 if (type == 'X') {
2996 /* Need to convert all lower case letters to upper case.
2997 and need to convert 0x to 0X (and -0x to -0X). */
2998 for (i = 0; i < len; i++)
2999 if (buf[i] >= 'a' && buf[i] <= 'x')
3000 buf[i] -= 'a'-'A';
3001 }
3002 *pbuf = buf;
3003 *plen = len;
3004 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003005}
3006
3007void
3008PyBytes_Fini(void)
3009{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003010 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003011 for (i = 0; i < UCHAR_MAX + 1; i++)
3012 Py_CLEAR(characters[i]);
3013 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003014}
3015
Benjamin Peterson4116f362008-05-27 00:36:20 +00003016/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003017
3018typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003019 PyObject_HEAD
3020 Py_ssize_t it_index;
3021 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023
3024static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003025striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 _PyObject_GC_UNTRACK(it);
3028 Py_XDECREF(it->it_seq);
3029 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003030}
3031
3032static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003033striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003034{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003035 Py_VISIT(it->it_seq);
3036 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037}
3038
3039static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003040striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003041{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 PyBytesObject *seq;
3043 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 assert(it != NULL);
3046 seq = it->it_seq;
3047 if (seq == NULL)
3048 return NULL;
3049 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003051 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3052 item = PyLong_FromLong(
3053 (unsigned char)seq->ob_sval[it->it_index]);
3054 if (item != NULL)
3055 ++it->it_index;
3056 return item;
3057 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 Py_DECREF(seq);
3060 it->it_seq = NULL;
3061 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003062}
3063
3064static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003065striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003067 Py_ssize_t len = 0;
3068 if (it->it_seq)
3069 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3070 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003071}
3072
3073PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003075
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003076static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3078 length_hint_doc},
3079 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003080};
3081
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003082PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3084 "bytes_iterator", /* tp_name */
3085 sizeof(striterobject), /* tp_basicsize */
3086 0, /* tp_itemsize */
3087 /* methods */
3088 (destructor)striter_dealloc, /* tp_dealloc */
3089 0, /* tp_print */
3090 0, /* tp_getattr */
3091 0, /* tp_setattr */
3092 0, /* tp_reserved */
3093 0, /* tp_repr */
3094 0, /* tp_as_number */
3095 0, /* tp_as_sequence */
3096 0, /* tp_as_mapping */
3097 0, /* tp_hash */
3098 0, /* tp_call */
3099 0, /* tp_str */
3100 PyObject_GenericGetAttr, /* tp_getattro */
3101 0, /* tp_setattro */
3102 0, /* tp_as_buffer */
3103 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3104 0, /* tp_doc */
3105 (traverseproc)striter_traverse, /* tp_traverse */
3106 0, /* tp_clear */
3107 0, /* tp_richcompare */
3108 0, /* tp_weaklistoffset */
3109 PyObject_SelfIter, /* tp_iter */
3110 (iternextfunc)striter_next, /* tp_iternext */
3111 striter_methods, /* tp_methods */
3112 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113};
3114
3115static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003116bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003118 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003120 if (!PyBytes_Check(seq)) {
3121 PyErr_BadInternalCall();
3122 return NULL;
3123 }
3124 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3125 if (it == NULL)
3126 return NULL;
3127 it->it_index = 0;
3128 Py_INCREF(seq);
3129 it->it_seq = (PyBytesObject *)seq;
3130 _PyObject_GC_TRACK(it);
3131 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003132}