blob: b60a8b06c336cbe6d87ef8bd506976ba012e52c9 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000176 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000181 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
185 * they don't affect the amount of space we reserve.
186 */
187 if ((*f == 'l' || *f == 'z') &&
188 (f[1] == 'd' || f[1] == 'u'))
189 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
198 case 'd': case 'u': case 'i': case 'x':
199 (void) va_arg(count, int);
200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
214 * XXX I count 18 -- what's the extra for?
215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
224 what's in the argument list) */
225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000231 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 /* step 2: fill the buffer */
233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
235 string = PyBytes_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
244 Py_ssize_t i;
245 int longflag = 0;
246 int size_tflag = 0;
247 /* parse the width.precision part (we're only
248 interested in the precision value, if any) */
249 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000250 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 n = (n*10) + *f++ - '0';
252 if (*f == '.') {
253 f++;
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 }
David Malcolm96960882010-11-05 17:23:41 +0000258 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 f++;
260 /* handle the long flag, but only for %ld and %lu.
261 others can be added when necessary. */
262 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
263 longflag = 1;
264 ++f;
265 }
266 /* handle the size_t flag. */
267 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
268 size_tflag = 1;
269 ++f;
270 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 switch (*f) {
273 case 'c':
274 *s++ = va_arg(vargs, int);
275 break;
276 case 'd':
277 if (longflag)
278 sprintf(s, "%ld", va_arg(vargs, long));
279 else if (size_tflag)
280 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
281 va_arg(vargs, Py_ssize_t));
282 else
283 sprintf(s, "%d", va_arg(vargs, int));
284 s += strlen(s);
285 break;
286 case 'u':
287 if (longflag)
288 sprintf(s, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(s, "%u",
295 va_arg(vargs, unsigned int));
296 s += strlen(s);
297 break;
298 case 'i':
299 sprintf(s, "%i", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 'x':
303 sprintf(s, "%x", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 's':
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (n > 0 && i > n)
310 i = n;
311 Py_MEMCPY(s, p, i);
312 s += i;
313 break;
314 case 'p':
315 sprintf(s, "%p", va_arg(vargs, void*));
316 /* %p is ill-defined: ensure leading 0x. */
317 if (s[1] == 'X')
318 s[1] = 'x';
319 else if (s[1] != 'x') {
320 memmove(s+2, s, strlen(s)+1);
321 s[0] = '0';
322 s[1] = 'x';
323 }
324 s += strlen(s);
325 break;
326 case '%':
327 *s++ = '%';
328 break;
329 default:
330 strcpy(s, p);
331 s += strlen(s);
332 goto end;
333 }
334 } else
335 *s++ = *f;
336 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337
338 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
340 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000341}
342
343PyObject *
344PyBytes_FromFormat(const char *format, ...)
345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 PyObject* ret;
347 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000348
349#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 ret = PyBytes_FromFormatV(format, vargs);
355 va_end(vargs);
356 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357}
358
359static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000360bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000363}
364
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365/* Unescape a backslash-escaped string. If unicode is non-zero,
366 the string is a u-literal. If recode_encoding is non-zero,
367 the string is UTF-8 encoded and should be re-encoded in the
368 specified encoding. */
369
370PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_ssize_t len,
372 const char *errors,
373 Py_ssize_t unicode,
374 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 int c;
377 char *p, *buf;
378 const char *end;
379 PyObject *v;
380 Py_ssize_t newlen = recode_encoding ? 4*len:len;
381 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
382 if (v == NULL)
383 return NULL;
384 p = buf = PyBytes_AsString(v);
385 end = s + len;
386 while (s < end) {
387 if (*s != '\\') {
388 non_esc:
389 if (recode_encoding && (*s & 0x80)) {
390 PyObject *u, *w;
391 char *r;
392 const char* t;
393 Py_ssize_t rn;
394 t = s;
395 /* Decode non-ASCII bytes as UTF-8. */
396 while (t < end && (*t & 0x80)) t++;
397 u = PyUnicode_DecodeUTF8(s, t - s, errors);
398 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 /* Recode them in target encoding. */
401 w = PyUnicode_AsEncodedString(
402 u, recode_encoding, errors);
403 Py_DECREF(u);
404 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 /* Append bytes to output buffer. */
407 assert(PyBytes_Check(w));
408 r = PyBytes_AS_STRING(w);
409 rn = PyBytes_GET_SIZE(w);
410 Py_MEMCPY(p, r, rn);
411 p += rn;
412 Py_DECREF(w);
413 s = t;
414 } else {
415 *p++ = *s++;
416 }
417 continue;
418 }
419 s++;
420 if (s==end) {
421 PyErr_SetString(PyExc_ValueError,
422 "Trailing \\ in string");
423 goto failed;
424 }
425 switch (*s++) {
426 /* XXX This assumes ASCII! */
427 case '\n': break;
428 case '\\': *p++ = '\\'; break;
429 case '\'': *p++ = '\''; break;
430 case '\"': *p++ = '\"'; break;
431 case 'b': *p++ = '\b'; break;
432 case 'f': *p++ = '\014'; break; /* FF */
433 case 't': *p++ = '\t'; break;
434 case 'n': *p++ = '\n'; break;
435 case 'r': *p++ = '\r'; break;
436 case 'v': *p++ = '\013'; break; /* VT */
437 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
438 case '0': case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 c = s[-1] - '0';
441 if (s < end && '0' <= *s && *s <= '7') {
442 c = (c<<3) + *s++ - '0';
443 if (s < end && '0' <= *s && *s <= '7')
444 c = (c<<3) + *s++ - '0';
445 }
446 *p++ = c;
447 break;
448 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000449 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 unsigned int x = 0;
451 c = Py_CHARMASK(*s);
452 s++;
David Malcolm96960882010-11-05 17:23:41 +0000453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000455 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 x = 10 + c - 'a';
457 else
458 x = 10 + c - 'A';
459 x = x << 4;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x += 10 + c - 'a';
466 else
467 x += 10 + c - 'A';
468 *p++ = x;
469 break;
470 }
471 if (!errors || strcmp(errors, "strict") == 0) {
472 PyErr_SetString(PyExc_ValueError,
473 "invalid \\x escape");
474 goto failed;
475 }
476 if (strcmp(errors, "replace") == 0) {
477 *p++ = '?';
478 } else if (strcmp(errors, "ignore") == 0)
479 /* do nothing */;
480 else {
481 PyErr_Format(PyExc_ValueError,
482 "decoding error; unknown "
483 "error handling code: %.400s",
484 errors);
485 goto failed;
486 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200487 /* skip \x */
488 if (s < end && Py_ISXDIGIT(s[0]))
489 s++; /* and a hexdigit */
490 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 default:
492 *p++ = '\\';
493 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200494 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000495 UTF-8 bytes may follow. */
496 }
497 }
498 if (p-buf < newlen)
499 _PyBytes_Resize(&v, p - buf);
500 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000501 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 Py_DECREF(v);
503 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000504}
505
506/* -------------------------------------------------------------------- */
507/* object api */
508
509Py_ssize_t
510PyBytes_Size(register PyObject *op)
511{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 if (!PyBytes_Check(op)) {
513 PyErr_Format(PyExc_TypeError,
514 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
515 return -1;
516 }
517 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000518}
519
520char *
521PyBytes_AsString(register PyObject *op)
522{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000523 if (!PyBytes_Check(op)) {
524 PyErr_Format(PyExc_TypeError,
525 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
526 return NULL;
527 }
528 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000529}
530
531int
532PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 register char **s,
534 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000535{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 if (s == NULL) {
537 PyErr_BadInternalCall();
538 return -1;
539 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 if (!PyBytes_Check(obj)) {
542 PyErr_Format(PyExc_TypeError,
543 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
544 return -1;
545 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 *s = PyBytes_AS_STRING(obj);
548 if (len != NULL)
549 *len = PyBytes_GET_SIZE(obj);
550 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
551 PyErr_SetString(PyExc_TypeError,
552 "expected bytes with no null");
553 return -1;
554 }
555 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000556}
Neal Norwitz6968b052007-02-27 19:02:19 +0000557
558/* -------------------------------------------------------------------- */
559/* Methods */
560
Eric Smith0923d1d2009-04-16 20:16:10 +0000561#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000562
563#include "stringlib/fastsearch.h"
564#include "stringlib/count.h"
565#include "stringlib/find.h"
566#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000567#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000568#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000569
Eric Smith0f78bff2009-11-30 01:01:42 +0000570#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000571
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000572PyObject *
573PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 static const char *hexdigits = "0123456789abcdef";
576 register PyBytesObject* op = (PyBytesObject*) obj;
577 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000578 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000580 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 PyErr_SetString(PyExc_OverflowError,
582 "bytes object is too large to make repr");
583 return NULL;
584 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000585 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 v = PyUnicode_FromUnicode(NULL, newsize);
587 if (v == NULL) {
588 return NULL;
589 }
590 else {
591 register Py_ssize_t i;
592 register Py_UNICODE c;
593 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
594 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 /* Figure out which quote to use; single is preferred */
597 quote = '\'';
598 if (smartquotes) {
599 char *test, *start;
600 start = PyBytes_AS_STRING(op);
601 for (test = start; test < start+length; ++test) {
602 if (*test == '"') {
603 quote = '\''; /* back to single */
604 goto decided;
605 }
606 else if (*test == '\'')
607 quote = '"';
608 }
609 decided:
610 ;
611 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 *p++ = 'b', *p++ = quote;
614 for (i = 0; i < length; i++) {
615 /* There's at least enough room for a hex escape
616 and a closing quote. */
617 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
618 c = op->ob_sval[i];
619 if (c == quote || c == '\\')
620 *p++ = '\\', *p++ = c;
621 else if (c == '\t')
622 *p++ = '\\', *p++ = 't';
623 else if (c == '\n')
624 *p++ = '\\', *p++ = 'n';
625 else if (c == '\r')
626 *p++ = '\\', *p++ = 'r';
627 else if (c < ' ' || c >= 0x7f) {
628 *p++ = '\\';
629 *p++ = 'x';
630 *p++ = hexdigits[(c & 0xf0) >> 4];
631 *p++ = hexdigits[c & 0xf];
632 }
633 else
634 *p++ = c;
635 }
636 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
637 *p++ = quote;
638 *p = '\0';
639 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
640 Py_DECREF(v);
641 return NULL;
642 }
643 return v;
644 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000645}
646
Neal Norwitz6968b052007-02-27 19:02:19 +0000647static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000648bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000649{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000651}
652
Neal Norwitz6968b052007-02-27 19:02:19 +0000653static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000654bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000655{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 if (Py_BytesWarningFlag) {
657 if (PyErr_WarnEx(PyExc_BytesWarning,
658 "str() on a bytes instance", 1))
659 return NULL;
660 }
661 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000662}
663
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000665bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000668}
Neal Norwitz6968b052007-02-27 19:02:19 +0000669
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000670/* This is also used by PyBytes_Concat() */
671static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000672bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 Py_ssize_t size;
675 Py_buffer va, vb;
676 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 va.len = -1;
679 vb.len = -1;
680 if (_getbuffer(a, &va) < 0 ||
681 _getbuffer(b, &vb) < 0) {
682 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
683 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
684 goto done;
685 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000687 /* Optimize end cases */
688 if (va.len == 0 && PyBytes_CheckExact(b)) {
689 result = b;
690 Py_INCREF(result);
691 goto done;
692 }
693 if (vb.len == 0 && PyBytes_CheckExact(a)) {
694 result = a;
695 Py_INCREF(result);
696 goto done;
697 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 size = va.len + vb.len;
700 if (size < 0) {
701 PyErr_NoMemory();
702 goto done;
703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 result = PyBytes_FromStringAndSize(NULL, size);
706 if (result != NULL) {
707 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
708 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
709 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000710
711 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 if (va.len != -1)
713 PyBuffer_Release(&va);
714 if (vb.len != -1)
715 PyBuffer_Release(&vb);
716 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000717}
Neal Norwitz6968b052007-02-27 19:02:19 +0000718
719static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000720bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000721{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 register Py_ssize_t i;
723 register Py_ssize_t j;
724 register Py_ssize_t size;
725 register PyBytesObject *op;
726 size_t nbytes;
727 if (n < 0)
728 n = 0;
729 /* watch out for overflows: the size can overflow int,
730 * and the # of bytes needed can overflow size_t
731 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000732 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 PyErr_SetString(PyExc_OverflowError,
734 "repeated bytes are too long");
735 return NULL;
736 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000737 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
739 Py_INCREF(a);
740 return (PyObject *)a;
741 }
742 nbytes = (size_t)size;
743 if (nbytes + PyBytesObject_SIZE <= nbytes) {
744 PyErr_SetString(PyExc_OverflowError,
745 "repeated bytes are too long");
746 return NULL;
747 }
748 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
749 if (op == NULL)
750 return PyErr_NoMemory();
751 PyObject_INIT_VAR(op, &PyBytes_Type, size);
752 op->ob_shash = -1;
753 op->ob_sval[size] = '\0';
754 if (Py_SIZE(a) == 1 && n > 0) {
755 memset(op->ob_sval, a->ob_sval[0] , n);
756 return (PyObject *) op;
757 }
758 i = 0;
759 if (i < size) {
760 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
761 i = Py_SIZE(a);
762 }
763 while (i < size) {
764 j = (i <= size-i) ? i : size-i;
765 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
766 i += j;
767 }
768 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000769}
770
Guido van Rossum98297ee2007-11-06 21:34:58 +0000771static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000772bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000773{
774 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
775 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000776 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000777 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000778 PyErr_Clear();
779 if (_getbuffer(arg, &varg) < 0)
780 return -1;
781 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
782 varg.buf, varg.len, 0);
783 PyBuffer_Release(&varg);
784 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 }
786 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000787 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
788 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000789 }
790
Antoine Pitrou0010d372010-08-15 17:12:55 +0000791 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000792}
793
Neal Norwitz6968b052007-02-27 19:02:19 +0000794static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000795bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000796{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 if (i < 0 || i >= Py_SIZE(a)) {
798 PyErr_SetString(PyExc_IndexError, "index out of range");
799 return NULL;
800 }
801 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000802}
803
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000804static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000805bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000806{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 int c;
808 Py_ssize_t len_a, len_b;
809 Py_ssize_t min_len;
810 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 /* Make sure both arguments are strings. */
813 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
814 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
815 (PyObject_IsInstance((PyObject*)a,
816 (PyObject*)&PyUnicode_Type) ||
817 PyObject_IsInstance((PyObject*)b,
818 (PyObject*)&PyUnicode_Type))) {
819 if (PyErr_WarnEx(PyExc_BytesWarning,
820 "Comparison between bytes and string", 1))
821 return NULL;
822 }
823 result = Py_NotImplemented;
824 goto out;
825 }
826 if (a == b) {
827 switch (op) {
828 case Py_EQ:case Py_LE:case Py_GE:
829 result = Py_True;
830 goto out;
831 case Py_NE:case Py_LT:case Py_GT:
832 result = Py_False;
833 goto out;
834 }
835 }
836 if (op == Py_EQ) {
837 /* Supporting Py_NE here as well does not save
838 much time, since Py_NE is rarely used. */
839 if (Py_SIZE(a) == Py_SIZE(b)
840 && (a->ob_sval[0] == b->ob_sval[0]
841 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
842 result = Py_True;
843 } else {
844 result = Py_False;
845 }
846 goto out;
847 }
848 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
849 min_len = (len_a < len_b) ? len_a : len_b;
850 if (min_len > 0) {
851 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
852 if (c==0)
853 c = memcmp(a->ob_sval, b->ob_sval, min_len);
854 } else
855 c = 0;
856 if (c == 0)
857 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
858 switch (op) {
859 case Py_LT: c = c < 0; break;
860 case Py_LE: c = c <= 0; break;
861 case Py_EQ: assert(0); break; /* unreachable */
862 case Py_NE: c = c != 0; break;
863 case Py_GT: c = c > 0; break;
864 case Py_GE: c = c >= 0; break;
865 default:
866 result = Py_NotImplemented;
867 goto out;
868 }
869 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000870 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 Py_INCREF(result);
872 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000873}
874
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000875static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000876bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000877{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 register Py_ssize_t len;
879 register unsigned char *p;
Gregory P. Smith27cbcd62012-12-10 18:15:46 -0800880 register Py_uhash_t x; /* Unsigned for defined overflow behavior. */
Neal Norwitz6968b052007-02-27 19:02:19 +0000881
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400882#ifdef Py_DEBUG
Benjamin Peterson69e97272012-02-21 11:08:50 -0500883 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400884#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 if (a->ob_shash != -1)
886 return a->ob_shash;
887 len = Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100888 /*
889 We make the hash of the empty string be 0, rather than using
890 (prefix ^ suffix), since this slightly obfuscates the hash secret
891 */
892 if (len == 0) {
893 a->ob_shash = 0;
894 return 0;
895 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 p = (unsigned char *) a->ob_sval;
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100897 x = _Py_HashSecret.prefix;
898 x ^= *p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 while (--len >= 0)
Gregory P. Smith63e6c322012-01-14 15:31:34 -0800900 x = (_PyHASH_MULTIPLIER*x) ^ *p++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 x ^= Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100902 x ^= _Py_HashSecret.suffix;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 if (x == -1)
904 x = -2;
905 a->ob_shash = x;
906 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000907}
908
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000909static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000910bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 if (PyIndex_Check(item)) {
913 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
914 if (i == -1 && PyErr_Occurred())
915 return NULL;
916 if (i < 0)
917 i += PyBytes_GET_SIZE(self);
918 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
919 PyErr_SetString(PyExc_IndexError,
920 "index out of range");
921 return NULL;
922 }
923 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
924 }
925 else if (PySlice_Check(item)) {
926 Py_ssize_t start, stop, step, slicelength, cur, i;
927 char* source_buf;
928 char* result_buf;
929 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000930
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000931 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 PyBytes_GET_SIZE(self),
933 &start, &stop, &step, &slicelength) < 0) {
934 return NULL;
935 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 if (slicelength <= 0) {
938 return PyBytes_FromStringAndSize("", 0);
939 }
940 else if (start == 0 && step == 1 &&
941 slicelength == PyBytes_GET_SIZE(self) &&
942 PyBytes_CheckExact(self)) {
943 Py_INCREF(self);
944 return (PyObject *)self;
945 }
946 else if (step == 1) {
947 return PyBytes_FromStringAndSize(
948 PyBytes_AS_STRING(self) + start,
949 slicelength);
950 }
951 else {
952 source_buf = PyBytes_AS_STRING(self);
953 result = PyBytes_FromStringAndSize(NULL, slicelength);
954 if (result == NULL)
955 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000957 result_buf = PyBytes_AS_STRING(result);
958 for (cur = start, i = 0; i < slicelength;
959 cur += step, i++) {
960 result_buf[i] = source_buf[cur];
961 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 return result;
964 }
965 }
966 else {
967 PyErr_Format(PyExc_TypeError,
968 "byte indices must be integers, not %.200s",
969 Py_TYPE(item)->tp_name);
970 return NULL;
971 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000972}
973
974static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000975bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000976{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
978 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000979}
980
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000981static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 (lenfunc)bytes_length, /*sq_length*/
983 (binaryfunc)bytes_concat, /*sq_concat*/
984 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
985 (ssizeargfunc)bytes_item, /*sq_item*/
986 0, /*sq_slice*/
987 0, /*sq_ass_item*/
988 0, /*sq_ass_slice*/
989 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000990};
991
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000992static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 (lenfunc)bytes_length,
994 (binaryfunc)bytes_subscript,
995 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000996};
997
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000998static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 (getbufferproc)bytes_buffer_getbuffer,
1000 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001001};
1002
1003
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
1012
Neal Norwitz6968b052007-02-27 19:02:19 +00001013PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001014"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001015\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001016Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001017If sep is not specified or is None, B is split on ASCII whitespace\n\
1018characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001019If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001020
1021static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001022bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001023{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1025 Py_ssize_t maxsplit = -1;
1026 const char *s = PyBytes_AS_STRING(self), *sub;
1027 Py_buffer vsub;
1028 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1031 return NULL;
1032 if (maxsplit < 0)
1033 maxsplit = PY_SSIZE_T_MAX;
1034 if (subobj == Py_None)
1035 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1036 if (_getbuffer(subobj, &vsub) < 0)
1037 return NULL;
1038 sub = vsub.buf;
1039 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1042 PyBuffer_Release(&vsub);
1043 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001044}
1045
Neal Norwitz6968b052007-02-27 19:02:19 +00001046PyDoc_STRVAR(partition__doc__,
1047"B.partition(sep) -> (head, sep, tail)\n\
1048\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001049Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001050the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001051found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001052
1053static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001054bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001055{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 const char *sep;
1057 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 if (PyBytes_Check(sep_obj)) {
1060 sep = PyBytes_AS_STRING(sep_obj);
1061 sep_len = PyBytes_GET_SIZE(sep_obj);
1062 }
1063 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1064 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 return stringlib_partition(
1067 (PyObject*) self,
1068 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1069 sep_obj, sep, sep_len
1070 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001071}
1072
1073PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001074"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001075\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001076Search for the separator sep in B, starting at the end of B,\n\
1077and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001078part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001079bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001080
1081static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001082bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 const char *sep;
1085 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 if (PyBytes_Check(sep_obj)) {
1088 sep = PyBytes_AS_STRING(sep_obj);
1089 sep_len = PyBytes_GET_SIZE(sep_obj);
1090 }
1091 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1092 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 return stringlib_rpartition(
1095 (PyObject*) self,
1096 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1097 sep_obj, sep, sep_len
1098 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001099}
1100
Neal Norwitz6968b052007-02-27 19:02:19 +00001101PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001102"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001103\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001104Return a list of the sections in B, using sep as the delimiter,\n\
1105starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001106If sep is not given, B is split on ASCII whitespace characters\n\
1107(space, tab, return, newline, formfeed, vertical tab).\n\
1108If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001109
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001110
Neal Norwitz6968b052007-02-27 19:02:19 +00001111static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001112bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1115 Py_ssize_t maxsplit = -1;
1116 const char *s = PyBytes_AS_STRING(self), *sub;
1117 Py_buffer vsub;
1118 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1121 return NULL;
1122 if (maxsplit < 0)
1123 maxsplit = PY_SSIZE_T_MAX;
1124 if (subobj == Py_None)
1125 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1126 if (_getbuffer(subobj, &vsub) < 0)
1127 return NULL;
1128 sub = vsub.buf;
1129 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1132 PyBuffer_Release(&vsub);
1133 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001134}
1135
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001136
1137PyDoc_STRVAR(join__doc__,
1138"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001139\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001140Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1142
Neal Norwitz6968b052007-02-27 19:02:19 +00001143static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001144bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001145{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 char *sep = PyBytes_AS_STRING(self);
1147 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1148 PyObject *res = NULL;
1149 char *p;
1150 Py_ssize_t seqlen = 0;
1151 size_t sz = 0;
1152 Py_ssize_t i;
1153 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 seq = PySequence_Fast(orig, "");
1156 if (seq == NULL) {
1157 return NULL;
1158 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 seqlen = PySequence_Size(seq);
1161 if (seqlen == 0) {
1162 Py_DECREF(seq);
1163 return PyBytes_FromString("");
1164 }
1165 if (seqlen == 1) {
1166 item = PySequence_Fast_GET_ITEM(seq, 0);
1167 if (PyBytes_CheckExact(item)) {
1168 Py_INCREF(item);
1169 Py_DECREF(seq);
1170 return item;
1171 }
1172 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 /* There are at least two things to join, or else we have a subclass
1175 * of the builtin types in the sequence.
1176 * Do a pre-pass to figure out the total amount of space we'll
1177 * need (sz), and see whether all argument are bytes.
1178 */
1179 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1180 for (i = 0; i < seqlen; i++) {
1181 const size_t old_sz = sz;
1182 item = PySequence_Fast_GET_ITEM(seq, i);
1183 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1184 PyErr_Format(PyExc_TypeError,
1185 "sequence item %zd: expected bytes,"
1186 " %.80s found",
1187 i, Py_TYPE(item)->tp_name);
1188 Py_DECREF(seq);
1189 return NULL;
1190 }
1191 sz += Py_SIZE(item);
1192 if (i != 0)
1193 sz += seplen;
1194 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1195 PyErr_SetString(PyExc_OverflowError,
1196 "join() result is too long for bytes");
1197 Py_DECREF(seq);
1198 return NULL;
1199 }
1200 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 /* Allocate result space. */
1203 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1204 if (res == NULL) {
1205 Py_DECREF(seq);
1206 return NULL;
1207 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 /* Catenate everything. */
1210 /* I'm not worried about a PyByteArray item growing because there's
1211 nowhere in this function where we release the GIL. */
1212 p = PyBytes_AS_STRING(res);
1213 for (i = 0; i < seqlen; ++i) {
1214 size_t n;
1215 char *q;
1216 if (i) {
1217 Py_MEMCPY(p, sep, seplen);
1218 p += seplen;
1219 }
1220 item = PySequence_Fast_GET_ITEM(seq, i);
1221 n = Py_SIZE(item);
1222 if (PyBytes_Check(item))
1223 q = PyBytes_AS_STRING(item);
1224 else
1225 q = PyByteArray_AS_STRING(item);
1226 Py_MEMCPY(p, q, n);
1227 p += n;
1228 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 Py_DECREF(seq);
1231 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001232}
1233
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234PyObject *
1235_PyBytes_Join(PyObject *sep, PyObject *x)
1236{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 assert(sep != NULL && PyBytes_Check(sep));
1238 assert(x != NULL);
1239 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001240}
1241
/* Helper macro to fix up start/end slice values in place.

   end is capped at len; a negative end or start has len added to it
   and is then floored at 0.  start and end must be modifiable lvalues;
   len may be evaluated more than once, so it must have no side effects.

   The body is wrapped in do { } while (0) so the macro acts as one
   statement (safe under an unbraced if/else); callers must end the
   invocation with a semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256
1257Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001258bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 PyObject *subobj;
1261 const char *sub;
1262 Py_ssize_t sub_len;
1263 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264
Jesus Ceaac451502011-04-20 17:09:23 +02001265 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1266 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001268
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 if (PyBytes_Check(subobj)) {
1270 sub = PyBytes_AS_STRING(subobj);
1271 sub_len = PyBytes_GET_SIZE(subobj);
1272 }
1273 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1274 /* XXX - the "expected a character buffer object" is pretty
1275 confusing for a non-expert. remap to something else ? */
1276 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 if (dir > 0)
1279 return stringlib_find_slice(
1280 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1281 sub, sub_len, start, end);
1282 else
1283 return stringlib_rfind_slice(
1284 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1285 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286}
1287
1288
1289PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001290"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001291\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001292Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001293such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001295\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001296Return -1 on failure.");
1297
Neal Norwitz6968b052007-02-27 19:02:19 +00001298static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001299bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001300{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 Py_ssize_t result = bytes_find_internal(self, args, +1);
1302 if (result == -2)
1303 return NULL;
1304 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001305}
1306
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307
1308PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001309"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001310\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311Like B.find() but raise ValueError when the substring is not found.");
1312
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001313static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001314bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001315{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 Py_ssize_t result = bytes_find_internal(self, args, +1);
1317 if (result == -2)
1318 return NULL;
1319 if (result == -1) {
1320 PyErr_SetString(PyExc_ValueError,
1321 "substring not found");
1322 return NULL;
1323 }
1324 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001325}
1326
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327
1328PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001329"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001330\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001332such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001333arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001334\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335Return -1 on failure.");
1336
Neal Norwitz6968b052007-02-27 19:02:19 +00001337static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001338bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 Py_ssize_t result = bytes_find_internal(self, args, -1);
1341 if (result == -2)
1342 return NULL;
1343 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001344}
1345
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001346
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001347PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001348"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349\n\
1350Like B.rfind() but raise ValueError when the substring is not found.");
1351
1352static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001353bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 Py_ssize_t result = bytes_find_internal(self, args, -1);
1356 if (result == -2)
1357 return NULL;
1358 if (result == -1) {
1359 PyErr_SetString(PyExc_ValueError,
1360 "substring not found");
1361 return NULL;
1362 }
1363 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001364}
1365
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001366
1367Py_LOCAL_INLINE(PyObject *)
1368do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 Py_buffer vsep;
1371 char *s = PyBytes_AS_STRING(self);
1372 Py_ssize_t len = PyBytes_GET_SIZE(self);
1373 char *sep;
1374 Py_ssize_t seplen;
1375 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 if (_getbuffer(sepobj, &vsep) < 0)
1378 return NULL;
1379 sep = vsep.buf;
1380 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 i = 0;
1383 if (striptype != RIGHTSTRIP) {
1384 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1385 i++;
1386 }
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 j = len;
1390 if (striptype != LEFTSTRIP) {
1391 do {
1392 j--;
1393 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1394 j++;
1395 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1400 Py_INCREF(self);
1401 return (PyObject*)self;
1402 }
1403 else
1404 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001405}
1406
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407
1408Py_LOCAL_INLINE(PyObject *)
1409do_strip(PyBytesObject *self, int striptype)
1410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 char *s = PyBytes_AS_STRING(self);
1412 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 i = 0;
1415 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001416 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 i++;
1418 }
1419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 j = len;
1422 if (striptype != LEFTSTRIP) {
1423 do {
1424 j--;
David Malcolm96960882010-11-05 17:23:41 +00001425 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 j++;
1427 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1430 Py_INCREF(self);
1431 return (PyObject*)self;
1432 }
1433 else
1434 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435}
1436
1437
1438Py_LOCAL_INLINE(PyObject *)
1439do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1440{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1444 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 if (sep != NULL && sep != Py_None) {
1447 return do_xstrip(self, striptype, sep);
1448 }
1449 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450}
1451
1452
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001454"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001455\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001456Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001457If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001459bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001460{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 if (PyTuple_GET_SIZE(args) == 0)
1462 return do_strip(self, BOTHSTRIP); /* Common case */
1463 else
1464 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001465}
1466
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001469"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001470\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001471Strip leading bytes contained in the argument.\n\
1472If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001473static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001474bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001475{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 if (PyTuple_GET_SIZE(args) == 0)
1477 return do_strip(self, LEFTSTRIP); /* Common case */
1478 else
1479 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001480}
1481
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001483PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001485\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001486Strip trailing bytes contained in the argument.\n\
1487If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001488static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001489bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001490{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 if (PyTuple_GET_SIZE(args) == 0)
1492 return do_strip(self, RIGHTSTRIP); /* Common case */
1493 else
1494 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001495}
Neal Norwitz6968b052007-02-27 19:02:19 +00001496
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001497
1498PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001499"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001500\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001502string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001503as in slice notation.");
1504
1505static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001506bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 PyObject *sub_obj;
1509 const char *str = PyBytes_AS_STRING(self), *sub;
1510 Py_ssize_t sub_len;
1511 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001512
Jesus Ceaac451502011-04-20 17:09:23 +02001513 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 if (PyBytes_Check(sub_obj)) {
1517 sub = PyBytes_AS_STRING(sub_obj);
1518 sub_len = PyBytes_GET_SIZE(sub_obj);
1519 }
1520 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1521 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001522
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 return PyLong_FromSsize_t(
1526 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1527 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001528}
1529
1530
1531PyDoc_STRVAR(translate__doc__,
1532"B.translate(table[, deletechars]) -> bytes\n\
1533\n\
1534Return a copy of B, where all characters occurring in the\n\
1535optional argument deletechars are removed, and the remaining\n\
1536characters have been mapped through the given translation\n\
1537table, which must be a bytes object of length 256.");
1538
1539static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001540bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001541{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 register char *input, *output;
1543 const char *table;
1544 register Py_ssize_t i, c, changed = 0;
1545 PyObject *input_obj = (PyObject*)self;
1546 const char *output_start, *del_table=NULL;
1547 Py_ssize_t inlen, tablen, dellen = 0;
1548 PyObject *result;
1549 int trans_table[256];
1550 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1553 &tableobj, &delobj))
1554 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 if (PyBytes_Check(tableobj)) {
1557 table = PyBytes_AS_STRING(tableobj);
1558 tablen = PyBytes_GET_SIZE(tableobj);
1559 }
1560 else if (tableobj == Py_None) {
1561 table = NULL;
1562 tablen = 256;
1563 }
1564 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1565 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 if (tablen != 256) {
1568 PyErr_SetString(PyExc_ValueError,
1569 "translation table must be 256 characters long");
1570 return NULL;
1571 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 if (delobj != NULL) {
1574 if (PyBytes_Check(delobj)) {
1575 del_table = PyBytes_AS_STRING(delobj);
1576 dellen = PyBytes_GET_SIZE(delobj);
1577 }
1578 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1579 return NULL;
1580 }
1581 else {
1582 del_table = NULL;
1583 dellen = 0;
1584 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 inlen = PyBytes_GET_SIZE(input_obj);
1587 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1588 if (result == NULL)
1589 return NULL;
1590 output_start = output = PyBytes_AsString(result);
1591 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001593 if (dellen == 0 && table != NULL) {
1594 /* If no deletions are required, use faster code */
1595 for (i = inlen; --i >= 0; ) {
1596 c = Py_CHARMASK(*input++);
1597 if (Py_CHARMASK((*output++ = table[c])) != c)
1598 changed = 1;
1599 }
1600 if (changed || !PyBytes_CheckExact(input_obj))
1601 return result;
1602 Py_DECREF(result);
1603 Py_INCREF(input_obj);
1604 return input_obj;
1605 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 if (table == NULL) {
1608 for (i = 0; i < 256; i++)
1609 trans_table[i] = Py_CHARMASK(i);
1610 } else {
1611 for (i = 0; i < 256; i++)
1612 trans_table[i] = Py_CHARMASK(table[i]);
1613 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 for (i = 0; i < dellen; i++)
1616 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 for (i = inlen; --i >= 0; ) {
1619 c = Py_CHARMASK(*input++);
1620 if (trans_table[c] != -1)
1621 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1622 continue;
1623 changed = 1;
1624 }
1625 if (!changed && PyBytes_CheckExact(input_obj)) {
1626 Py_DECREF(result);
1627 Py_INCREF(input_obj);
1628 return input_obj;
1629 }
1630 /* Fix the size of the resulting string */
1631 if (inlen > 0)
1632 _PyBytes_Resize(&result, output - output_start);
1633 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634}
1635
1636
Georg Brandlabc38772009-04-12 15:51:51 +00001637static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001638bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001639{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001641}
1642
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643/* find and count characters and substrings */
1644
/* Return a char pointer to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL if there is none (thin wrapper
   around memchr). */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1647
1648/* String ops must return a string. */
1649/* If the object is subclass of string, create a copy */
1650Py_LOCAL(PyBytesObject *)
1651return_self(PyBytesObject *self)
1652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 if (PyBytes_CheckExact(self)) {
1654 Py_INCREF(self);
1655 return self;
1656 }
1657 return (PyBytesObject *)PyBytes_FromStringAndSize(
1658 PyBytes_AS_STRING(self),
1659 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001660}
1661
1662Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001663countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 Py_ssize_t count=0;
1666 const char *start=target;
1667 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 while ( (start=findchar(start, end-start, c)) != NULL ) {
1670 count++;
1671 if (count >= maxcount)
1672 break;
1673 start += 1;
1674 }
1675 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676}
1677
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678
1679/* Algorithms for different cases of string replacement */
1680
1681/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1682Py_LOCAL(PyBytesObject *)
1683replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 const char *to_s, Py_ssize_t to_len,
1685 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 char *self_s, *result_s;
1688 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001689 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001694 /* 1 at the end plus 1 after every character;
1695 count = min(maxcount, self_len + 1) */
1696 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001697 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001698 else
1699 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1700 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 /* Check for overflow */
1703 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001704 assert(count > 0);
1705 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 PyErr_SetString(PyExc_OverflowError,
1707 "replacement bytes are too long");
1708 return NULL;
1709 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001710 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 if (! (result = (PyBytesObject *)
1713 PyBytes_FromStringAndSize(NULL, result_len)) )
1714 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 self_s = PyBytes_AS_STRING(self);
1717 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 /* Lay the first one down (guaranteed this will occur) */
1722 Py_MEMCPY(result_s, to_s, to_len);
1723 result_s += to_len;
1724 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 for (i=0; i<count; i++) {
1727 *result_s++ = *self_s++;
1728 Py_MEMCPY(result_s, to_s, to_len);
1729 result_s += to_len;
1730 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 /* Copy the rest of the original string */
1733 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736}
1737
1738/* Special case for deleting a single character */
1739/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1740Py_LOCAL(PyBytesObject *)
1741replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001742 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 char *self_s, *result_s;
1745 char *start, *next, *end;
1746 Py_ssize_t self_len, result_len;
1747 Py_ssize_t count;
1748 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 self_len = PyBytes_GET_SIZE(self);
1751 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 count = countchar(self_s, self_len, from_c, maxcount);
1754 if (count == 0) {
1755 return return_self(self);
1756 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 result_len = self_len - count; /* from_len == 1 */
1759 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001761 if ( (result = (PyBytesObject *)
1762 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1763 return NULL;
1764 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 start = self_s;
1767 end = self_s + self_len;
1768 while (count-- > 0) {
1769 next = findchar(start, end-start, from_c);
1770 if (next == NULL)
1771 break;
1772 Py_MEMCPY(result_s, start, next-start);
1773 result_s += (next-start);
1774 start = next+1;
1775 }
1776 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779}
1780
1781/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1782
1783Py_LOCAL(PyBytesObject *)
1784replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 const char *from_s, Py_ssize_t from_len,
1786 Py_ssize_t maxcount) {
1787 char *self_s, *result_s;
1788 char *start, *next, *end;
1789 Py_ssize_t self_len, result_len;
1790 Py_ssize_t count, offset;
1791 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 self_len = PyBytes_GET_SIZE(self);
1794 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 count = stringlib_count(self_s, self_len,
1797 from_s, from_len,
1798 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001800 if (count == 0) {
1801 /* no matches */
1802 return return_self(self);
1803 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 result_len = self_len - (count * from_len);
1806 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 if ( (result = (PyBytesObject *)
1809 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1810 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 start = self_s;
1815 end = self_s + self_len;
1816 while (count-- > 0) {
1817 offset = stringlib_find(start, end-start,
1818 from_s, from_len,
1819 0);
1820 if (offset == -1)
1821 break;
1822 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 result_s += (next-start);
1827 start = next+from_len;
1828 }
1829 Py_MEMCPY(result_s, start, end-start);
1830 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831}
1832
1833/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1834Py_LOCAL(PyBytesObject *)
1835replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 char from_c, char to_c,
1837 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 char *self_s, *result_s, *start, *end, *next;
1840 Py_ssize_t self_len;
1841 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 /* The result string will be the same size */
1844 self_s = PyBytes_AS_STRING(self);
1845 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 if (next == NULL) {
1850 /* No matches; return the original string */
1851 return return_self(self);
1852 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 /* Need to make a new string */
1855 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1856 if (result == NULL)
1857 return NULL;
1858 result_s = PyBytes_AS_STRING(result);
1859 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 /* change everything in-place, starting with this one */
1862 start = result_s + (next-self_s);
1863 *start = to_c;
1864 start++;
1865 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 while (--maxcount > 0) {
1868 next = findchar(start, end-start, from_c);
1869 if (next == NULL)
1870 break;
1871 *next = to_c;
1872 start = next+1;
1873 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876}
1877
1878/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1879Py_LOCAL(PyBytesObject *)
1880replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 const char *from_s, Py_ssize_t from_len,
1882 const char *to_s, Py_ssize_t to_len,
1883 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 char *result_s, *start, *end;
1886 char *self_s;
1887 Py_ssize_t self_len, offset;
1888 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 self_s = PyBytes_AS_STRING(self);
1893 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 offset = stringlib_find(self_s, self_len,
1896 from_s, from_len,
1897 0);
1898 if (offset == -1) {
1899 /* No matches; return the original string */
1900 return return_self(self);
1901 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 /* Need to make a new string */
1904 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1905 if (result == NULL)
1906 return NULL;
1907 result_s = PyBytes_AS_STRING(result);
1908 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001910 /* change everything in-place, starting with this one */
1911 start = result_s + offset;
1912 Py_MEMCPY(start, to_s, from_len);
1913 start += from_len;
1914 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 while ( --maxcount > 0) {
1917 offset = stringlib_find(start, end-start,
1918 from_s, from_len,
1919 0);
1920 if (offset==-1)
1921 break;
1922 Py_MEMCPY(start+offset, to_s, from_len);
1923 start += offset+from_len;
1924 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927}
1928
1929/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1930Py_LOCAL(PyBytesObject *)
1931replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 char from_c,
1933 const char *to_s, Py_ssize_t to_len,
1934 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 char *self_s, *result_s;
1937 char *start, *next, *end;
1938 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001939 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 self_s = PyBytes_AS_STRING(self);
1943 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 count = countchar(self_s, self_len, from_c, maxcount);
1946 if (count == 0) {
1947 /* no matches, return unchanged */
1948 return return_self(self);
1949 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 /* use the difference between current and new, hence the "-1" */
1952 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001953 assert(count > 0);
1954 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955 PyErr_SetString(PyExc_OverflowError,
1956 "replacement bytes are too long");
1957 return NULL;
1958 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001959 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001961 if ( (result = (PyBytesObject *)
1962 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1963 return NULL;
1964 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 start = self_s;
1967 end = self_s + self_len;
1968 while (count-- > 0) {
1969 next = findchar(start, end-start, from_c);
1970 if (next == NULL)
1971 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973 if (next == start) {
1974 /* replace with the 'to' */
1975 Py_MEMCPY(result_s, to_s, to_len);
1976 result_s += to_len;
1977 start += 1;
1978 } else {
1979 /* copy the unchanged old then the 'to' */
1980 Py_MEMCPY(result_s, start, next-start);
1981 result_s += (next-start);
1982 Py_MEMCPY(result_s, to_s, to_len);
1983 result_s += to_len;
1984 start = next+1;
1985 }
1986 }
1987 /* Copy the remainder of the remaining string */
1988 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991}
1992
1993/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1994Py_LOCAL(PyBytesObject *)
1995replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 const char *from_s, Py_ssize_t from_len,
1997 const char *to_s, Py_ssize_t to_len,
1998 Py_ssize_t maxcount) {
1999 char *self_s, *result_s;
2000 char *start, *next, *end;
2001 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002002 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 self_s = PyBytes_AS_STRING(self);
2006 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 count = stringlib_count(self_s, self_len,
2009 from_s, from_len,
2010 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002012 if (count == 0) {
2013 /* no matches, return unchanged */
2014 return return_self(self);
2015 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 /* Check for overflow */
2018 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002019 assert(count > 0);
2020 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 PyErr_SetString(PyExc_OverflowError,
2022 "replacement bytes are too long");
2023 return NULL;
2024 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002025 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002027 if ( (result = (PyBytesObject *)
2028 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2029 return NULL;
2030 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002032 start = self_s;
2033 end = self_s + self_len;
2034 while (count-- > 0) {
2035 offset = stringlib_find(start, end-start,
2036 from_s, from_len,
2037 0);
2038 if (offset == -1)
2039 break;
2040 next = start+offset;
2041 if (next == start) {
2042 /* replace with the 'to' */
2043 Py_MEMCPY(result_s, to_s, to_len);
2044 result_s += to_len;
2045 start += from_len;
2046 } else {
2047 /* copy the unchanged old then the 'to' */
2048 Py_MEMCPY(result_s, start, next-start);
2049 result_s += (next-start);
2050 Py_MEMCPY(result_s, to_s, to_len);
2051 result_s += to_len;
2052 start = next+from_len;
2053 }
2054 }
2055 /* Copy the remainder of the remaining string */
2056 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059}
2060
2061
2062Py_LOCAL(PyBytesObject *)
2063replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 const char *from_s, Py_ssize_t from_len,
2065 const char *to_s, Py_ssize_t to_len,
2066 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 if (maxcount < 0) {
2069 maxcount = PY_SSIZE_T_MAX;
2070 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2071 /* nothing to do; return the original string */
2072 return return_self(self);
2073 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 if (maxcount == 0 ||
2076 (from_len == 0 && to_len == 0)) {
2077 /* nothing to do; return the original string */
2078 return return_self(self);
2079 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 if (from_len == 0) {
2084 /* insert the 'to' string everywhere. */
2085 /* >>> "Python".replace("", ".") */
2086 /* '.P.y.t.h.o.n.' */
2087 return replace_interleave(self, to_s, to_len, maxcount);
2088 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2091 /* point for an empty self string to generate a non-empty string */
2092 /* Special case so the remaining code always gets a non-empty string */
2093 if (PyBytes_GET_SIZE(self) == 0) {
2094 return return_self(self);
2095 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 if (to_len == 0) {
2098 /* delete all occurrences of 'from' string */
2099 if (from_len == 1) {
2100 return replace_delete_single_character(
2101 self, from_s[0], maxcount);
2102 } else {
2103 return replace_delete_substring(self, from_s,
2104 from_len, maxcount);
2105 }
2106 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 if (from_len == to_len) {
2111 if (from_len == 1) {
2112 return replace_single_character_in_place(
2113 self,
2114 from_s[0],
2115 to_s[0],
2116 maxcount);
2117 } else {
2118 return replace_substring_in_place(
2119 self, from_s, from_len, to_s, to_len,
2120 maxcount);
2121 }
2122 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 /* Otherwise use the more generic algorithms */
2125 if (from_len == 1) {
2126 return replace_single_character(self, from_s[0],
2127 to_s, to_len, maxcount);
2128 } else {
2129 /* len('from')>=2, len('to')>=1 */
2130 return replace_substring(self, from_s, from_len, to_s, to_len,
2131 maxcount);
2132 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133}
2134
2135PyDoc_STRVAR(replace__doc__,
2136"B.replace(old, new[, count]) -> bytes\n\
2137\n\
2138Return a copy of B with all occurrences of subsection\n\
2139old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002140given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141
2142static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002143bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 Py_ssize_t count = -1;
2146 PyObject *from, *to;
2147 const char *from_s, *to_s;
2148 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2151 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 if (PyBytes_Check(from)) {
2154 from_s = PyBytes_AS_STRING(from);
2155 from_len = PyBytes_GET_SIZE(from);
2156 }
2157 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2158 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 if (PyBytes_Check(to)) {
2161 to_s = PyBytes_AS_STRING(to);
2162 to_len = PyBytes_GET_SIZE(to);
2163 }
2164 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2165 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 return (PyObject *)replace((PyBytesObject *) self,
2168 from_s, from_len,
2169 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002170}
2171
2172/** End DALKE **/
2173
2174/* Matches the end (direction >= 0) or start (direction < 0) of self
2175 * against substr, using the start and end arguments. Returns
2176 * -1 on error, 0 if not found and 1 if found.
2177 */
2178Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002179_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 Py_ssize_t len = PyBytes_GET_SIZE(self);
2183 Py_ssize_t slen;
2184 const char* sub;
2185 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 if (PyBytes_Check(substr)) {
2188 sub = PyBytes_AS_STRING(substr);
2189 slen = PyBytes_GET_SIZE(substr);
2190 }
2191 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2192 return -1;
2193 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002195 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 if (direction < 0) {
2198 /* startswith */
2199 if (start+slen > len)
2200 return 0;
2201 } else {
2202 /* endswith */
2203 if (end-start < slen || start > len)
2204 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002206 if (end-slen > start)
2207 start = end - slen;
2208 }
2209 if (end-start >= slen)
2210 return ! memcmp(str+start, sub, slen);
2211 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002212}
2213
2214
2215PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002216"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002217\n\
2218Return True if B starts with the specified prefix, False otherwise.\n\
2219With optional start, test B beginning at that position.\n\
2220With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002221prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
2223static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002224bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002225{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002226 Py_ssize_t start = 0;
2227 Py_ssize_t end = PY_SSIZE_T_MAX;
2228 PyObject *subobj;
2229 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002230
Jesus Ceaac451502011-04-20 17:09:23 +02002231 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002232 return NULL;
2233 if (PyTuple_Check(subobj)) {
2234 Py_ssize_t i;
2235 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2236 result = _bytes_tailmatch(self,
2237 PyTuple_GET_ITEM(subobj, i),
2238 start, end, -1);
2239 if (result == -1)
2240 return NULL;
2241 else if (result) {
2242 Py_RETURN_TRUE;
2243 }
2244 }
2245 Py_RETURN_FALSE;
2246 }
2247 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002248 if (result == -1) {
2249 if (PyErr_ExceptionMatches(PyExc_TypeError))
2250 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2251 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002252 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002253 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002254 else
2255 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002256}
2257
2258
2259PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002260"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002261\n\
2262Return True if B ends with the specified suffix, False otherwise.\n\
2263With optional start, test B beginning at that position.\n\
2264With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002265suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
2267static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002268bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002269{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 Py_ssize_t start = 0;
2271 Py_ssize_t end = PY_SSIZE_T_MAX;
2272 PyObject *subobj;
2273 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002274
Jesus Ceaac451502011-04-20 17:09:23 +02002275 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002276 return NULL;
2277 if (PyTuple_Check(subobj)) {
2278 Py_ssize_t i;
2279 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2280 result = _bytes_tailmatch(self,
2281 PyTuple_GET_ITEM(subobj, i),
2282 start, end, +1);
2283 if (result == -1)
2284 return NULL;
2285 else if (result) {
2286 Py_RETURN_TRUE;
2287 }
2288 }
2289 Py_RETURN_FALSE;
2290 }
2291 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002292 if (result == -1) {
2293 if (PyErr_ExceptionMatches(PyExc_TypeError))
2294 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2295 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002297 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002298 else
2299 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002300}
2301
2302
2303PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002304"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002305\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002306Decode B using the codec registered for encoding. Default encoding\n\
2307is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002308handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2309a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002310as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002311able to handle UnicodeDecodeErrors.");
2312
2313static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002314bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002315{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002316 const char *encoding = NULL;
2317 const char *errors = NULL;
2318 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2321 return NULL;
2322 if (encoding == NULL)
2323 encoding = PyUnicode_GetDefaultEncoding();
2324 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002325}
2326
Guido van Rossum20188312006-05-05 15:15:40 +00002327
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002328PyDoc_STRVAR(splitlines__doc__,
2329"B.splitlines([keepends]) -> list of lines\n\
2330\n\
2331Return a list of the lines in B, breaking at line boundaries.\n\
2332Line breaks are not included in the resulting list unless keepends\n\
2333is given and true.");
2334
2335static PyObject*
2336bytes_splitlines(PyObject *self, PyObject *args)
2337{
2338 int keepends = 0;
2339
2340 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002341 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002342
2343 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002344 (PyObject*) self, PyBytes_AS_STRING(self),
2345 PyBytes_GET_SIZE(self), keepends
2346 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002347}
2348
2349
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002350PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002351"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002353Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002354Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002356
2357static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002358hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002360 if (c >= 128)
2361 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002362 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002363 return c - '0';
2364 else {
David Malcolm96960882010-11-05 17:23:41 +00002365 if (Py_ISUPPER(c))
2366 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002367 if (c >= 'a' && c <= 'f')
2368 return c - 'a' + 10;
2369 }
2370 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002371}
2372
2373static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002374bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 PyObject *newstring, *hexobj;
2377 char *buf;
2378 Py_UNICODE *hex;
2379 Py_ssize_t hexlen, byteslen, i, j;
2380 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2383 return NULL;
2384 assert(PyUnicode_Check(hexobj));
2385 hexlen = PyUnicode_GET_SIZE(hexobj);
2386 hex = PyUnicode_AS_UNICODE(hexobj);
2387 byteslen = hexlen/2; /* This overestimates if there are spaces */
2388 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2389 if (!newstring)
2390 return NULL;
2391 buf = PyBytes_AS_STRING(newstring);
2392 for (i = j = 0; i < hexlen; i += 2) {
2393 /* skip over spaces in the input */
2394 while (hex[i] == ' ')
2395 i++;
2396 if (i >= hexlen)
2397 break;
2398 top = hex_digit_to_int(hex[i]);
2399 bot = hex_digit_to_int(hex[i+1]);
2400 if (top == -1 || bot == -1) {
2401 PyErr_Format(PyExc_ValueError,
2402 "non-hexadecimal number found in "
2403 "fromhex() arg at position %zd", i);
2404 goto error;
2405 }
2406 buf[j++] = (top << 4) + bot;
2407 }
2408 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2409 goto error;
2410 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002411
2412 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 Py_XDECREF(newstring);
2414 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002415}
2416
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002417PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002418"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002419
2420static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002421bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002422{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002423 Py_ssize_t res;
2424 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2425 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002426}
2427
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002428
2429static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002430bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002431{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002433}
2434
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002435
2436static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002437bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2439 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2440 _Py_capitalize__doc__},
2441 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2442 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2443 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2444 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2445 endswith__doc__},
2446 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2447 expandtabs__doc__},
2448 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2449 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2450 fromhex_doc},
2451 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2452 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2453 _Py_isalnum__doc__},
2454 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2455 _Py_isalpha__doc__},
2456 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2457 _Py_isdigit__doc__},
2458 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2459 _Py_islower__doc__},
2460 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2461 _Py_isspace__doc__},
2462 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2463 _Py_istitle__doc__},
2464 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2465 _Py_isupper__doc__},
2466 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2467 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2468 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2469 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2470 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2471 _Py_maketrans__doc__},
2472 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2473 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2474 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2475 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2476 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2477 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2478 rpartition__doc__},
2479 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2480 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2481 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2482 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2483 splitlines__doc__},
2484 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2485 startswith__doc__},
2486 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2487 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2488 _Py_swapcase__doc__},
2489 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2490 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2491 translate__doc__},
2492 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2493 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2494 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2495 sizeof__doc__},
2496 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002497};
2498
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002499static PyObject *
2500str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2501
2502static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002503bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002504{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002505 PyObject *x = NULL;
2506 const char *encoding = NULL;
2507 const char *errors = NULL;
2508 PyObject *new = NULL;
2509 Py_ssize_t size;
2510 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002512 if (type != &PyBytes_Type)
2513 return str_subtype_new(type, args, kwds);
2514 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2515 &encoding, &errors))
2516 return NULL;
2517 if (x == NULL) {
2518 if (encoding != NULL || errors != NULL) {
2519 PyErr_SetString(PyExc_TypeError,
2520 "encoding or errors without sequence "
2521 "argument");
2522 return NULL;
2523 }
2524 return PyBytes_FromString("");
2525 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 if (PyUnicode_Check(x)) {
2528 /* Encode via the codec registry */
2529 if (encoding == NULL) {
2530 PyErr_SetString(PyExc_TypeError,
2531 "string argument without an encoding");
2532 return NULL;
2533 }
2534 new = PyUnicode_AsEncodedString(x, encoding, errors);
2535 if (new == NULL)
2536 return NULL;
2537 assert(PyBytes_Check(new));
2538 return new;
2539 }
2540 /* Is it an integer? */
2541 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2542 if (size == -1 && PyErr_Occurred()) {
2543 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2544 return NULL;
2545 PyErr_Clear();
2546 }
2547 else if (size < 0) {
2548 PyErr_SetString(PyExc_ValueError, "negative count");
2549 return NULL;
2550 }
2551 else {
2552 new = PyBytes_FromStringAndSize(NULL, size);
2553 if (new == NULL) {
2554 return NULL;
2555 }
2556 if (size > 0) {
2557 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2558 }
2559 return new;
2560 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002562 /* If it's not unicode, there can't be encoding or errors */
2563 if (encoding != NULL || errors != NULL) {
2564 PyErr_SetString(PyExc_TypeError,
2565 "encoding or errors without a string argument");
2566 return NULL;
2567 }
2568 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002569}
2570
2571PyObject *
2572PyBytes_FromObject(PyObject *x)
2573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 PyObject *new, *it;
2575 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 if (x == NULL) {
2578 PyErr_BadInternalCall();
2579 return NULL;
2580 }
2581 /* Use the modern buffer interface */
2582 if (PyObject_CheckBuffer(x)) {
2583 Py_buffer view;
2584 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2585 return NULL;
2586 new = PyBytes_FromStringAndSize(NULL, view.len);
2587 if (!new)
2588 goto fail;
2589 /* XXX(brett.cannon): Better way to get to internal buffer? */
2590 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2591 &view, view.len, 'C') < 0)
2592 goto fail;
2593 PyBuffer_Release(&view);
2594 return new;
2595 fail:
2596 Py_XDECREF(new);
2597 PyBuffer_Release(&view);
2598 return NULL;
2599 }
2600 if (PyUnicode_Check(x)) {
2601 PyErr_SetString(PyExc_TypeError,
2602 "cannot convert unicode object to bytes");
2603 return NULL;
2604 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002605
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002606 if (PyList_CheckExact(x)) {
2607 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2608 if (new == NULL)
2609 return NULL;
2610 for (i = 0; i < Py_SIZE(x); i++) {
2611 Py_ssize_t value = PyNumber_AsSsize_t(
2612 PyList_GET_ITEM(x, i), PyExc_ValueError);
2613 if (value == -1 && PyErr_Occurred()) {
2614 Py_DECREF(new);
2615 return NULL;
2616 }
2617 if (value < 0 || value >= 256) {
2618 PyErr_SetString(PyExc_ValueError,
2619 "bytes must be in range(0, 256)");
2620 Py_DECREF(new);
2621 return NULL;
2622 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002623 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 }
2625 return new;
2626 }
2627 if (PyTuple_CheckExact(x)) {
2628 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2629 if (new == NULL)
2630 return NULL;
2631 for (i = 0; i < Py_SIZE(x); i++) {
2632 Py_ssize_t value = PyNumber_AsSsize_t(
2633 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2634 if (value == -1 && PyErr_Occurred()) {
2635 Py_DECREF(new);
2636 return NULL;
2637 }
2638 if (value < 0 || value >= 256) {
2639 PyErr_SetString(PyExc_ValueError,
2640 "bytes must be in range(0, 256)");
2641 Py_DECREF(new);
2642 return NULL;
2643 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002644 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002645 }
2646 return new;
2647 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002649 /* For iterator version, create a string object and resize as needed */
2650 size = _PyObject_LengthHint(x, 64);
2651 if (size == -1 && PyErr_Occurred())
2652 return NULL;
2653 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2654 returning a shared empty bytes string. This required because we
2655 want to call _PyBytes_Resize() the returned object, which we can
2656 only do on bytes objects with refcount == 1. */
2657 size += 1;
2658 new = PyBytes_FromStringAndSize(NULL, size);
2659 if (new == NULL)
2660 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 /* Get the iterator */
2663 it = PyObject_GetIter(x);
2664 if (it == NULL)
2665 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 /* Run the iterator to exhaustion */
2668 for (i = 0; ; i++) {
2669 PyObject *item;
2670 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* Get the next item */
2673 item = PyIter_Next(it);
2674 if (item == NULL) {
2675 if (PyErr_Occurred())
2676 goto error;
2677 break;
2678 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 /* Interpret it as an int (__index__) */
2681 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2682 Py_DECREF(item);
2683 if (value == -1 && PyErr_Occurred())
2684 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 /* Range check */
2687 if (value < 0 || value >= 256) {
2688 PyErr_SetString(PyExc_ValueError,
2689 "bytes must be in range(0, 256)");
2690 goto error;
2691 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 /* Append the byte */
2694 if (i >= size) {
2695 size = 2 * size + 1;
2696 if (_PyBytes_Resize(&new, size) < 0)
2697 goto error;
2698 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002699 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 }
2701 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 /* Clean up and return success */
2704 Py_DECREF(it);
2705 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706
2707 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 /* Error handling when new != NULL */
2709 Py_XDECREF(it);
2710 Py_DECREF(new);
2711 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712}
2713
2714static PyObject *
2715str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2716{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 PyObject *tmp, *pnew;
2718 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 assert(PyType_IsSubtype(type, &PyBytes_Type));
2721 tmp = bytes_new(&PyBytes_Type, args, kwds);
2722 if (tmp == NULL)
2723 return NULL;
2724 assert(PyBytes_CheckExact(tmp));
2725 n = PyBytes_GET_SIZE(tmp);
2726 pnew = type->tp_alloc(type, n);
2727 if (pnew != NULL) {
2728 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2729 PyBytes_AS_STRING(tmp), n+1);
2730 ((PyBytesObject *)pnew)->ob_shash =
2731 ((PyBytesObject *)tmp)->ob_shash;
2732 }
2733 Py_DECREF(tmp);
2734 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735}
2736
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002737PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002738"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002740bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002741bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2742bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002743\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002745 - an iterable yielding integers in range(256)\n\
2746 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002747 - any object implementing the buffer API.\n\
2748 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002749
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002750static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002751
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002753 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2754 "bytes",
2755 PyBytesObject_SIZE,
2756 sizeof(char),
2757 bytes_dealloc, /* tp_dealloc */
2758 0, /* tp_print */
2759 0, /* tp_getattr */
2760 0, /* tp_setattr */
2761 0, /* tp_reserved */
2762 (reprfunc)bytes_repr, /* tp_repr */
2763 0, /* tp_as_number */
2764 &bytes_as_sequence, /* tp_as_sequence */
2765 &bytes_as_mapping, /* tp_as_mapping */
2766 (hashfunc)bytes_hash, /* tp_hash */
2767 0, /* tp_call */
2768 bytes_str, /* tp_str */
2769 PyObject_GenericGetAttr, /* tp_getattro */
2770 0, /* tp_setattro */
2771 &bytes_as_buffer, /* tp_as_buffer */
2772 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2773 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2774 bytes_doc, /* tp_doc */
2775 0, /* tp_traverse */
2776 0, /* tp_clear */
2777 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2778 0, /* tp_weaklistoffset */
2779 bytes_iter, /* tp_iter */
2780 0, /* tp_iternext */
2781 bytes_methods, /* tp_methods */
2782 0, /* tp_members */
2783 0, /* tp_getset */
2784 &PyBaseObject_Type, /* tp_base */
2785 0, /* tp_dict */
2786 0, /* tp_descr_get */
2787 0, /* tp_descr_set */
2788 0, /* tp_dictoffset */
2789 0, /* tp_init */
2790 0, /* tp_alloc */
2791 bytes_new, /* tp_new */
2792 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002793};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002794
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795void
2796PyBytes_Concat(register PyObject **pv, register PyObject *w)
2797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 register PyObject *v;
2799 assert(pv != NULL);
2800 if (*pv == NULL)
2801 return;
2802 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002803 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 return;
2805 }
2806 v = bytes_concat(*pv, w);
2807 Py_DECREF(*pv);
2808 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002809}
2810
2811void
2812PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2813{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002814 PyBytes_Concat(pv, w);
2815 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816}
2817
2818
2819/* The following function breaks the notion that strings are immutable:
2820 it changes the size of a string. We get away with this only if there
2821 is only one module referencing the object. You can also think of it
2822 as creating a new string object and destroying the old one, only
2823 more efficiently. In any case, don't use this if the string may
2824 already be known to some other part of the code...
2825 Note that if there's not enough memory to resize the string, the original
2826 string object at *pv is deallocated, *pv is set to NULL, an "out of
2827 memory" exception is set, and -1 is returned. Else (on success) 0 is
2828 returned, and the value in *pv may or may not be the same as on input.
2829 As always, an extra byte is allocated for a trailing \0 byte (newsize
2830 does *not* include that), and a trailing \0 byte is stored.
2831*/
2832
2833int
2834_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002836 register PyObject *v;
2837 register PyBytesObject *sv;
2838 v = *pv;
2839 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2840 *pv = 0;
2841 Py_DECREF(v);
2842 PyErr_BadInternalCall();
2843 return -1;
2844 }
2845 /* XXX UNREF/NEWREF interface should be more symmetrical */
2846 _Py_DEC_REFTOTAL;
2847 _Py_ForgetReference(v);
2848 *pv = (PyObject *)
2849 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2850 if (*pv == NULL) {
2851 PyObject_Del(v);
2852 PyErr_NoMemory();
2853 return -1;
2854 }
2855 _Py_NewReference(*pv);
2856 sv = (PyBytesObject *) *pv;
2857 Py_SIZE(sv) = newsize;
2858 sv->ob_sval[newsize] = '\0';
2859 sv->ob_shash = -1; /* invalidate cached hash value */
2860 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002861}
2862
2863/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2864 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2865 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002866 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867 * . *pbuf is set to point into it,
2868 * *plen set to the # of chars following that.
2869 * Caller must decref it when done using pbuf.
2870 * The string starting at *pbuf is of the form
2871 * "-"? ("0x" | "0X")? digit+
2872 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2873 * set in flags. The case of hex digits will be correct,
2874 * There will be at least prec digits, zero-filled on the left if
2875 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 * val object to be converted
2877 * flags bitmask of format flags; only F_ALT is looked at
2878 * prec minimum number of digits; 0-fill on left if needed
2879 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002880 *
2881 * CAUTION: o, x and X conversions on regular ints can never
2882 * produce a '-' sign, but can for Python's unbounded ints.
2883 */
2884PyObject*
2885_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002886 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 PyObject *result = NULL;
2889 char *buf;
2890 Py_ssize_t i;
2891 int sign; /* 1 if '-', else 0 */
2892 int len; /* number of characters */
2893 Py_ssize_t llen;
2894 int numdigits; /* len == numnondigits + numdigits */
2895 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 /* Avoid exceeding SSIZE_T_MAX */
2898 if (prec > INT_MAX-3) {
2899 PyErr_SetString(PyExc_OverflowError,
2900 "precision too large");
2901 return NULL;
2902 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002904 switch (type) {
2905 case 'd':
2906 case 'u':
2907 /* Special-case boolean: we want 0/1 */
2908 if (PyBool_Check(val))
2909 result = PyNumber_ToBase(val, 10);
2910 else
2911 result = Py_TYPE(val)->tp_str(val);
2912 break;
2913 case 'o':
2914 numnondigits = 2;
2915 result = PyNumber_ToBase(val, 8);
2916 break;
2917 case 'x':
2918 case 'X':
2919 numnondigits = 2;
2920 result = PyNumber_ToBase(val, 16);
2921 break;
2922 default:
2923 assert(!"'type' not in [duoxX]");
2924 }
2925 if (!result)
2926 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 buf = _PyUnicode_AsString(result);
2929 if (!buf) {
2930 Py_DECREF(result);
2931 return NULL;
2932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002934 /* To modify the string in-place, there can only be one reference. */
2935 if (Py_REFCNT(result) != 1) {
2936 PyErr_BadInternalCall();
2937 return NULL;
2938 }
2939 llen = PyUnicode_GetSize(result);
2940 if (llen > INT_MAX) {
2941 PyErr_SetString(PyExc_ValueError,
2942 "string too large in _PyBytes_FormatLong");
2943 return NULL;
2944 }
2945 len = (int)llen;
2946 if (buf[len-1] == 'L') {
2947 --len;
2948 buf[len] = '\0';
2949 }
2950 sign = buf[0] == '-';
2951 numnondigits += sign;
2952 numdigits = len - numnondigits;
2953 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002955 /* Get rid of base marker unless F_ALT */
2956 if (((flags & F_ALT) == 0 &&
2957 (type == 'o' || type == 'x' || type == 'X'))) {
2958 assert(buf[sign] == '0');
2959 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2960 buf[sign+1] == 'o');
2961 numnondigits -= 2;
2962 buf += 2;
2963 len -= 2;
2964 if (sign)
2965 buf[0] = '-';
2966 assert(len == numnondigits + numdigits);
2967 assert(numdigits > 0);
2968 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002970 /* Fill with leading zeroes to meet minimum width. */
2971 if (prec > numdigits) {
2972 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2973 numnondigits + prec);
2974 char *b1;
2975 if (!r1) {
2976 Py_DECREF(result);
2977 return NULL;
2978 }
2979 b1 = PyBytes_AS_STRING(r1);
2980 for (i = 0; i < numnondigits; ++i)
2981 *b1++ = *buf++;
2982 for (i = 0; i < prec - numdigits; i++)
2983 *b1++ = '0';
2984 for (i = 0; i < numdigits; i++)
2985 *b1++ = *buf++;
2986 *b1 = '\0';
2987 Py_DECREF(result);
2988 result = r1;
2989 buf = PyBytes_AS_STRING(result);
2990 len = numnondigits + prec;
2991 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002993 /* Fix up case for hex conversions. */
2994 if (type == 'X') {
2995 /* Need to convert all lower case letters to upper case.
2996 and need to convert 0x to 0X (and -0x to -0X). */
2997 for (i = 0; i < len; i++)
2998 if (buf[i] >= 'a' && buf[i] <= 'x')
2999 buf[i] -= 'a'-'A';
3000 }
3001 *pbuf = buf;
3002 *plen = len;
3003 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004}
3005
3006void
3007PyBytes_Fini(void)
3008{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003010 for (i = 0; i < UCHAR_MAX + 1; i++)
3011 Py_CLEAR(characters[i]);
3012 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003013}
3014
Benjamin Peterson4116f362008-05-27 00:36:20 +00003015/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016
3017typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 PyObject_HEAD
3019 Py_ssize_t it_index;
3020 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022
3023static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003024striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003025{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 _PyObject_GC_UNTRACK(it);
3027 Py_XDECREF(it->it_seq);
3028 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029}
3030
3031static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 Py_VISIT(it->it_seq);
3035 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036}
3037
3038static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 PyBytesObject *seq;
3042 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 assert(it != NULL);
3045 seq = it->it_seq;
3046 if (seq == NULL)
3047 return NULL;
3048 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003050 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3051 item = PyLong_FromLong(
3052 (unsigned char)seq->ob_sval[it->it_index]);
3053 if (item != NULL)
3054 ++it->it_index;
3055 return item;
3056 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 Py_DECREF(seq);
3059 it->it_seq = NULL;
3060 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061}
3062
3063static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003066 Py_ssize_t len = 0;
3067 if (it->it_seq)
3068 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3069 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003070}
3071
3072PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003074
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003076 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3077 length_hint_doc},
3078 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079};
3080
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003081PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003082 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3083 "bytes_iterator", /* tp_name */
3084 sizeof(striterobject), /* tp_basicsize */
3085 0, /* tp_itemsize */
3086 /* methods */
3087 (destructor)striter_dealloc, /* tp_dealloc */
3088 0, /* tp_print */
3089 0, /* tp_getattr */
3090 0, /* tp_setattr */
3091 0, /* tp_reserved */
3092 0, /* tp_repr */
3093 0, /* tp_as_number */
3094 0, /* tp_as_sequence */
3095 0, /* tp_as_mapping */
3096 0, /* tp_hash */
3097 0, /* tp_call */
3098 0, /* tp_str */
3099 PyObject_GenericGetAttr, /* tp_getattro */
3100 0, /* tp_setattro */
3101 0, /* tp_as_buffer */
3102 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3103 0, /* tp_doc */
3104 (traverseproc)striter_traverse, /* tp_traverse */
3105 0, /* tp_clear */
3106 0, /* tp_richcompare */
3107 0, /* tp_weaklistoffset */
3108 PyObject_SelfIter, /* tp_iter */
3109 (iternextfunc)striter_next, /* tp_iternext */
3110 striter_methods, /* tp_methods */
3111 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003112};
3113
3114static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003115bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003117 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003119 if (!PyBytes_Check(seq)) {
3120 PyErr_BadInternalCall();
3121 return NULL;
3122 }
3123 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3124 if (it == NULL)
3125 return NULL;
3126 it->it_index = 0;
3127 Py_INCREF(seq);
3128 it->it_seq = (PyBytesObject *)seq;
3129 _PyObject_GC_TRACK(it);
3130 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003131}