blob: d63fabcc9ebb67abfc8174d78448e22d332f84f5 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE is the base size of a bytes object: the offset of the
   ob_sval member plus one byte for the terminating null.  An allocation for
   a string of length n must request PyBytesObject_SIZE + n bytes.

   Compared with sizeof(PyBytesObject), this saves 3 bytes per string
   allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000176 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000181 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
185 * they don't affect the amount of space we reserve.
186 */
187 if ((*f == 'l' || *f == 'z') &&
188 (f[1] == 'd' || f[1] == 'u'))
189 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
198 case 'd': case 'u': case 'i': case 'x':
199 (void) va_arg(count, int);
200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
214 * XXX I count 18 -- what's the extra for?
215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
224 what's in the argument list) */
225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000231 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 /* step 2: fill the buffer */
233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
235 string = PyBytes_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
244 Py_ssize_t i;
245 int longflag = 0;
246 int size_tflag = 0;
247 /* parse the width.precision part (we're only
248 interested in the precision value, if any) */
249 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000250 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 n = (n*10) + *f++ - '0';
252 if (*f == '.') {
253 f++;
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 }
David Malcolm96960882010-11-05 17:23:41 +0000258 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 f++;
260 /* handle the long flag, but only for %ld and %lu.
261 others can be added when necessary. */
262 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
263 longflag = 1;
264 ++f;
265 }
266 /* handle the size_t flag. */
267 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
268 size_tflag = 1;
269 ++f;
270 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 switch (*f) {
273 case 'c':
274 *s++ = va_arg(vargs, int);
275 break;
276 case 'd':
277 if (longflag)
278 sprintf(s, "%ld", va_arg(vargs, long));
279 else if (size_tflag)
280 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
281 va_arg(vargs, Py_ssize_t));
282 else
283 sprintf(s, "%d", va_arg(vargs, int));
284 s += strlen(s);
285 break;
286 case 'u':
287 if (longflag)
288 sprintf(s, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(s, "%u",
295 va_arg(vargs, unsigned int));
296 s += strlen(s);
297 break;
298 case 'i':
299 sprintf(s, "%i", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 'x':
303 sprintf(s, "%x", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 's':
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (n > 0 && i > n)
310 i = n;
311 Py_MEMCPY(s, p, i);
312 s += i;
313 break;
314 case 'p':
315 sprintf(s, "%p", va_arg(vargs, void*));
316 /* %p is ill-defined: ensure leading 0x. */
317 if (s[1] == 'X')
318 s[1] = 'x';
319 else if (s[1] != 'x') {
320 memmove(s+2, s, strlen(s)+1);
321 s[0] = '0';
322 s[1] = 'x';
323 }
324 s += strlen(s);
325 break;
326 case '%':
327 *s++ = '%';
328 break;
329 default:
330 strcpy(s, p);
331 s += strlen(s);
332 goto end;
333 }
334 } else
335 *s++ = *f;
336 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337
338 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
340 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000341}
342
343PyObject *
344PyBytes_FromFormat(const char *format, ...)
345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 PyObject* ret;
347 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000348
349#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 ret = PyBytes_FromFormatV(format, vargs);
355 va_end(vargs);
356 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357}
358
359static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000360bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000363}
364
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365/* Unescape a backslash-escaped string. If unicode is non-zero,
366 the string is a u-literal. If recode_encoding is non-zero,
367 the string is UTF-8 encoded and should be re-encoded in the
368 specified encoding. */
369
370PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_ssize_t len,
372 const char *errors,
373 Py_ssize_t unicode,
374 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 int c;
377 char *p, *buf;
378 const char *end;
379 PyObject *v;
380 Py_ssize_t newlen = recode_encoding ? 4*len:len;
381 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
382 if (v == NULL)
383 return NULL;
384 p = buf = PyBytes_AsString(v);
385 end = s + len;
386 while (s < end) {
387 if (*s != '\\') {
388 non_esc:
389 if (recode_encoding && (*s & 0x80)) {
390 PyObject *u, *w;
391 char *r;
392 const char* t;
393 Py_ssize_t rn;
394 t = s;
395 /* Decode non-ASCII bytes as UTF-8. */
396 while (t < end && (*t & 0x80)) t++;
397 u = PyUnicode_DecodeUTF8(s, t - s, errors);
398 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 /* Recode them in target encoding. */
401 w = PyUnicode_AsEncodedString(
402 u, recode_encoding, errors);
403 Py_DECREF(u);
404 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 /* Append bytes to output buffer. */
407 assert(PyBytes_Check(w));
408 r = PyBytes_AS_STRING(w);
409 rn = PyBytes_GET_SIZE(w);
410 Py_MEMCPY(p, r, rn);
411 p += rn;
412 Py_DECREF(w);
413 s = t;
414 } else {
415 *p++ = *s++;
416 }
417 continue;
418 }
419 s++;
420 if (s==end) {
421 PyErr_SetString(PyExc_ValueError,
422 "Trailing \\ in string");
423 goto failed;
424 }
425 switch (*s++) {
426 /* XXX This assumes ASCII! */
427 case '\n': break;
428 case '\\': *p++ = '\\'; break;
429 case '\'': *p++ = '\''; break;
430 case '\"': *p++ = '\"'; break;
431 case 'b': *p++ = '\b'; break;
432 case 'f': *p++ = '\014'; break; /* FF */
433 case 't': *p++ = '\t'; break;
434 case 'n': *p++ = '\n'; break;
435 case 'r': *p++ = '\r'; break;
436 case 'v': *p++ = '\013'; break; /* VT */
437 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
438 case '0': case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 c = s[-1] - '0';
441 if (s < end && '0' <= *s && *s <= '7') {
442 c = (c<<3) + *s++ - '0';
443 if (s < end && '0' <= *s && *s <= '7')
444 c = (c<<3) + *s++ - '0';
445 }
446 *p++ = c;
447 break;
448 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000449 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 unsigned int x = 0;
451 c = Py_CHARMASK(*s);
452 s++;
David Malcolm96960882010-11-05 17:23:41 +0000453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000455 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 x = 10 + c - 'a';
457 else
458 x = 10 + c - 'A';
459 x = x << 4;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x += 10 + c - 'a';
466 else
467 x += 10 + c - 'A';
468 *p++ = x;
469 break;
470 }
471 if (!errors || strcmp(errors, "strict") == 0) {
472 PyErr_SetString(PyExc_ValueError,
473 "invalid \\x escape");
474 goto failed;
475 }
476 if (strcmp(errors, "replace") == 0) {
477 *p++ = '?';
478 } else if (strcmp(errors, "ignore") == 0)
479 /* do nothing */;
480 else {
481 PyErr_Format(PyExc_ValueError,
482 "decoding error; unknown "
483 "error handling code: %.400s",
484 errors);
485 goto failed;
486 }
487 default:
488 *p++ = '\\';
489 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200490 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 UTF-8 bytes may follow. */
492 }
493 }
494 if (p-buf < newlen)
495 _PyBytes_Resize(&v, p - buf);
496 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000497 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 Py_DECREF(v);
499 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000500}
501
502/* -------------------------------------------------------------------- */
503/* object api */
504
505Py_ssize_t
506PyBytes_Size(register PyObject *op)
507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 if (!PyBytes_Check(op)) {
509 PyErr_Format(PyExc_TypeError,
510 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
511 return -1;
512 }
513 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514}
515
516char *
517PyBytes_AsString(register PyObject *op)
518{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (!PyBytes_Check(op)) {
520 PyErr_Format(PyExc_TypeError,
521 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
522 return NULL;
523 }
524 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000525}
526
527int
528PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 register char **s,
530 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (s == NULL) {
533 PyErr_BadInternalCall();
534 return -1;
535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (!PyBytes_Check(obj)) {
538 PyErr_Format(PyExc_TypeError,
539 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
540 return -1;
541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 *s = PyBytes_AS_STRING(obj);
544 if (len != NULL)
545 *len = PyBytes_GET_SIZE(obj);
546 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
547 PyErr_SetString(PyExc_TypeError,
548 "expected bytes with no null");
549 return -1;
550 }
551 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000552}
Neal Norwitz6968b052007-02-27 19:02:19 +0000553
554/* -------------------------------------------------------------------- */
555/* Methods */
556
Eric Smith0923d1d2009-04-16 20:16:10 +0000557#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000558
559#include "stringlib/fastsearch.h"
560#include "stringlib/count.h"
561#include "stringlib/find.h"
562#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000563#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000564#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000565
Eric Smith0f78bff2009-11-30 01:01:42 +0000566#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568PyObject *
569PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000570{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 static const char *hexdigits = "0123456789abcdef";
572 register PyBytesObject* op = (PyBytesObject*) obj;
573 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000574 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000576 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 PyErr_SetString(PyExc_OverflowError,
578 "bytes object is too large to make repr");
579 return NULL;
580 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000581 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 v = PyUnicode_FromUnicode(NULL, newsize);
583 if (v == NULL) {
584 return NULL;
585 }
586 else {
587 register Py_ssize_t i;
588 register Py_UNICODE c;
589 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
590 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 /* Figure out which quote to use; single is preferred */
593 quote = '\'';
594 if (smartquotes) {
595 char *test, *start;
596 start = PyBytes_AS_STRING(op);
597 for (test = start; test < start+length; ++test) {
598 if (*test == '"') {
599 quote = '\''; /* back to single */
600 goto decided;
601 }
602 else if (*test == '\'')
603 quote = '"';
604 }
605 decided:
606 ;
607 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 *p++ = 'b', *p++ = quote;
610 for (i = 0; i < length; i++) {
611 /* There's at least enough room for a hex escape
612 and a closing quote. */
613 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
614 c = op->ob_sval[i];
615 if (c == quote || c == '\\')
616 *p++ = '\\', *p++ = c;
617 else if (c == '\t')
618 *p++ = '\\', *p++ = 't';
619 else if (c == '\n')
620 *p++ = '\\', *p++ = 'n';
621 else if (c == '\r')
622 *p++ = '\\', *p++ = 'r';
623 else if (c < ' ' || c >= 0x7f) {
624 *p++ = '\\';
625 *p++ = 'x';
626 *p++ = hexdigits[(c & 0xf0) >> 4];
627 *p++ = hexdigits[c & 0xf];
628 }
629 else
630 *p++ = c;
631 }
632 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
633 *p++ = quote;
634 *p = '\0';
635 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
636 Py_DECREF(v);
637 return NULL;
638 }
639 return v;
640 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000641}
642
Neal Norwitz6968b052007-02-27 19:02:19 +0000643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000644bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Neal Norwitz6968b052007-02-27 19:02:19 +0000649static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 if (Py_BytesWarningFlag) {
653 if (PyErr_WarnEx(PyExc_BytesWarning,
654 "str() on a bytes instance", 1))
655 return NULL;
656 }
657 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000658}
659
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000661bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664}
Neal Norwitz6968b052007-02-27 19:02:19 +0000665
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666/* This is also used by PyBytes_Concat() */
667static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 Py_ssize_t size;
671 Py_buffer va, vb;
672 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 va.len = -1;
675 vb.len = -1;
676 if (_getbuffer(a, &va) < 0 ||
677 _getbuffer(b, &vb) < 0) {
678 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
679 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
680 goto done;
681 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 /* Optimize end cases */
684 if (va.len == 0 && PyBytes_CheckExact(b)) {
685 result = b;
686 Py_INCREF(result);
687 goto done;
688 }
689 if (vb.len == 0 && PyBytes_CheckExact(a)) {
690 result = a;
691 Py_INCREF(result);
692 goto done;
693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 size = va.len + vb.len;
696 if (size < 0) {
697 PyErr_NoMemory();
698 goto done;
699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 result = PyBytes_FromStringAndSize(NULL, size);
702 if (result != NULL) {
703 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
704 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000706
707 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 if (va.len != -1)
709 PyBuffer_Release(&va);
710 if (vb.len != -1)
711 PyBuffer_Release(&vb);
712 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000713}
Neal Norwitz6968b052007-02-27 19:02:19 +0000714
715static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000716bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 register Py_ssize_t i;
719 register Py_ssize_t j;
720 register Py_ssize_t size;
721 register PyBytesObject *op;
722 size_t nbytes;
723 if (n < 0)
724 n = 0;
725 /* watch out for overflows: the size can overflow int,
726 * and the # of bytes needed can overflow size_t
727 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000728 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
735 Py_INCREF(a);
736 return (PyObject *)a;
737 }
738 nbytes = (size_t)size;
739 if (nbytes + PyBytesObject_SIZE <= nbytes) {
740 PyErr_SetString(PyExc_OverflowError,
741 "repeated bytes are too long");
742 return NULL;
743 }
744 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
745 if (op == NULL)
746 return PyErr_NoMemory();
747 PyObject_INIT_VAR(op, &PyBytes_Type, size);
748 op->ob_shash = -1;
749 op->ob_sval[size] = '\0';
750 if (Py_SIZE(a) == 1 && n > 0) {
751 memset(op->ob_sval, a->ob_sval[0] , n);
752 return (PyObject *) op;
753 }
754 i = 0;
755 if (i < size) {
756 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
757 i = Py_SIZE(a);
758 }
759 while (i < size) {
760 j = (i <= size-i) ? i : size-i;
761 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
762 i += j;
763 }
764 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000765}
766
Guido van Rossum98297ee2007-11-06 21:34:58 +0000767static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000768bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000769{
770 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
771 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000773 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000774 PyErr_Clear();
775 if (_getbuffer(arg, &varg) < 0)
776 return -1;
777 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
778 varg.buf, varg.len, 0);
779 PyBuffer_Release(&varg);
780 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000783 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
784 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 }
786
Antoine Pitrou0010d372010-08-15 17:12:55 +0000787 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788}
789
Neal Norwitz6968b052007-02-27 19:02:19 +0000790static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000791bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 if (i < 0 || i >= Py_SIZE(a)) {
794 PyErr_SetString(PyExc_IndexError, "index out of range");
795 return NULL;
796 }
797 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000798}
799
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000801bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000802{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 int c;
804 Py_ssize_t len_a, len_b;
805 Py_ssize_t min_len;
806 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /* Make sure both arguments are strings. */
809 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
810 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
811 (PyObject_IsInstance((PyObject*)a,
812 (PyObject*)&PyUnicode_Type) ||
813 PyObject_IsInstance((PyObject*)b,
814 (PyObject*)&PyUnicode_Type))) {
815 if (PyErr_WarnEx(PyExc_BytesWarning,
816 "Comparison between bytes and string", 1))
817 return NULL;
818 }
819 result = Py_NotImplemented;
820 goto out;
821 }
822 if (a == b) {
823 switch (op) {
824 case Py_EQ:case Py_LE:case Py_GE:
825 result = Py_True;
826 goto out;
827 case Py_NE:case Py_LT:case Py_GT:
828 result = Py_False;
829 goto out;
830 }
831 }
832 if (op == Py_EQ) {
833 /* Supporting Py_NE here as well does not save
834 much time, since Py_NE is rarely used. */
835 if (Py_SIZE(a) == Py_SIZE(b)
836 && (a->ob_sval[0] == b->ob_sval[0]
837 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
838 result = Py_True;
839 } else {
840 result = Py_False;
841 }
842 goto out;
843 }
844 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
845 min_len = (len_a < len_b) ? len_a : len_b;
846 if (min_len > 0) {
847 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
848 if (c==0)
849 c = memcmp(a->ob_sval, b->ob_sval, min_len);
850 } else
851 c = 0;
852 if (c == 0)
853 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
854 switch (op) {
855 case Py_LT: c = c < 0; break;
856 case Py_LE: c = c <= 0; break;
857 case Py_EQ: assert(0); break; /* unreachable */
858 case Py_NE: c = c != 0; break;
859 case Py_GT: c = c > 0; break;
860 case Py_GE: c = c >= 0; break;
861 default:
862 result = Py_NotImplemented;
863 goto out;
864 }
865 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000866 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 Py_INCREF(result);
868 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000869}
870
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000871static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000872bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000873{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 register Py_ssize_t len;
875 register unsigned char *p;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000876 register Py_hash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 if (a->ob_shash != -1)
879 return a->ob_shash;
880 len = Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100881 /*
882 We make the hash of the empty string be 0, rather than using
883 (prefix ^ suffix), since this slightly obfuscates the hash secret
884 */
885 if (len == 0) {
886 a->ob_shash = 0;
887 return 0;
888 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 p = (unsigned char *) a->ob_sval;
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100890 x = _Py_HashSecret.prefix;
891 x ^= *p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 while (--len >= 0)
Gregory P. Smith63e6c322012-01-14 15:31:34 -0800893 x = (_PyHASH_MULTIPLIER*x) ^ *p++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000894 x ^= Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100895 x ^= _Py_HashSecret.suffix;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 if (x == -1)
897 x = -2;
898 a->ob_shash = x;
899 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000900}
901
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000902static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000903bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000904{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 if (PyIndex_Check(item)) {
906 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
907 if (i == -1 && PyErr_Occurred())
908 return NULL;
909 if (i < 0)
910 i += PyBytes_GET_SIZE(self);
911 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
912 PyErr_SetString(PyExc_IndexError,
913 "index out of range");
914 return NULL;
915 }
916 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
917 }
918 else if (PySlice_Check(item)) {
919 Py_ssize_t start, stop, step, slicelength, cur, i;
920 char* source_buf;
921 char* result_buf;
922 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000923
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000924 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000925 PyBytes_GET_SIZE(self),
926 &start, &stop, &step, &slicelength) < 0) {
927 return NULL;
928 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000930 if (slicelength <= 0) {
931 return PyBytes_FromStringAndSize("", 0);
932 }
933 else if (start == 0 && step == 1 &&
934 slicelength == PyBytes_GET_SIZE(self) &&
935 PyBytes_CheckExact(self)) {
936 Py_INCREF(self);
937 return (PyObject *)self;
938 }
939 else if (step == 1) {
940 return PyBytes_FromStringAndSize(
941 PyBytes_AS_STRING(self) + start,
942 slicelength);
943 }
944 else {
945 source_buf = PyBytes_AS_STRING(self);
946 result = PyBytes_FromStringAndSize(NULL, slicelength);
947 if (result == NULL)
948 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 result_buf = PyBytes_AS_STRING(result);
951 for (cur = start, i = 0; i < slicelength;
952 cur += step, i++) {
953 result_buf[i] = source_buf[cur];
954 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 return result;
957 }
958 }
959 else {
960 PyErr_Format(PyExc_TypeError,
961 "byte indices must be integers, not %.200s",
962 Py_TYPE(item)->tp_name);
963 return NULL;
964 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000965}
966
967static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000968bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
971 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000972}
973
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000974static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 (lenfunc)bytes_length, /*sq_length*/
976 (binaryfunc)bytes_concat, /*sq_concat*/
977 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
978 (ssizeargfunc)bytes_item, /*sq_item*/
979 0, /*sq_slice*/
980 0, /*sq_ass_item*/
981 0, /*sq_ass_slice*/
982 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000983};
984
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000985static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 (lenfunc)bytes_length,
987 (binaryfunc)bytes_subscript,
988 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000989};
990
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000991static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 (getbufferproc)bytes_buffer_getbuffer,
993 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000994};
995
996
/* Which end(s) a strip operation works on.  The values double as
   indices into stripformat[] below. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip type i: the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (&stripformat[i][3])
1005
Neal Norwitz6968b052007-02-27 19:02:19 +00001006PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001007"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001008\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001009Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001010If sep is not specified or is None, B is split on ASCII whitespace\n\
1011characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001012If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001013
1014static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001015bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1018 Py_ssize_t maxsplit = -1;
1019 const char *s = PyBytes_AS_STRING(self), *sub;
1020 Py_buffer vsub;
1021 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1024 return NULL;
1025 if (maxsplit < 0)
1026 maxsplit = PY_SSIZE_T_MAX;
1027 if (subobj == Py_None)
1028 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1029 if (_getbuffer(subobj, &vsub) < 0)
1030 return NULL;
1031 sub = vsub.buf;
1032 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1035 PyBuffer_Release(&vsub);
1036 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001037}
1038
Neal Norwitz6968b052007-02-27 19:02:19 +00001039PyDoc_STRVAR(partition__doc__,
1040"B.partition(sep) -> (head, sep, tail)\n\
1041\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001042Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001043the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001044found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001045
1046static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001047bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 const char *sep;
1050 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 if (PyBytes_Check(sep_obj)) {
1053 sep = PyBytes_AS_STRING(sep_obj);
1054 sep_len = PyBytes_GET_SIZE(sep_obj);
1055 }
1056 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1057 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 return stringlib_partition(
1060 (PyObject*) self,
1061 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1062 sep_obj, sep, sep_len
1063 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001064}
1065
1066PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001067"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001068\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001069Search for the separator sep in B, starting at the end of B,\n\
1070and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001071part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001072bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
1074static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001075bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 const char *sep;
1078 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 if (PyBytes_Check(sep_obj)) {
1081 sep = PyBytes_AS_STRING(sep_obj);
1082 sep_len = PyBytes_GET_SIZE(sep_obj);
1083 }
1084 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1085 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 return stringlib_rpartition(
1088 (PyObject*) self,
1089 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1090 sep_obj, sep, sep_len
1091 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001092}
1093
Neal Norwitz6968b052007-02-27 19:02:19 +00001094PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001095"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001096\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001097Return a list of the sections in B, using sep as the delimiter,\n\
1098starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001099If sep is not given, B is split on ASCII whitespace characters\n\
1100(space, tab, return, newline, formfeed, vertical tab).\n\
1101If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001102
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001103
Neal Norwitz6968b052007-02-27 19:02:19 +00001104static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001105bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001106{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1108 Py_ssize_t maxsplit = -1;
1109 const char *s = PyBytes_AS_STRING(self), *sub;
1110 Py_buffer vsub;
1111 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1114 return NULL;
1115 if (maxsplit < 0)
1116 maxsplit = PY_SSIZE_T_MAX;
1117 if (subobj == Py_None)
1118 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1119 if (_getbuffer(subobj, &vsub) < 0)
1120 return NULL;
1121 sub = vsub.buf;
1122 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001124 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1125 PyBuffer_Release(&vsub);
1126 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001127}
1128
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129
1130PyDoc_STRVAR(join__doc__,
1131"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001132\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001133Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1135
Neal Norwitz6968b052007-02-27 19:02:19 +00001136static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001137bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 char *sep = PyBytes_AS_STRING(self);
1140 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1141 PyObject *res = NULL;
1142 char *p;
1143 Py_ssize_t seqlen = 0;
1144 size_t sz = 0;
1145 Py_ssize_t i;
1146 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 seq = PySequence_Fast(orig, "");
1149 if (seq == NULL) {
1150 return NULL;
1151 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 seqlen = PySequence_Size(seq);
1154 if (seqlen == 0) {
1155 Py_DECREF(seq);
1156 return PyBytes_FromString("");
1157 }
1158 if (seqlen == 1) {
1159 item = PySequence_Fast_GET_ITEM(seq, 0);
1160 if (PyBytes_CheckExact(item)) {
1161 Py_INCREF(item);
1162 Py_DECREF(seq);
1163 return item;
1164 }
1165 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 /* There are at least two things to join, or else we have a subclass
1168 * of the builtin types in the sequence.
1169 * Do a pre-pass to figure out the total amount of space we'll
1170 * need (sz), and see whether all argument are bytes.
1171 */
1172 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1173 for (i = 0; i < seqlen; i++) {
1174 const size_t old_sz = sz;
1175 item = PySequence_Fast_GET_ITEM(seq, i);
1176 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1177 PyErr_Format(PyExc_TypeError,
1178 "sequence item %zd: expected bytes,"
1179 " %.80s found",
1180 i, Py_TYPE(item)->tp_name);
1181 Py_DECREF(seq);
1182 return NULL;
1183 }
1184 sz += Py_SIZE(item);
1185 if (i != 0)
1186 sz += seplen;
1187 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1188 PyErr_SetString(PyExc_OverflowError,
1189 "join() result is too long for bytes");
1190 Py_DECREF(seq);
1191 return NULL;
1192 }
1193 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 /* Allocate result space. */
1196 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1197 if (res == NULL) {
1198 Py_DECREF(seq);
1199 return NULL;
1200 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 /* Catenate everything. */
1203 /* I'm not worried about a PyByteArray item growing because there's
1204 nowhere in this function where we release the GIL. */
1205 p = PyBytes_AS_STRING(res);
1206 for (i = 0; i < seqlen; ++i) {
1207 size_t n;
1208 char *q;
1209 if (i) {
1210 Py_MEMCPY(p, sep, seplen);
1211 p += seplen;
1212 }
1213 item = PySequence_Fast_GET_ITEM(seq, i);
1214 n = Py_SIZE(item);
1215 if (PyBytes_Check(item))
1216 q = PyBytes_AS_STRING(item);
1217 else
1218 q = PyByteArray_AS_STRING(item);
1219 Py_MEMCPY(p, q, n);
1220 p += n;
1221 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 Py_DECREF(seq);
1224 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001225}
1226
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227PyObject *
1228_PyBytes_Join(PyObject *sep, PyObject *x)
1229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 assert(sep != NULL && PyBytes_Check(sep));
1231 assert(x != NULL);
1232 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233}
1234
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len when too large; a negative start or end has len
   added once and is then clamped to 0 if still negative.  A start above
   len is left unchanged.

   start and end must be modifiable lvalues.  len is evaluated several
   times, so it must be free of side effects.

   The body is wrapped in do { } while (0) so that an invocation followed
   by a semicolon is exactly one statement and can be used safely as the
   sole body of an un-braced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249
1250Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001251bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 PyObject *subobj;
1254 const char *sub;
1255 Py_ssize_t sub_len;
1256 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257
Jesus Ceaac451502011-04-20 17:09:23 +02001258 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1259 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 if (PyBytes_Check(subobj)) {
1263 sub = PyBytes_AS_STRING(subobj);
1264 sub_len = PyBytes_GET_SIZE(subobj);
1265 }
1266 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1267 /* XXX - the "expected a character buffer object" is pretty
1268 confusing for a non-expert. remap to something else ? */
1269 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 if (dir > 0)
1272 return stringlib_find_slice(
1273 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1274 sub, sub_len, start, end);
1275 else
1276 return stringlib_rfind_slice(
1277 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1278 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279}
1280
1281
1282PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001283"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001284\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001285Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001286such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001287arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001288\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001289Return -1 on failure.");
1290
Neal Norwitz6968b052007-02-27 19:02:19 +00001291static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001292bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001293{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 Py_ssize_t result = bytes_find_internal(self, args, +1);
1295 if (result == -2)
1296 return NULL;
1297 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001298}
1299
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300
1301PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001302"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001303\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304Like B.find() but raise ValueError when the substring is not found.");
1305
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001306static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001307bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001308{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 Py_ssize_t result = bytes_find_internal(self, args, +1);
1310 if (result == -2)
1311 return NULL;
1312 if (result == -1) {
1313 PyErr_SetString(PyExc_ValueError,
1314 "substring not found");
1315 return NULL;
1316 }
1317 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001318}
1319
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320
1321PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001322"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001323\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001325such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001327\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328Return -1 on failure.");
1329
Neal Norwitz6968b052007-02-27 19:02:19 +00001330static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001331bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001332{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 Py_ssize_t result = bytes_find_internal(self, args, -1);
1334 if (result == -2)
1335 return NULL;
1336 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001337}
1338
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001339
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001340PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001341"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342\n\
1343Like B.rfind() but raise ValueError when the substring is not found.");
1344
1345static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001346bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 Py_ssize_t result = bytes_find_internal(self, args, -1);
1349 if (result == -2)
1350 return NULL;
1351 if (result == -1) {
1352 PyErr_SetString(PyExc_ValueError,
1353 "substring not found");
1354 return NULL;
1355 }
1356 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001357}
1358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
1360Py_LOCAL_INLINE(PyObject *)
1361do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 Py_buffer vsep;
1364 char *s = PyBytes_AS_STRING(self);
1365 Py_ssize_t len = PyBytes_GET_SIZE(self);
1366 char *sep;
1367 Py_ssize_t seplen;
1368 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 if (_getbuffer(sepobj, &vsep) < 0)
1371 return NULL;
1372 sep = vsep.buf;
1373 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 i = 0;
1376 if (striptype != RIGHTSTRIP) {
1377 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1378 i++;
1379 }
1380 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 j = len;
1383 if (striptype != LEFTSTRIP) {
1384 do {
1385 j--;
1386 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1387 j++;
1388 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1393 Py_INCREF(self);
1394 return (PyObject*)self;
1395 }
1396 else
1397 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001398}
1399
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400
1401Py_LOCAL_INLINE(PyObject *)
1402do_strip(PyBytesObject *self, int striptype)
1403{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 char *s = PyBytes_AS_STRING(self);
1405 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 i = 0;
1408 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001409 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 i++;
1411 }
1412 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 j = len;
1415 if (striptype != LEFTSTRIP) {
1416 do {
1417 j--;
David Malcolm96960882010-11-05 17:23:41 +00001418 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 j++;
1420 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1423 Py_INCREF(self);
1424 return (PyObject*)self;
1425 }
1426 else
1427 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428}
1429
1430
1431Py_LOCAL_INLINE(PyObject *)
1432do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1433{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1437 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 if (sep != NULL && sep != Py_None) {
1440 return do_xstrip(self, striptype, sep);
1441 }
1442 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443}
1444
1445
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001446PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001448\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001449Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001450If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001451static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001452bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (PyTuple_GET_SIZE(args) == 0)
1455 return do_strip(self, BOTHSTRIP); /* Common case */
1456 else
1457 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458}
1459
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001462"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001463\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001464Strip leading bytes contained in the argument.\n\
1465If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001467bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (PyTuple_GET_SIZE(args) == 0)
1470 return do_strip(self, LEFTSTRIP); /* Common case */
1471 else
1472 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001473}
1474
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001475
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001476PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001478\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001479Strip trailing bytes contained in the argument.\n\
1480If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001481static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001482bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 if (PyTuple_GET_SIZE(args) == 0)
1485 return do_strip(self, RIGHTSTRIP); /* Common case */
1486 else
1487 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001488}
Neal Norwitz6968b052007-02-27 19:02:19 +00001489
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490
1491PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001492"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001493\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001494Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001495string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496as in slice notation.");
1497
1498static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001499bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001500{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 PyObject *sub_obj;
1502 const char *str = PyBytes_AS_STRING(self), *sub;
1503 Py_ssize_t sub_len;
1504 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001505
Jesus Ceaac451502011-04-20 17:09:23 +02001506 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 if (PyBytes_Check(sub_obj)) {
1510 sub = PyBytes_AS_STRING(sub_obj);
1511 sub_len = PyBytes_GET_SIZE(sub_obj);
1512 }
1513 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1514 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 return PyLong_FromSsize_t(
1519 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1520 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001521}
1522
1523
1524PyDoc_STRVAR(translate__doc__,
1525"B.translate(table[, deletechars]) -> bytes\n\
1526\n\
1527Return a copy of B, where all characters occurring in the\n\
1528optional argument deletechars are removed, and the remaining\n\
1529characters have been mapped through the given translation\n\
1530table, which must be a bytes object of length 256.");
1531
1532static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001533bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001534{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 register char *input, *output;
1536 const char *table;
1537 register Py_ssize_t i, c, changed = 0;
1538 PyObject *input_obj = (PyObject*)self;
1539 const char *output_start, *del_table=NULL;
1540 Py_ssize_t inlen, tablen, dellen = 0;
1541 PyObject *result;
1542 int trans_table[256];
1543 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1546 &tableobj, &delobj))
1547 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001549 if (PyBytes_Check(tableobj)) {
1550 table = PyBytes_AS_STRING(tableobj);
1551 tablen = PyBytes_GET_SIZE(tableobj);
1552 }
1553 else if (tableobj == Py_None) {
1554 table = NULL;
1555 tablen = 256;
1556 }
1557 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1558 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 if (tablen != 256) {
1561 PyErr_SetString(PyExc_ValueError,
1562 "translation table must be 256 characters long");
1563 return NULL;
1564 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 if (delobj != NULL) {
1567 if (PyBytes_Check(delobj)) {
1568 del_table = PyBytes_AS_STRING(delobj);
1569 dellen = PyBytes_GET_SIZE(delobj);
1570 }
1571 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1572 return NULL;
1573 }
1574 else {
1575 del_table = NULL;
1576 dellen = 0;
1577 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 inlen = PyBytes_GET_SIZE(input_obj);
1580 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1581 if (result == NULL)
1582 return NULL;
1583 output_start = output = PyBytes_AsString(result);
1584 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 if (dellen == 0 && table != NULL) {
1587 /* If no deletions are required, use faster code */
1588 for (i = inlen; --i >= 0; ) {
1589 c = Py_CHARMASK(*input++);
1590 if (Py_CHARMASK((*output++ = table[c])) != c)
1591 changed = 1;
1592 }
1593 if (changed || !PyBytes_CheckExact(input_obj))
1594 return result;
1595 Py_DECREF(result);
1596 Py_INCREF(input_obj);
1597 return input_obj;
1598 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 if (table == NULL) {
1601 for (i = 0; i < 256; i++)
1602 trans_table[i] = Py_CHARMASK(i);
1603 } else {
1604 for (i = 0; i < 256; i++)
1605 trans_table[i] = Py_CHARMASK(table[i]);
1606 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 for (i = 0; i < dellen; i++)
1609 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 for (i = inlen; --i >= 0; ) {
1612 c = Py_CHARMASK(*input++);
1613 if (trans_table[c] != -1)
1614 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1615 continue;
1616 changed = 1;
1617 }
1618 if (!changed && PyBytes_CheckExact(input_obj)) {
1619 Py_DECREF(result);
1620 Py_INCREF(input_obj);
1621 return input_obj;
1622 }
1623 /* Fix the size of the resulting string */
1624 if (inlen > 0)
1625 _PyBytes_Resize(&result, output - output_start);
1626 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627}
1628
1629
Georg Brandlabc38772009-04-12 15:51:51 +00001630static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001631bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001632{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001634}
1635
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636/* find and count characters and substrings */
1637
/* Pointer (char *) to the first byte equal to c among the target_len
   bytes starting at target, or NULL when there is none. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1640
1641/* String ops must return a string. */
1642/* If the object is subclass of string, create a copy */
1643Py_LOCAL(PyBytesObject *)
1644return_self(PyBytesObject *self)
1645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 if (PyBytes_CheckExact(self)) {
1647 Py_INCREF(self);
1648 return self;
1649 }
1650 return (PyBytesObject *)PyBytes_FromStringAndSize(
1651 PyBytes_AS_STRING(self),
1652 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001653}
1654
1655Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001656countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 Py_ssize_t count=0;
1659 const char *start=target;
1660 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 while ( (start=findchar(start, end-start, c)) != NULL ) {
1663 count++;
1664 if (count >= maxcount)
1665 break;
1666 start += 1;
1667 }
1668 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669}
1670
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001671
1672/* Algorithms for different cases of string replacement */
1673
1674/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1675Py_LOCAL(PyBytesObject *)
1676replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 const char *to_s, Py_ssize_t to_len,
1678 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 char *self_s, *result_s;
1681 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001682 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001687 /* 1 at the end plus 1 after every character;
1688 count = min(maxcount, self_len + 1) */
1689 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001691 else
1692 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1693 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 /* Check for overflow */
1696 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001697 assert(count > 0);
1698 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 PyErr_SetString(PyExc_OverflowError,
1700 "replacement bytes are too long");
1701 return NULL;
1702 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001703 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 if (! (result = (PyBytesObject *)
1706 PyBytes_FromStringAndSize(NULL, result_len)) )
1707 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 self_s = PyBytes_AS_STRING(self);
1710 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 /* Lay the first one down (guaranteed this will occur) */
1715 Py_MEMCPY(result_s, to_s, to_len);
1716 result_s += to_len;
1717 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 for (i=0; i<count; i++) {
1720 *result_s++ = *self_s++;
1721 Py_MEMCPY(result_s, to_s, to_len);
1722 result_s += to_len;
1723 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 /* Copy the rest of the original string */
1726 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729}
1730
1731/* Special case for deleting a single character */
1732/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1733Py_LOCAL(PyBytesObject *)
1734replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001736{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 char *self_s, *result_s;
1738 char *start, *next, *end;
1739 Py_ssize_t self_len, result_len;
1740 Py_ssize_t count;
1741 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 self_len = PyBytes_GET_SIZE(self);
1744 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 count = countchar(self_s, self_len, from_c, maxcount);
1747 if (count == 0) {
1748 return return_self(self);
1749 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 result_len = self_len - count; /* from_len == 1 */
1752 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 if ( (result = (PyBytesObject *)
1755 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1756 return NULL;
1757 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 start = self_s;
1760 end = self_s + self_len;
1761 while (count-- > 0) {
1762 next = findchar(start, end-start, from_c);
1763 if (next == NULL)
1764 break;
1765 Py_MEMCPY(result_s, start, next-start);
1766 result_s += (next-start);
1767 start = next+1;
1768 }
1769 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001772}
1773
1774/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1775
1776Py_LOCAL(PyBytesObject *)
1777replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 const char *from_s, Py_ssize_t from_len,
1779 Py_ssize_t maxcount) {
1780 char *self_s, *result_s;
1781 char *start, *next, *end;
1782 Py_ssize_t self_len, result_len;
1783 Py_ssize_t count, offset;
1784 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 self_len = PyBytes_GET_SIZE(self);
1787 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 count = stringlib_count(self_s, self_len,
1790 from_s, from_len,
1791 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 if (count == 0) {
1794 /* no matches */
1795 return return_self(self);
1796 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 result_len = self_len - (count * from_len);
1799 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 if ( (result = (PyBytesObject *)
1802 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1803 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 start = self_s;
1808 end = self_s + self_len;
1809 while (count-- > 0) {
1810 offset = stringlib_find(start, end-start,
1811 from_s, from_len,
1812 0);
1813 if (offset == -1)
1814 break;
1815 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001819 result_s += (next-start);
1820 start = next+from_len;
1821 }
1822 Py_MEMCPY(result_s, start, end-start);
1823 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824}
1825
1826/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1827Py_LOCAL(PyBytesObject *)
1828replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 char from_c, char to_c,
1830 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 char *self_s, *result_s, *start, *end, *next;
1833 Py_ssize_t self_len;
1834 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 /* The result string will be the same size */
1837 self_s = PyBytes_AS_STRING(self);
1838 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 if (next == NULL) {
1843 /* No matches; return the original string */
1844 return return_self(self);
1845 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 /* Need to make a new string */
1848 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1849 if (result == NULL)
1850 return NULL;
1851 result_s = PyBytes_AS_STRING(result);
1852 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 /* change everything in-place, starting with this one */
1855 start = result_s + (next-self_s);
1856 *start = to_c;
1857 start++;
1858 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 while (--maxcount > 0) {
1861 next = findchar(start, end-start, from_c);
1862 if (next == NULL)
1863 break;
1864 *next = to_c;
1865 start = next+1;
1866 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869}
1870
1871/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1872Py_LOCAL(PyBytesObject *)
1873replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 const char *from_s, Py_ssize_t from_len,
1875 const char *to_s, Py_ssize_t to_len,
1876 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001877{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 char *result_s, *start, *end;
1879 char *self_s;
1880 Py_ssize_t self_len, offset;
1881 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001884
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 self_s = PyBytes_AS_STRING(self);
1886 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 offset = stringlib_find(self_s, self_len,
1889 from_s, from_len,
1890 0);
1891 if (offset == -1) {
1892 /* No matches; return the original string */
1893 return return_self(self);
1894 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 /* Need to make a new string */
1897 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1898 if (result == NULL)
1899 return NULL;
1900 result_s = PyBytes_AS_STRING(result);
1901 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 /* change everything in-place, starting with this one */
1904 start = result_s + offset;
1905 Py_MEMCPY(start, to_s, from_len);
1906 start += from_len;
1907 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 while ( --maxcount > 0) {
1910 offset = stringlib_find(start, end-start,
1911 from_s, from_len,
1912 0);
1913 if (offset==-1)
1914 break;
1915 Py_MEMCPY(start+offset, to_s, from_len);
1916 start += offset+from_len;
1917 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001918
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920}
1921
1922/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1923Py_LOCAL(PyBytesObject *)
1924replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 char from_c,
1926 const char *to_s, Py_ssize_t to_len,
1927 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 char *self_s, *result_s;
1930 char *start, *next, *end;
1931 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001932 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001933 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 self_s = PyBytes_AS_STRING(self);
1936 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 count = countchar(self_s, self_len, from_c, maxcount);
1939 if (count == 0) {
1940 /* no matches, return unchanged */
1941 return return_self(self);
1942 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 /* use the difference between current and new, hence the "-1" */
1945 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001946 assert(count > 0);
1947 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 PyErr_SetString(PyExc_OverflowError,
1949 "replacement bytes are too long");
1950 return NULL;
1951 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001952 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 if ( (result = (PyBytesObject *)
1955 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1956 return NULL;
1957 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 start = self_s;
1960 end = self_s + self_len;
1961 while (count-- > 0) {
1962 next = findchar(start, end-start, from_c);
1963 if (next == NULL)
1964 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 if (next == start) {
1967 /* replace with the 'to' */
1968 Py_MEMCPY(result_s, to_s, to_len);
1969 result_s += to_len;
1970 start += 1;
1971 } else {
1972 /* copy the unchanged old then the 'to' */
1973 Py_MEMCPY(result_s, start, next-start);
1974 result_s += (next-start);
1975 Py_MEMCPY(result_s, to_s, to_len);
1976 result_s += to_len;
1977 start = next+1;
1978 }
1979 }
1980 /* Copy the remainder of the remaining string */
1981 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001982
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984}
1985
1986/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1987Py_LOCAL(PyBytesObject *)
1988replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 const char *from_s, Py_ssize_t from_len,
1990 const char *to_s, Py_ssize_t to_len,
1991 Py_ssize_t maxcount) {
1992 char *self_s, *result_s;
1993 char *start, *next, *end;
1994 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001995 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001997
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001998 self_s = PyBytes_AS_STRING(self);
1999 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002001 count = stringlib_count(self_s, self_len,
2002 from_s, from_len,
2003 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002004
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 if (count == 0) {
2006 /* no matches, return unchanged */
2007 return return_self(self);
2008 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 /* Check for overflow */
2011 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002012 assert(count > 0);
2013 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002014 PyErr_SetString(PyExc_OverflowError,
2015 "replacement bytes are too long");
2016 return NULL;
2017 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002018 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002020 if ( (result = (PyBytesObject *)
2021 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2022 return NULL;
2023 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002025 start = self_s;
2026 end = self_s + self_len;
2027 while (count-- > 0) {
2028 offset = stringlib_find(start, end-start,
2029 from_s, from_len,
2030 0);
2031 if (offset == -1)
2032 break;
2033 next = start+offset;
2034 if (next == start) {
2035 /* replace with the 'to' */
2036 Py_MEMCPY(result_s, to_s, to_len);
2037 result_s += to_len;
2038 start += from_len;
2039 } else {
2040 /* copy the unchanged old then the 'to' */
2041 Py_MEMCPY(result_s, start, next-start);
2042 result_s += (next-start);
2043 Py_MEMCPY(result_s, to_s, to_len);
2044 result_s += to_len;
2045 start = next+from_len;
2046 }
2047 }
2048 /* Copy the remainder of the remaining string */
2049 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002052}
2053
2054
2055Py_LOCAL(PyBytesObject *)
2056replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002057 const char *from_s, Py_ssize_t from_len,
2058 const char *to_s, Py_ssize_t to_len,
2059 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002060{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002061 if (maxcount < 0) {
2062 maxcount = PY_SSIZE_T_MAX;
2063 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2064 /* nothing to do; return the original string */
2065 return return_self(self);
2066 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 if (maxcount == 0 ||
2069 (from_len == 0 && to_len == 0)) {
2070 /* nothing to do; return the original string */
2071 return return_self(self);
2072 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002076 if (from_len == 0) {
2077 /* insert the 'to' string everywhere. */
2078 /* >>> "Python".replace("", ".") */
2079 /* '.P.y.t.h.o.n.' */
2080 return replace_interleave(self, to_s, to_len, maxcount);
2081 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2084 /* point for an empty self string to generate a non-empty string */
2085 /* Special case so the remaining code always gets a non-empty string */
2086 if (PyBytes_GET_SIZE(self) == 0) {
2087 return return_self(self);
2088 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 if (to_len == 0) {
2091 /* delete all occurrences of 'from' string */
2092 if (from_len == 1) {
2093 return replace_delete_single_character(
2094 self, from_s[0], maxcount);
2095 } else {
2096 return replace_delete_substring(self, from_s,
2097 from_len, maxcount);
2098 }
2099 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 if (from_len == to_len) {
2104 if (from_len == 1) {
2105 return replace_single_character_in_place(
2106 self,
2107 from_s[0],
2108 to_s[0],
2109 maxcount);
2110 } else {
2111 return replace_substring_in_place(
2112 self, from_s, from_len, to_s, to_len,
2113 maxcount);
2114 }
2115 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 /* Otherwise use the more generic algorithms */
2118 if (from_len == 1) {
2119 return replace_single_character(self, from_s[0],
2120 to_s, to_len, maxcount);
2121 } else {
2122 /* len('from')>=2, len('to')>=1 */
2123 return replace_substring(self, from_s, from_len, to_s, to_len,
2124 maxcount);
2125 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126}
2127
2128PyDoc_STRVAR(replace__doc__,
2129"B.replace(old, new[, count]) -> bytes\n\
2130\n\
2131Return a copy of B with all occurrences of subsection\n\
2132old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002133given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002134
2135static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002136bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 Py_ssize_t count = -1;
2139 PyObject *from, *to;
2140 const char *from_s, *to_s;
2141 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2144 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 if (PyBytes_Check(from)) {
2147 from_s = PyBytes_AS_STRING(from);
2148 from_len = PyBytes_GET_SIZE(from);
2149 }
2150 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2151 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 if (PyBytes_Check(to)) {
2154 to_s = PyBytes_AS_STRING(to);
2155 to_len = PyBytes_GET_SIZE(to);
2156 }
2157 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2158 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 return (PyObject *)replace((PyBytesObject *) self,
2161 from_s, from_len,
2162 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163}
2164
2165/** End DALKE **/
2166
2167/* Matches the end (direction >= 0) or start (direction < 0) of self
2168 * against substr, using the start and end arguments. Returns
2169 * -1 on error, 0 if not found and 1 if found.
2170 */
2171Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002172_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002175 Py_ssize_t len = PyBytes_GET_SIZE(self);
2176 Py_ssize_t slen;
2177 const char* sub;
2178 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 if (PyBytes_Check(substr)) {
2181 sub = PyBytes_AS_STRING(substr);
2182 slen = PyBytes_GET_SIZE(substr);
2183 }
2184 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2185 return -1;
2186 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002188 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 if (direction < 0) {
2191 /* startswith */
2192 if (start+slen > len)
2193 return 0;
2194 } else {
2195 /* endswith */
2196 if (end-start < slen || start > len)
2197 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002199 if (end-slen > start)
2200 start = end - slen;
2201 }
2202 if (end-start >= slen)
2203 return ! memcmp(str+start, sub, slen);
2204 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002205}
2206
2207
2208PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002209"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210\n\
2211Return True if B starts with the specified prefix, False otherwise.\n\
2212With optional start, test B beginning at that position.\n\
2213With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002214prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215
2216static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002217bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002219 Py_ssize_t start = 0;
2220 Py_ssize_t end = PY_SSIZE_T_MAX;
2221 PyObject *subobj;
2222 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
Jesus Ceaac451502011-04-20 17:09:23 +02002224 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002225 return NULL;
2226 if (PyTuple_Check(subobj)) {
2227 Py_ssize_t i;
2228 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2229 result = _bytes_tailmatch(self,
2230 PyTuple_GET_ITEM(subobj, i),
2231 start, end, -1);
2232 if (result == -1)
2233 return NULL;
2234 else if (result) {
2235 Py_RETURN_TRUE;
2236 }
2237 }
2238 Py_RETURN_FALSE;
2239 }
2240 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002241 if (result == -1) {
2242 if (PyErr_ExceptionMatches(PyExc_TypeError))
2243 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2244 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002246 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002247 else
2248 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002249}
2250
2251
2252PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002253"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002254\n\
2255Return True if B ends with the specified suffix, False otherwise.\n\
2256With optional start, test B beginning at that position.\n\
2257With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002258suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259
2260static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002261bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002262{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002263 Py_ssize_t start = 0;
2264 Py_ssize_t end = PY_SSIZE_T_MAX;
2265 PyObject *subobj;
2266 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002267
Jesus Ceaac451502011-04-20 17:09:23 +02002268 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002269 return NULL;
2270 if (PyTuple_Check(subobj)) {
2271 Py_ssize_t i;
2272 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2273 result = _bytes_tailmatch(self,
2274 PyTuple_GET_ITEM(subobj, i),
2275 start, end, +1);
2276 if (result == -1)
2277 return NULL;
2278 else if (result) {
2279 Py_RETURN_TRUE;
2280 }
2281 }
2282 Py_RETURN_FALSE;
2283 }
2284 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002285 if (result == -1) {
2286 if (PyErr_ExceptionMatches(PyExc_TypeError))
2287 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2288 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002290 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002291 else
2292 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002293}
2294
2295
2296PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002297"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002298\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002299Decode B using the codec registered for encoding. Default encoding\n\
2300is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002301handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2302a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002303as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002304able to handle UnicodeDecodeErrors.");
2305
2306static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002307bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002308{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002309 const char *encoding = NULL;
2310 const char *errors = NULL;
2311 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2314 return NULL;
2315 if (encoding == NULL)
2316 encoding = PyUnicode_GetDefaultEncoding();
2317 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002318}
2319
Guido van Rossum20188312006-05-05 15:15:40 +00002320
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002321PyDoc_STRVAR(splitlines__doc__,
2322"B.splitlines([keepends]) -> list of lines\n\
2323\n\
2324Return a list of the lines in B, breaking at line boundaries.\n\
2325Line breaks are not included in the resulting list unless keepends\n\
2326is given and true.");
2327
2328static PyObject*
2329bytes_splitlines(PyObject *self, PyObject *args)
2330{
2331 int keepends = 0;
2332
2333 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002334 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002335
2336 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002337 (PyObject*) self, PyBytes_AS_STRING(self),
2338 PyBytes_GET_SIZE(self), keepends
2339 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002340}
2341
2342
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002343PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002344"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002345\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002346Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002347Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002349
2350static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002351hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 if (c >= 128)
2354 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002355 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 return c - '0';
2357 else {
David Malcolm96960882010-11-05 17:23:41 +00002358 if (Py_ISUPPER(c))
2359 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002360 if (c >= 'a' && c <= 'f')
2361 return c - 'a' + 10;
2362 }
2363 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002364}
2365
2366static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002367bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002369 PyObject *newstring, *hexobj;
2370 char *buf;
2371 Py_UNICODE *hex;
2372 Py_ssize_t hexlen, byteslen, i, j;
2373 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2376 return NULL;
2377 assert(PyUnicode_Check(hexobj));
2378 hexlen = PyUnicode_GET_SIZE(hexobj);
2379 hex = PyUnicode_AS_UNICODE(hexobj);
2380 byteslen = hexlen/2; /* This overestimates if there are spaces */
2381 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2382 if (!newstring)
2383 return NULL;
2384 buf = PyBytes_AS_STRING(newstring);
2385 for (i = j = 0; i < hexlen; i += 2) {
2386 /* skip over spaces in the input */
2387 while (hex[i] == ' ')
2388 i++;
2389 if (i >= hexlen)
2390 break;
2391 top = hex_digit_to_int(hex[i]);
2392 bot = hex_digit_to_int(hex[i+1]);
2393 if (top == -1 || bot == -1) {
2394 PyErr_Format(PyExc_ValueError,
2395 "non-hexadecimal number found in "
2396 "fromhex() arg at position %zd", i);
2397 goto error;
2398 }
2399 buf[j++] = (top << 4) + bot;
2400 }
2401 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2402 goto error;
2403 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002404
2405 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 Py_XDECREF(newstring);
2407 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002408}
2409
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002410PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002411"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002412
2413static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002414bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002415{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 Py_ssize_t res;
2417 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2418 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002419}
2420
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002421
2422static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002423bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002424{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002425 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002426}
2427
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002428
2429static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002430bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2432 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2433 _Py_capitalize__doc__},
2434 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2435 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2436 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2437 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2438 endswith__doc__},
2439 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2440 expandtabs__doc__},
2441 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2442 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2443 fromhex_doc},
2444 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2445 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2446 _Py_isalnum__doc__},
2447 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2448 _Py_isalpha__doc__},
2449 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2450 _Py_isdigit__doc__},
2451 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2452 _Py_islower__doc__},
2453 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2454 _Py_isspace__doc__},
2455 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2456 _Py_istitle__doc__},
2457 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2458 _Py_isupper__doc__},
2459 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2460 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2461 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2462 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2463 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2464 _Py_maketrans__doc__},
2465 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2466 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2467 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2468 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2469 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2470 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2471 rpartition__doc__},
2472 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2473 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2474 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2475 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2476 splitlines__doc__},
2477 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2478 startswith__doc__},
2479 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2480 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2481 _Py_swapcase__doc__},
2482 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2483 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2484 translate__doc__},
2485 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2486 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2487 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2488 sizeof__doc__},
2489 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002490};
2491
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492static PyObject *
2493str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2494
2495static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002496bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002497{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 PyObject *x = NULL;
2499 const char *encoding = NULL;
2500 const char *errors = NULL;
2501 PyObject *new = NULL;
2502 Py_ssize_t size;
2503 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002505 if (type != &PyBytes_Type)
2506 return str_subtype_new(type, args, kwds);
2507 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2508 &encoding, &errors))
2509 return NULL;
2510 if (x == NULL) {
2511 if (encoding != NULL || errors != NULL) {
2512 PyErr_SetString(PyExc_TypeError,
2513 "encoding or errors without sequence "
2514 "argument");
2515 return NULL;
2516 }
2517 return PyBytes_FromString("");
2518 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 if (PyUnicode_Check(x)) {
2521 /* Encode via the codec registry */
2522 if (encoding == NULL) {
2523 PyErr_SetString(PyExc_TypeError,
2524 "string argument without an encoding");
2525 return NULL;
2526 }
2527 new = PyUnicode_AsEncodedString(x, encoding, errors);
2528 if (new == NULL)
2529 return NULL;
2530 assert(PyBytes_Check(new));
2531 return new;
2532 }
2533 /* Is it an integer? */
2534 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2535 if (size == -1 && PyErr_Occurred()) {
2536 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2537 return NULL;
2538 PyErr_Clear();
2539 }
2540 else if (size < 0) {
2541 PyErr_SetString(PyExc_ValueError, "negative count");
2542 return NULL;
2543 }
2544 else {
2545 new = PyBytes_FromStringAndSize(NULL, size);
2546 if (new == NULL) {
2547 return NULL;
2548 }
2549 if (size > 0) {
2550 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2551 }
2552 return new;
2553 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002555 /* If it's not unicode, there can't be encoding or errors */
2556 if (encoding != NULL || errors != NULL) {
2557 PyErr_SetString(PyExc_TypeError,
2558 "encoding or errors without a string argument");
2559 return NULL;
2560 }
2561 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002562}
2563
2564PyObject *
2565PyBytes_FromObject(PyObject *x)
2566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 PyObject *new, *it;
2568 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 if (x == NULL) {
2571 PyErr_BadInternalCall();
2572 return NULL;
2573 }
2574 /* Use the modern buffer interface */
2575 if (PyObject_CheckBuffer(x)) {
2576 Py_buffer view;
2577 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2578 return NULL;
2579 new = PyBytes_FromStringAndSize(NULL, view.len);
2580 if (!new)
2581 goto fail;
2582 /* XXX(brett.cannon): Better way to get to internal buffer? */
2583 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2584 &view, view.len, 'C') < 0)
2585 goto fail;
2586 PyBuffer_Release(&view);
2587 return new;
2588 fail:
2589 Py_XDECREF(new);
2590 PyBuffer_Release(&view);
2591 return NULL;
2592 }
2593 if (PyUnicode_Check(x)) {
2594 PyErr_SetString(PyExc_TypeError,
2595 "cannot convert unicode object to bytes");
2596 return NULL;
2597 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002599 if (PyList_CheckExact(x)) {
2600 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2601 if (new == NULL)
2602 return NULL;
2603 for (i = 0; i < Py_SIZE(x); i++) {
2604 Py_ssize_t value = PyNumber_AsSsize_t(
2605 PyList_GET_ITEM(x, i), PyExc_ValueError);
2606 if (value == -1 && PyErr_Occurred()) {
2607 Py_DECREF(new);
2608 return NULL;
2609 }
2610 if (value < 0 || value >= 256) {
2611 PyErr_SetString(PyExc_ValueError,
2612 "bytes must be in range(0, 256)");
2613 Py_DECREF(new);
2614 return NULL;
2615 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002616 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002617 }
2618 return new;
2619 }
2620 if (PyTuple_CheckExact(x)) {
2621 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2622 if (new == NULL)
2623 return NULL;
2624 for (i = 0; i < Py_SIZE(x); i++) {
2625 Py_ssize_t value = PyNumber_AsSsize_t(
2626 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2627 if (value == -1 && PyErr_Occurred()) {
2628 Py_DECREF(new);
2629 return NULL;
2630 }
2631 if (value < 0 || value >= 256) {
2632 PyErr_SetString(PyExc_ValueError,
2633 "bytes must be in range(0, 256)");
2634 Py_DECREF(new);
2635 return NULL;
2636 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002637 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 }
2639 return new;
2640 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 /* For iterator version, create a string object and resize as needed */
2643 size = _PyObject_LengthHint(x, 64);
2644 if (size == -1 && PyErr_Occurred())
2645 return NULL;
2646 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2647 returning a shared empty bytes string. This required because we
2648 want to call _PyBytes_Resize() the returned object, which we can
2649 only do on bytes objects with refcount == 1. */
2650 size += 1;
2651 new = PyBytes_FromStringAndSize(NULL, size);
2652 if (new == NULL)
2653 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 /* Get the iterator */
2656 it = PyObject_GetIter(x);
2657 if (it == NULL)
2658 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002660 /* Run the iterator to exhaustion */
2661 for (i = 0; ; i++) {
2662 PyObject *item;
2663 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 /* Get the next item */
2666 item = PyIter_Next(it);
2667 if (item == NULL) {
2668 if (PyErr_Occurred())
2669 goto error;
2670 break;
2671 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002673 /* Interpret it as an int (__index__) */
2674 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2675 Py_DECREF(item);
2676 if (value == -1 && PyErr_Occurred())
2677 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 /* Range check */
2680 if (value < 0 || value >= 256) {
2681 PyErr_SetString(PyExc_ValueError,
2682 "bytes must be in range(0, 256)");
2683 goto error;
2684 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 /* Append the byte */
2687 if (i >= size) {
2688 size = 2 * size + 1;
2689 if (_PyBytes_Resize(&new, size) < 0)
2690 goto error;
2691 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002692 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 }
2694 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 /* Clean up and return success */
2697 Py_DECREF(it);
2698 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
2700 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 /* Error handling when new != NULL */
2702 Py_XDECREF(it);
2703 Py_DECREF(new);
2704 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002705}
2706
2707static PyObject *
2708str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2709{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 PyObject *tmp, *pnew;
2711 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 assert(PyType_IsSubtype(type, &PyBytes_Type));
2714 tmp = bytes_new(&PyBytes_Type, args, kwds);
2715 if (tmp == NULL)
2716 return NULL;
2717 assert(PyBytes_CheckExact(tmp));
2718 n = PyBytes_GET_SIZE(tmp);
2719 pnew = type->tp_alloc(type, n);
2720 if (pnew != NULL) {
2721 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2722 PyBytes_AS_STRING(tmp), n+1);
2723 ((PyBytesObject *)pnew)->ob_shash =
2724 ((PyBytesObject *)tmp)->ob_shash;
2725 }
2726 Py_DECREF(tmp);
2727 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728}
2729
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002730PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002731"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002733bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002734bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2735bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002736\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002738 - an iterable yielding integers in range(256)\n\
2739 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002740 - any object implementing the buffer API.\n\
2741 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002742
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002743static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002744
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002745PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002746 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2747 "bytes",
2748 PyBytesObject_SIZE,
2749 sizeof(char),
2750 bytes_dealloc, /* tp_dealloc */
2751 0, /* tp_print */
2752 0, /* tp_getattr */
2753 0, /* tp_setattr */
2754 0, /* tp_reserved */
2755 (reprfunc)bytes_repr, /* tp_repr */
2756 0, /* tp_as_number */
2757 &bytes_as_sequence, /* tp_as_sequence */
2758 &bytes_as_mapping, /* tp_as_mapping */
2759 (hashfunc)bytes_hash, /* tp_hash */
2760 0, /* tp_call */
2761 bytes_str, /* tp_str */
2762 PyObject_GenericGetAttr, /* tp_getattro */
2763 0, /* tp_setattro */
2764 &bytes_as_buffer, /* tp_as_buffer */
2765 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2766 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2767 bytes_doc, /* tp_doc */
2768 0, /* tp_traverse */
2769 0, /* tp_clear */
2770 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2771 0, /* tp_weaklistoffset */
2772 bytes_iter, /* tp_iter */
2773 0, /* tp_iternext */
2774 bytes_methods, /* tp_methods */
2775 0, /* tp_members */
2776 0, /* tp_getset */
2777 &PyBaseObject_Type, /* tp_base */
2778 0, /* tp_dict */
2779 0, /* tp_descr_get */
2780 0, /* tp_descr_set */
2781 0, /* tp_dictoffset */
2782 0, /* tp_init */
2783 0, /* tp_alloc */
2784 bytes_new, /* tp_new */
2785 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002786};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002787
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788void
2789PyBytes_Concat(register PyObject **pv, register PyObject *w)
2790{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 register PyObject *v;
2792 assert(pv != NULL);
2793 if (*pv == NULL)
2794 return;
2795 if (w == NULL) {
2796 Py_DECREF(*pv);
2797 *pv = NULL;
2798 return;
2799 }
2800 v = bytes_concat(*pv, w);
2801 Py_DECREF(*pv);
2802 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803}
2804
2805void
2806PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2807{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 PyBytes_Concat(pv, w);
2809 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810}
2811
2812
2813/* The following function breaks the notion that strings are immutable:
2814 it changes the size of a string. We get away with this only if there
2815 is only one module referencing the object. You can also think of it
2816 as creating a new string object and destroying the old one, only
2817 more efficiently. In any case, don't use this if the string may
2818 already be known to some other part of the code...
2819 Note that if there's not enough memory to resize the string, the original
2820 string object at *pv is deallocated, *pv is set to NULL, an "out of
2821 memory" exception is set, and -1 is returned. Else (on success) 0 is
2822 returned, and the value in *pv may or may not be the same as on input.
2823 As always, an extra byte is allocated for a trailing \0 byte (newsize
2824 does *not* include that), and a trailing \0 byte is stored.
2825*/
2826
2827int
2828_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 register PyObject *v;
2831 register PyBytesObject *sv;
2832 v = *pv;
2833 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2834 *pv = 0;
2835 Py_DECREF(v);
2836 PyErr_BadInternalCall();
2837 return -1;
2838 }
2839 /* XXX UNREF/NEWREF interface should be more symmetrical */
2840 _Py_DEC_REFTOTAL;
2841 _Py_ForgetReference(v);
2842 *pv = (PyObject *)
2843 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2844 if (*pv == NULL) {
2845 PyObject_Del(v);
2846 PyErr_NoMemory();
2847 return -1;
2848 }
2849 _Py_NewReference(*pv);
2850 sv = (PyBytesObject *) *pv;
2851 Py_SIZE(sv) = newsize;
2852 sv->ob_sval[newsize] = '\0';
2853 sv->ob_shash = -1; /* invalidate cached hash value */
2854 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002855}
2856
2857/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2858 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2859 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002860 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002861 * . *pbuf is set to point into it,
2862 * *plen set to the # of chars following that.
2863 * Caller must decref it when done using pbuf.
2864 * The string starting at *pbuf is of the form
2865 * "-"? ("0x" | "0X")? digit+
2866 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2867 * set in flags. The case of hex digits will be correct,
2868 * There will be at least prec digits, zero-filled on the left if
2869 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002870 * val object to be converted
2871 * flags bitmask of format flags; only F_ALT is looked at
2872 * prec minimum number of digits; 0-fill on left if needed
2873 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002874 *
2875 * CAUTION: o, x and X conversions on regular ints can never
2876 * produce a '-' sign, but can for Python's unbounded ints.
2877 */
2878PyObject*
2879_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002880 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 PyObject *result = NULL;
2883 char *buf;
2884 Py_ssize_t i;
2885 int sign; /* 1 if '-', else 0 */
2886 int len; /* number of characters */
2887 Py_ssize_t llen;
2888 int numdigits; /* len == numnondigits + numdigits */
2889 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 /* Avoid exceeding SSIZE_T_MAX */
2892 if (prec > INT_MAX-3) {
2893 PyErr_SetString(PyExc_OverflowError,
2894 "precision too large");
2895 return NULL;
2896 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002898 switch (type) {
2899 case 'd':
2900 case 'u':
2901 /* Special-case boolean: we want 0/1 */
2902 if (PyBool_Check(val))
2903 result = PyNumber_ToBase(val, 10);
2904 else
2905 result = Py_TYPE(val)->tp_str(val);
2906 break;
2907 case 'o':
2908 numnondigits = 2;
2909 result = PyNumber_ToBase(val, 8);
2910 break;
2911 case 'x':
2912 case 'X':
2913 numnondigits = 2;
2914 result = PyNumber_ToBase(val, 16);
2915 break;
2916 default:
2917 assert(!"'type' not in [duoxX]");
2918 }
2919 if (!result)
2920 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002922 buf = _PyUnicode_AsString(result);
2923 if (!buf) {
2924 Py_DECREF(result);
2925 return NULL;
2926 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 /* To modify the string in-place, there can only be one reference. */
2929 if (Py_REFCNT(result) != 1) {
2930 PyErr_BadInternalCall();
2931 return NULL;
2932 }
2933 llen = PyUnicode_GetSize(result);
2934 if (llen > INT_MAX) {
2935 PyErr_SetString(PyExc_ValueError,
2936 "string too large in _PyBytes_FormatLong");
2937 return NULL;
2938 }
2939 len = (int)llen;
2940 if (buf[len-1] == 'L') {
2941 --len;
2942 buf[len] = '\0';
2943 }
2944 sign = buf[0] == '-';
2945 numnondigits += sign;
2946 numdigits = len - numnondigits;
2947 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 /* Get rid of base marker unless F_ALT */
2950 if (((flags & F_ALT) == 0 &&
2951 (type == 'o' || type == 'x' || type == 'X'))) {
2952 assert(buf[sign] == '0');
2953 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2954 buf[sign+1] == 'o');
2955 numnondigits -= 2;
2956 buf += 2;
2957 len -= 2;
2958 if (sign)
2959 buf[0] = '-';
2960 assert(len == numnondigits + numdigits);
2961 assert(numdigits > 0);
2962 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002964 /* Fill with leading zeroes to meet minimum width. */
2965 if (prec > numdigits) {
2966 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2967 numnondigits + prec);
2968 char *b1;
2969 if (!r1) {
2970 Py_DECREF(result);
2971 return NULL;
2972 }
2973 b1 = PyBytes_AS_STRING(r1);
2974 for (i = 0; i < numnondigits; ++i)
2975 *b1++ = *buf++;
2976 for (i = 0; i < prec - numdigits; i++)
2977 *b1++ = '0';
2978 for (i = 0; i < numdigits; i++)
2979 *b1++ = *buf++;
2980 *b1 = '\0';
2981 Py_DECREF(result);
2982 result = r1;
2983 buf = PyBytes_AS_STRING(result);
2984 len = numnondigits + prec;
2985 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 /* Fix up case for hex conversions. */
2988 if (type == 'X') {
2989 /* Need to convert all lower case letters to upper case.
2990 and need to convert 0x to 0X (and -0x to -0X). */
2991 for (i = 0; i < len; i++)
2992 if (buf[i] >= 'a' && buf[i] <= 'x')
2993 buf[i] -= 'a'-'A';
2994 }
2995 *pbuf = buf;
2996 *plen = len;
2997 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998}
2999
3000void
3001PyBytes_Fini(void)
3002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003003 int i;
3004 for (i = 0; i < UCHAR_MAX + 1; i++) {
3005 Py_XDECREF(characters[i]);
3006 characters[i] = NULL;
3007 }
3008 Py_XDECREF(nullstring);
3009 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003010}
3011
Benjamin Peterson4116f362008-05-27 00:36:20 +00003012/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003013
3014typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003015 PyObject_HEAD
3016 Py_ssize_t it_index;
3017 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003018} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019
3020static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 _PyObject_GC_UNTRACK(it);
3024 Py_XDECREF(it->it_seq);
3025 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026}
3027
3028static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003029striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003030{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 Py_VISIT(it->it_seq);
3032 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033}
3034
3035static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003036striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 PyBytesObject *seq;
3039 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 assert(it != NULL);
3042 seq = it->it_seq;
3043 if (seq == NULL)
3044 return NULL;
3045 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3048 item = PyLong_FromLong(
3049 (unsigned char)seq->ob_sval[it->it_index]);
3050 if (item != NULL)
3051 ++it->it_index;
3052 return item;
3053 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 Py_DECREF(seq);
3056 it->it_seq = NULL;
3057 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003058}
3059
3060static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003061striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 Py_ssize_t len = 0;
3064 if (it->it_seq)
3065 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3066 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003067}
3068
3069PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003070 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003071
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003072static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3074 length_hint_doc},
3075 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003076};
3077
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003078PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003079 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3080 "bytes_iterator", /* tp_name */
3081 sizeof(striterobject), /* tp_basicsize */
3082 0, /* tp_itemsize */
3083 /* methods */
3084 (destructor)striter_dealloc, /* tp_dealloc */
3085 0, /* tp_print */
3086 0, /* tp_getattr */
3087 0, /* tp_setattr */
3088 0, /* tp_reserved */
3089 0, /* tp_repr */
3090 0, /* tp_as_number */
3091 0, /* tp_as_sequence */
3092 0, /* tp_as_mapping */
3093 0, /* tp_hash */
3094 0, /* tp_call */
3095 0, /* tp_str */
3096 PyObject_GenericGetAttr, /* tp_getattro */
3097 0, /* tp_setattro */
3098 0, /* tp_as_buffer */
3099 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3100 0, /* tp_doc */
3101 (traverseproc)striter_traverse, /* tp_traverse */
3102 0, /* tp_clear */
3103 0, /* tp_richcompare */
3104 0, /* tp_weaklistoffset */
3105 PyObject_SelfIter, /* tp_iter */
3106 (iternextfunc)striter_next, /* tp_iternext */
3107 striter_methods, /* tp_methods */
3108 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003109};
3110
3111static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003112bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003114 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003116 if (!PyBytes_Check(seq)) {
3117 PyErr_BadInternalCall();
3118 return NULL;
3119 }
3120 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3121 if (it == NULL)
3122 return NULL;
3123 it->it_index = 0;
3124 Py_INCREF(seq);
3125 it->it_seq = (PyBytesObject *)seq;
3126 _PyObject_GC_TRACK(it);
3127 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003128}