blob: 796e400a822051afa6cccecc4da3ebba6b473785 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000176 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000181 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
185 * they don't affect the amount of space we reserve.
186 */
187 if ((*f == 'l' || *f == 'z') &&
188 (f[1] == 'd' || f[1] == 'u'))
189 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
198 case 'd': case 'u': case 'i': case 'x':
199 (void) va_arg(count, int);
200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
214 * XXX I count 18 -- what's the extra for?
215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
224 what's in the argument list) */
225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000231 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 /* step 2: fill the buffer */
233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
235 string = PyBytes_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
244 Py_ssize_t i;
245 int longflag = 0;
246 int size_tflag = 0;
247 /* parse the width.precision part (we're only
248 interested in the precision value, if any) */
249 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000250 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 n = (n*10) + *f++ - '0';
252 if (*f == '.') {
253 f++;
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 }
David Malcolm96960882010-11-05 17:23:41 +0000258 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 f++;
260 /* handle the long flag, but only for %ld and %lu.
261 others can be added when necessary. */
262 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
263 longflag = 1;
264 ++f;
265 }
266 /* handle the size_t flag. */
267 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
268 size_tflag = 1;
269 ++f;
270 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 switch (*f) {
273 case 'c':
274 *s++ = va_arg(vargs, int);
275 break;
276 case 'd':
277 if (longflag)
278 sprintf(s, "%ld", va_arg(vargs, long));
279 else if (size_tflag)
280 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
281 va_arg(vargs, Py_ssize_t));
282 else
283 sprintf(s, "%d", va_arg(vargs, int));
284 s += strlen(s);
285 break;
286 case 'u':
287 if (longflag)
288 sprintf(s, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(s, "%u",
295 va_arg(vargs, unsigned int));
296 s += strlen(s);
297 break;
298 case 'i':
299 sprintf(s, "%i", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 'x':
303 sprintf(s, "%x", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 's':
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (n > 0 && i > n)
310 i = n;
311 Py_MEMCPY(s, p, i);
312 s += i;
313 break;
314 case 'p':
315 sprintf(s, "%p", va_arg(vargs, void*));
316 /* %p is ill-defined: ensure leading 0x. */
317 if (s[1] == 'X')
318 s[1] = 'x';
319 else if (s[1] != 'x') {
320 memmove(s+2, s, strlen(s)+1);
321 s[0] = '0';
322 s[1] = 'x';
323 }
324 s += strlen(s);
325 break;
326 case '%':
327 *s++ = '%';
328 break;
329 default:
330 strcpy(s, p);
331 s += strlen(s);
332 goto end;
333 }
334 } else
335 *s++ = *f;
336 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337
338 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
340 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000341}
342
343PyObject *
344PyBytes_FromFormat(const char *format, ...)
345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 PyObject* ret;
347 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000348
349#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 ret = PyBytes_FromFormatV(format, vargs);
355 va_end(vargs);
356 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357}
358
359static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000360bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000363}
364
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365/* Unescape a backslash-escaped string. If unicode is non-zero,
366 the string is a u-literal. If recode_encoding is non-zero,
367 the string is UTF-8 encoded and should be re-encoded in the
368 specified encoding. */
369
370PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_ssize_t len,
372 const char *errors,
373 Py_ssize_t unicode,
374 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 int c;
377 char *p, *buf;
378 const char *end;
379 PyObject *v;
380 Py_ssize_t newlen = recode_encoding ? 4*len:len;
381 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
382 if (v == NULL)
383 return NULL;
384 p = buf = PyBytes_AsString(v);
385 end = s + len;
386 while (s < end) {
387 if (*s != '\\') {
388 non_esc:
389 if (recode_encoding && (*s & 0x80)) {
390 PyObject *u, *w;
391 char *r;
392 const char* t;
393 Py_ssize_t rn;
394 t = s;
395 /* Decode non-ASCII bytes as UTF-8. */
396 while (t < end && (*t & 0x80)) t++;
397 u = PyUnicode_DecodeUTF8(s, t - s, errors);
398 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 /* Recode them in target encoding. */
401 w = PyUnicode_AsEncodedString(
402 u, recode_encoding, errors);
403 Py_DECREF(u);
404 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 /* Append bytes to output buffer. */
407 assert(PyBytes_Check(w));
408 r = PyBytes_AS_STRING(w);
409 rn = PyBytes_GET_SIZE(w);
410 Py_MEMCPY(p, r, rn);
411 p += rn;
412 Py_DECREF(w);
413 s = t;
414 } else {
415 *p++ = *s++;
416 }
417 continue;
418 }
419 s++;
420 if (s==end) {
421 PyErr_SetString(PyExc_ValueError,
422 "Trailing \\ in string");
423 goto failed;
424 }
425 switch (*s++) {
426 /* XXX This assumes ASCII! */
427 case '\n': break;
428 case '\\': *p++ = '\\'; break;
429 case '\'': *p++ = '\''; break;
430 case '\"': *p++ = '\"'; break;
431 case 'b': *p++ = '\b'; break;
432 case 'f': *p++ = '\014'; break; /* FF */
433 case 't': *p++ = '\t'; break;
434 case 'n': *p++ = '\n'; break;
435 case 'r': *p++ = '\r'; break;
436 case 'v': *p++ = '\013'; break; /* VT */
437 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
438 case '0': case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 c = s[-1] - '0';
441 if (s < end && '0' <= *s && *s <= '7') {
442 c = (c<<3) + *s++ - '0';
443 if (s < end && '0' <= *s && *s <= '7')
444 c = (c<<3) + *s++ - '0';
445 }
446 *p++ = c;
447 break;
448 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000449 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 unsigned int x = 0;
451 c = Py_CHARMASK(*s);
452 s++;
David Malcolm96960882010-11-05 17:23:41 +0000453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000455 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 x = 10 + c - 'a';
457 else
458 x = 10 + c - 'A';
459 x = x << 4;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x += 10 + c - 'a';
466 else
467 x += 10 + c - 'A';
468 *p++ = x;
469 break;
470 }
471 if (!errors || strcmp(errors, "strict") == 0) {
472 PyErr_SetString(PyExc_ValueError,
473 "invalid \\x escape");
474 goto failed;
475 }
476 if (strcmp(errors, "replace") == 0) {
477 *p++ = '?';
478 } else if (strcmp(errors, "ignore") == 0)
479 /* do nothing */;
480 else {
481 PyErr_Format(PyExc_ValueError,
482 "decoding error; unknown "
483 "error handling code: %.400s",
484 errors);
485 goto failed;
486 }
487 default:
488 *p++ = '\\';
489 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200490 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 UTF-8 bytes may follow. */
492 }
493 }
494 if (p-buf < newlen)
495 _PyBytes_Resize(&v, p - buf);
496 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000497 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 Py_DECREF(v);
499 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000500}
501
502/* -------------------------------------------------------------------- */
503/* object api */
504
505Py_ssize_t
506PyBytes_Size(register PyObject *op)
507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 if (!PyBytes_Check(op)) {
509 PyErr_Format(PyExc_TypeError,
510 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
511 return -1;
512 }
513 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514}
515
516char *
517PyBytes_AsString(register PyObject *op)
518{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (!PyBytes_Check(op)) {
520 PyErr_Format(PyExc_TypeError,
521 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
522 return NULL;
523 }
524 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000525}
526
527int
528PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 register char **s,
530 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (s == NULL) {
533 PyErr_BadInternalCall();
534 return -1;
535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (!PyBytes_Check(obj)) {
538 PyErr_Format(PyExc_TypeError,
539 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
540 return -1;
541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 *s = PyBytes_AS_STRING(obj);
544 if (len != NULL)
545 *len = PyBytes_GET_SIZE(obj);
546 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
547 PyErr_SetString(PyExc_TypeError,
548 "expected bytes with no null");
549 return -1;
550 }
551 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000552}
Neal Norwitz6968b052007-02-27 19:02:19 +0000553
554/* -------------------------------------------------------------------- */
555/* Methods */
556
Eric Smith0923d1d2009-04-16 20:16:10 +0000557#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000558
559#include "stringlib/fastsearch.h"
560#include "stringlib/count.h"
561#include "stringlib/find.h"
562#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000563#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000564#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000565
Eric Smith0f78bff2009-11-30 01:01:42 +0000566#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568PyObject *
569PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000570{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 static const char *hexdigits = "0123456789abcdef";
572 register PyBytesObject* op = (PyBytesObject*) obj;
573 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000574 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000576 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 PyErr_SetString(PyExc_OverflowError,
578 "bytes object is too large to make repr");
579 return NULL;
580 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000581 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 v = PyUnicode_FromUnicode(NULL, newsize);
583 if (v == NULL) {
584 return NULL;
585 }
586 else {
587 register Py_ssize_t i;
588 register Py_UNICODE c;
589 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
590 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 /* Figure out which quote to use; single is preferred */
593 quote = '\'';
594 if (smartquotes) {
595 char *test, *start;
596 start = PyBytes_AS_STRING(op);
597 for (test = start; test < start+length; ++test) {
598 if (*test == '"') {
599 quote = '\''; /* back to single */
600 goto decided;
601 }
602 else if (*test == '\'')
603 quote = '"';
604 }
605 decided:
606 ;
607 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 *p++ = 'b', *p++ = quote;
610 for (i = 0; i < length; i++) {
611 /* There's at least enough room for a hex escape
612 and a closing quote. */
613 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
614 c = op->ob_sval[i];
615 if (c == quote || c == '\\')
616 *p++ = '\\', *p++ = c;
617 else if (c == '\t')
618 *p++ = '\\', *p++ = 't';
619 else if (c == '\n')
620 *p++ = '\\', *p++ = 'n';
621 else if (c == '\r')
622 *p++ = '\\', *p++ = 'r';
623 else if (c < ' ' || c >= 0x7f) {
624 *p++ = '\\';
625 *p++ = 'x';
626 *p++ = hexdigits[(c & 0xf0) >> 4];
627 *p++ = hexdigits[c & 0xf];
628 }
629 else
630 *p++ = c;
631 }
632 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
633 *p++ = quote;
634 *p = '\0';
635 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
636 Py_DECREF(v);
637 return NULL;
638 }
639 return v;
640 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000641}
642
Neal Norwitz6968b052007-02-27 19:02:19 +0000643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000644bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Neal Norwitz6968b052007-02-27 19:02:19 +0000649static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 if (Py_BytesWarningFlag) {
653 if (PyErr_WarnEx(PyExc_BytesWarning,
654 "str() on a bytes instance", 1))
655 return NULL;
656 }
657 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000658}
659
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000661bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664}
Neal Norwitz6968b052007-02-27 19:02:19 +0000665
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666/* This is also used by PyBytes_Concat() */
667static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 Py_ssize_t size;
671 Py_buffer va, vb;
672 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 va.len = -1;
675 vb.len = -1;
676 if (_getbuffer(a, &va) < 0 ||
677 _getbuffer(b, &vb) < 0) {
678 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
679 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
680 goto done;
681 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 /* Optimize end cases */
684 if (va.len == 0 && PyBytes_CheckExact(b)) {
685 result = b;
686 Py_INCREF(result);
687 goto done;
688 }
689 if (vb.len == 0 && PyBytes_CheckExact(a)) {
690 result = a;
691 Py_INCREF(result);
692 goto done;
693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 size = va.len + vb.len;
696 if (size < 0) {
697 PyErr_NoMemory();
698 goto done;
699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 result = PyBytes_FromStringAndSize(NULL, size);
702 if (result != NULL) {
703 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
704 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000706
707 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 if (va.len != -1)
709 PyBuffer_Release(&va);
710 if (vb.len != -1)
711 PyBuffer_Release(&vb);
712 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000713}
Neal Norwitz6968b052007-02-27 19:02:19 +0000714
715static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000716bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 register Py_ssize_t i;
719 register Py_ssize_t j;
720 register Py_ssize_t size;
721 register PyBytesObject *op;
722 size_t nbytes;
723 if (n < 0)
724 n = 0;
725 /* watch out for overflows: the size can overflow int,
726 * and the # of bytes needed can overflow size_t
727 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000728 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
735 Py_INCREF(a);
736 return (PyObject *)a;
737 }
738 nbytes = (size_t)size;
739 if (nbytes + PyBytesObject_SIZE <= nbytes) {
740 PyErr_SetString(PyExc_OverflowError,
741 "repeated bytes are too long");
742 return NULL;
743 }
744 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
745 if (op == NULL)
746 return PyErr_NoMemory();
747 PyObject_INIT_VAR(op, &PyBytes_Type, size);
748 op->ob_shash = -1;
749 op->ob_sval[size] = '\0';
750 if (Py_SIZE(a) == 1 && n > 0) {
751 memset(op->ob_sval, a->ob_sval[0] , n);
752 return (PyObject *) op;
753 }
754 i = 0;
755 if (i < size) {
756 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
757 i = Py_SIZE(a);
758 }
759 while (i < size) {
760 j = (i <= size-i) ? i : size-i;
761 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
762 i += j;
763 }
764 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000765}
766
Guido van Rossum98297ee2007-11-06 21:34:58 +0000767static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000768bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000769{
770 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
771 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000773 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000774 PyErr_Clear();
775 if (_getbuffer(arg, &varg) < 0)
776 return -1;
777 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
778 varg.buf, varg.len, 0);
779 PyBuffer_Release(&varg);
780 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000783 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
784 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 }
786
Antoine Pitrou0010d372010-08-15 17:12:55 +0000787 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788}
789
Neal Norwitz6968b052007-02-27 19:02:19 +0000790static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000791bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 if (i < 0 || i >= Py_SIZE(a)) {
794 PyErr_SetString(PyExc_IndexError, "index out of range");
795 return NULL;
796 }
797 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000798}
799
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000801bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000802{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 int c;
804 Py_ssize_t len_a, len_b;
805 Py_ssize_t min_len;
806 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /* Make sure both arguments are strings. */
809 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
810 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
811 (PyObject_IsInstance((PyObject*)a,
812 (PyObject*)&PyUnicode_Type) ||
813 PyObject_IsInstance((PyObject*)b,
814 (PyObject*)&PyUnicode_Type))) {
815 if (PyErr_WarnEx(PyExc_BytesWarning,
816 "Comparison between bytes and string", 1))
817 return NULL;
818 }
819 result = Py_NotImplemented;
820 goto out;
821 }
822 if (a == b) {
823 switch (op) {
824 case Py_EQ:case Py_LE:case Py_GE:
825 result = Py_True;
826 goto out;
827 case Py_NE:case Py_LT:case Py_GT:
828 result = Py_False;
829 goto out;
830 }
831 }
832 if (op == Py_EQ) {
833 /* Supporting Py_NE here as well does not save
834 much time, since Py_NE is rarely used. */
835 if (Py_SIZE(a) == Py_SIZE(b)
836 && (a->ob_sval[0] == b->ob_sval[0]
837 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
838 result = Py_True;
839 } else {
840 result = Py_False;
841 }
842 goto out;
843 }
844 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
845 min_len = (len_a < len_b) ? len_a : len_b;
846 if (min_len > 0) {
847 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
848 if (c==0)
849 c = memcmp(a->ob_sval, b->ob_sval, min_len);
850 } else
851 c = 0;
852 if (c == 0)
853 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
854 switch (op) {
855 case Py_LT: c = c < 0; break;
856 case Py_LE: c = c <= 0; break;
857 case Py_EQ: assert(0); break; /* unreachable */
858 case Py_NE: c = c != 0; break;
859 case Py_GT: c = c > 0; break;
860 case Py_GE: c = c >= 0; break;
861 default:
862 result = Py_NotImplemented;
863 goto out;
864 }
865 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000866 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 Py_INCREF(result);
868 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000869}
870
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000871static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000872bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000873{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 register Py_ssize_t len;
875 register unsigned char *p;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000876 register Py_hash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000877
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400878#ifdef Py_DEBUG
Benjamin Peterson69e97272012-02-21 11:08:50 -0500879 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf6622c82012-04-09 14:53:07 -0400880#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 if (a->ob_shash != -1)
882 return a->ob_shash;
883 len = Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100884 /*
885 We make the hash of the empty string be 0, rather than using
886 (prefix ^ suffix), since this slightly obfuscates the hash secret
887 */
888 if (len == 0) {
889 a->ob_shash = 0;
890 return 0;
891 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 p = (unsigned char *) a->ob_sval;
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100893 x = _Py_HashSecret.prefix;
894 x ^= *p << 7;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 while (--len >= 0)
Gregory P. Smith63e6c322012-01-14 15:31:34 -0800896 x = (_PyHASH_MULTIPLIER*x) ^ *p++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 x ^= Py_SIZE(a);
Georg Brandl2daf6ae2012-02-20 19:54:16 +0100898 x ^= _Py_HashSecret.suffix;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 if (x == -1)
900 x = -2;
901 a->ob_shash = x;
902 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000903}
904
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000905static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000906bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000907{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 if (PyIndex_Check(item)) {
909 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
910 if (i == -1 && PyErr_Occurred())
911 return NULL;
912 if (i < 0)
913 i += PyBytes_GET_SIZE(self);
914 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
915 PyErr_SetString(PyExc_IndexError,
916 "index out of range");
917 return NULL;
918 }
919 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
920 }
921 else if (PySlice_Check(item)) {
922 Py_ssize_t start, stop, step, slicelength, cur, i;
923 char* source_buf;
924 char* result_buf;
925 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000926
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000927 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 PyBytes_GET_SIZE(self),
929 &start, &stop, &step, &slicelength) < 0) {
930 return NULL;
931 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 if (slicelength <= 0) {
934 return PyBytes_FromStringAndSize("", 0);
935 }
936 else if (start == 0 && step == 1 &&
937 slicelength == PyBytes_GET_SIZE(self) &&
938 PyBytes_CheckExact(self)) {
939 Py_INCREF(self);
940 return (PyObject *)self;
941 }
942 else if (step == 1) {
943 return PyBytes_FromStringAndSize(
944 PyBytes_AS_STRING(self) + start,
945 slicelength);
946 }
947 else {
948 source_buf = PyBytes_AS_STRING(self);
949 result = PyBytes_FromStringAndSize(NULL, slicelength);
950 if (result == NULL)
951 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 result_buf = PyBytes_AS_STRING(result);
954 for (cur = start, i = 0; i < slicelength;
955 cur += step, i++) {
956 result_buf[i] = source_buf[cur];
957 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 return result;
960 }
961 }
962 else {
963 PyErr_Format(PyExc_TypeError,
964 "byte indices must be integers, not %.200s",
965 Py_TYPE(item)->tp_name);
966 return NULL;
967 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968}
969
970static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000971bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000972{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
974 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975}
976
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000977static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 (lenfunc)bytes_length, /*sq_length*/
979 (binaryfunc)bytes_concat, /*sq_concat*/
980 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
981 (ssizeargfunc)bytes_item, /*sq_item*/
982 0, /*sq_slice*/
983 0, /*sq_ass_item*/
984 0, /*sq_ass_slice*/
985 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000986};
987
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000988static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000989 (lenfunc)bytes_length,
990 (binaryfunc)bytes_subscript,
991 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000992};
993
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000994static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 (getbufferproc)bytes_buffer_getbuffer,
996 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000997};
998
999
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above: PyArg_ParseTuple() formats for the three
   strip methods. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for a strip mode: skips the 3-character "|O:" that
   starts every format string. */
#define STRIPNAME(i) (stripformat[(i)]+3)
1008
Neal Norwitz6968b052007-02-27 19:02:19 +00001009PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001010"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001011\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001012Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001013If sep is not specified or is None, B is split on ASCII whitespace\n\
1014characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001015If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001016
1017static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001018bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1021 Py_ssize_t maxsplit = -1;
1022 const char *s = PyBytes_AS_STRING(self), *sub;
1023 Py_buffer vsub;
1024 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1027 return NULL;
1028 if (maxsplit < 0)
1029 maxsplit = PY_SSIZE_T_MAX;
1030 if (subobj == Py_None)
1031 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1032 if (_getbuffer(subobj, &vsub) < 0)
1033 return NULL;
1034 sub = vsub.buf;
1035 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1038 PyBuffer_Release(&vsub);
1039 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001040}
1041
Neal Norwitz6968b052007-02-27 19:02:19 +00001042PyDoc_STRVAR(partition__doc__,
1043"B.partition(sep) -> (head, sep, tail)\n\
1044\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001045Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001046the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001047found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
1049static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001050bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 const char *sep;
1053 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 if (PyBytes_Check(sep_obj)) {
1056 sep = PyBytes_AS_STRING(sep_obj);
1057 sep_len = PyBytes_GET_SIZE(sep_obj);
1058 }
1059 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1060 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 return stringlib_partition(
1063 (PyObject*) self,
1064 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1065 sep_obj, sep, sep_len
1066 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001067}
1068
1069PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001070"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001071\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001072Search for the separator sep in B, starting at the end of B,\n\
1073and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001074part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001075bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001076
1077static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001078bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001079{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 const char *sep;
1081 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 if (PyBytes_Check(sep_obj)) {
1084 sep = PyBytes_AS_STRING(sep_obj);
1085 sep_len = PyBytes_GET_SIZE(sep_obj);
1086 }
1087 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1088 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 return stringlib_rpartition(
1091 (PyObject*) self,
1092 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1093 sep_obj, sep, sep_len
1094 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001095}
1096
Neal Norwitz6968b052007-02-27 19:02:19 +00001097PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001098"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001099\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001100Return a list of the sections in B, using sep as the delimiter,\n\
1101starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001102If sep is not given, B is split on ASCII whitespace characters\n\
1103(space, tab, return, newline, formfeed, vertical tab).\n\
1104If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001105
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106
Neal Norwitz6968b052007-02-27 19:02:19 +00001107static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001108bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001109{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1111 Py_ssize_t maxsplit = -1;
1112 const char *s = PyBytes_AS_STRING(self), *sub;
1113 Py_buffer vsub;
1114 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1117 return NULL;
1118 if (maxsplit < 0)
1119 maxsplit = PY_SSIZE_T_MAX;
1120 if (subobj == Py_None)
1121 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1122 if (_getbuffer(subobj, &vsub) < 0)
1123 return NULL;
1124 sub = vsub.buf;
1125 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1128 PyBuffer_Release(&vsub);
1129 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001130}
1131
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001132
1133PyDoc_STRVAR(join__doc__,
1134"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001135\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001136Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001137Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1138
Neal Norwitz6968b052007-02-27 19:02:19 +00001139static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001140bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 char *sep = PyBytes_AS_STRING(self);
1143 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1144 PyObject *res = NULL;
1145 char *p;
1146 Py_ssize_t seqlen = 0;
1147 size_t sz = 0;
1148 Py_ssize_t i;
1149 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 seq = PySequence_Fast(orig, "");
1152 if (seq == NULL) {
1153 return NULL;
1154 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 seqlen = PySequence_Size(seq);
1157 if (seqlen == 0) {
1158 Py_DECREF(seq);
1159 return PyBytes_FromString("");
1160 }
1161 if (seqlen == 1) {
1162 item = PySequence_Fast_GET_ITEM(seq, 0);
1163 if (PyBytes_CheckExact(item)) {
1164 Py_INCREF(item);
1165 Py_DECREF(seq);
1166 return item;
1167 }
1168 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 /* There are at least two things to join, or else we have a subclass
1171 * of the builtin types in the sequence.
1172 * Do a pre-pass to figure out the total amount of space we'll
1173 * need (sz), and see whether all argument are bytes.
1174 */
1175 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1176 for (i = 0; i < seqlen; i++) {
1177 const size_t old_sz = sz;
1178 item = PySequence_Fast_GET_ITEM(seq, i);
1179 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1180 PyErr_Format(PyExc_TypeError,
1181 "sequence item %zd: expected bytes,"
1182 " %.80s found",
1183 i, Py_TYPE(item)->tp_name);
1184 Py_DECREF(seq);
1185 return NULL;
1186 }
1187 sz += Py_SIZE(item);
1188 if (i != 0)
1189 sz += seplen;
1190 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1191 PyErr_SetString(PyExc_OverflowError,
1192 "join() result is too long for bytes");
1193 Py_DECREF(seq);
1194 return NULL;
1195 }
1196 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 /* Allocate result space. */
1199 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1200 if (res == NULL) {
1201 Py_DECREF(seq);
1202 return NULL;
1203 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 /* Catenate everything. */
1206 /* I'm not worried about a PyByteArray item growing because there's
1207 nowhere in this function where we release the GIL. */
1208 p = PyBytes_AS_STRING(res);
1209 for (i = 0; i < seqlen; ++i) {
1210 size_t n;
1211 char *q;
1212 if (i) {
1213 Py_MEMCPY(p, sep, seplen);
1214 p += seplen;
1215 }
1216 item = PySequence_Fast_GET_ITEM(seq, i);
1217 n = Py_SIZE(item);
1218 if (PyBytes_Check(item))
1219 q = PyBytes_AS_STRING(item);
1220 else
1221 q = PyByteArray_AS_STRING(item);
1222 Py_MEMCPY(p, q, n);
1223 p += n;
1224 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 Py_DECREF(seq);
1227 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001228}
1229
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230PyObject *
1231_PyBytes_Join(PyObject *sep, PyObject *x)
1232{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 assert(sep != NULL && PyBytes_Check(sep));
1234 assert(x != NULL);
1235 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001236}
1237
/* Helper macro to fix up start/end slice values in place, for a sequence
 * of length `len`:
 *   - end is clamped to len; a negative end counts from the back and is
 *     floored at 0;
 *   - a negative start counts from the back and is floored at 0.
 * `start` and `end` must be modifiable lvalues.  All three arguments are
 * evaluated more than once, so they must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe under an unbraced if/else; use it with a
 * trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252
1253Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001254bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 PyObject *subobj;
1257 const char *sub;
1258 Py_ssize_t sub_len;
1259 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260
Jesus Ceaac451502011-04-20 17:09:23 +02001261 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1262 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 if (PyBytes_Check(subobj)) {
1266 sub = PyBytes_AS_STRING(subobj);
1267 sub_len = PyBytes_GET_SIZE(subobj);
1268 }
1269 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1270 /* XXX - the "expected a character buffer object" is pretty
1271 confusing for a non-expert. remap to something else ? */
1272 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 if (dir > 0)
1275 return stringlib_find_slice(
1276 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1277 sub, sub_len, start, end);
1278 else
1279 return stringlib_rfind_slice(
1280 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1281 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001282}
1283
1284
1285PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001286"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001287\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001288Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001289such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001291\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292Return -1 on failure.");
1293
Neal Norwitz6968b052007-02-27 19:02:19 +00001294static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001295bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001296{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 Py_ssize_t result = bytes_find_internal(self, args, +1);
1298 if (result == -2)
1299 return NULL;
1300 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001301}
1302
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303
1304PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001305"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001306\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001307Like B.find() but raise ValueError when the substring is not found.");
1308
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001309static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001310bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 Py_ssize_t result = bytes_find_internal(self, args, +1);
1313 if (result == -2)
1314 return NULL;
1315 if (result == -1) {
1316 PyErr_SetString(PyExc_ValueError,
1317 "substring not found");
1318 return NULL;
1319 }
1320 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001321}
1322
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001323
1324PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001325"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001326\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001328such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001329arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001330\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331Return -1 on failure.");
1332
Neal Norwitz6968b052007-02-27 19:02:19 +00001333static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001334bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 Py_ssize_t result = bytes_find_internal(self, args, -1);
1337 if (result == -2)
1338 return NULL;
1339 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001340}
1341
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001342
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001343PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001344"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345\n\
1346Like B.rfind() but raise ValueError when the substring is not found.");
1347
1348static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001349bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 Py_ssize_t result = bytes_find_internal(self, args, -1);
1352 if (result == -2)
1353 return NULL;
1354 if (result == -1) {
1355 PyErr_SetString(PyExc_ValueError,
1356 "substring not found");
1357 return NULL;
1358 }
1359 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001360}
1361
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001362
1363Py_LOCAL_INLINE(PyObject *)
1364do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 Py_buffer vsep;
1367 char *s = PyBytes_AS_STRING(self);
1368 Py_ssize_t len = PyBytes_GET_SIZE(self);
1369 char *sep;
1370 Py_ssize_t seplen;
1371 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 if (_getbuffer(sepobj, &vsep) < 0)
1374 return NULL;
1375 sep = vsep.buf;
1376 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 i = 0;
1379 if (striptype != RIGHTSTRIP) {
1380 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1381 i++;
1382 }
1383 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 j = len;
1386 if (striptype != LEFTSTRIP) {
1387 do {
1388 j--;
1389 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1390 j++;
1391 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1396 Py_INCREF(self);
1397 return (PyObject*)self;
1398 }
1399 else
1400 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001401}
1402
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403
1404Py_LOCAL_INLINE(PyObject *)
1405do_strip(PyBytesObject *self, int striptype)
1406{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 char *s = PyBytes_AS_STRING(self);
1408 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 i = 0;
1411 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001412 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 i++;
1414 }
1415 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 j = len;
1418 if (striptype != LEFTSTRIP) {
1419 do {
1420 j--;
David Malcolm96960882010-11-05 17:23:41 +00001421 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 j++;
1423 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1426 Py_INCREF(self);
1427 return (PyObject*)self;
1428 }
1429 else
1430 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431}
1432
1433
1434Py_LOCAL_INLINE(PyObject *)
1435do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1440 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 if (sep != NULL && sep != Py_None) {
1443 return do_xstrip(self, striptype, sep);
1444 }
1445 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446}
1447
1448
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001449PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001451\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001452Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001453If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001455bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 if (PyTuple_GET_SIZE(args) == 0)
1458 return do_strip(self, BOTHSTRIP); /* Common case */
1459 else
1460 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461}
1462
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001464PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001465"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001467Strip leading bytes contained in the argument.\n\
1468If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001470bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001471{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 if (PyTuple_GET_SIZE(args) == 0)
1473 return do_strip(self, LEFTSTRIP); /* Common case */
1474 else
1475 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001476}
1477
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001478
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001479PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001480"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001481\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001482Strip trailing bytes contained in the argument.\n\
1483If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001484static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001485bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 if (PyTuple_GET_SIZE(args) == 0)
1488 return do_strip(self, RIGHTSTRIP); /* Common case */
1489 else
1490 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001491}
Neal Norwitz6968b052007-02-27 19:02:19 +00001492
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493
1494PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001495"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001496\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001497Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001498string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001499as in slice notation.");
1500
1501static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001502bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 PyObject *sub_obj;
1505 const char *str = PyBytes_AS_STRING(self), *sub;
1506 Py_ssize_t sub_len;
1507 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508
Jesus Ceaac451502011-04-20 17:09:23 +02001509 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001512 if (PyBytes_Check(sub_obj)) {
1513 sub = PyBytes_AS_STRING(sub_obj);
1514 sub_len = PyBytes_GET_SIZE(sub_obj);
1515 }
1516 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1517 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 return PyLong_FromSsize_t(
1522 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1523 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524}
1525
1526
1527PyDoc_STRVAR(translate__doc__,
1528"B.translate(table[, deletechars]) -> bytes\n\
1529\n\
1530Return a copy of B, where all characters occurring in the\n\
1531optional argument deletechars are removed, and the remaining\n\
1532characters have been mapped through the given translation\n\
1533table, which must be a bytes object of length 256.");
1534
1535static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001536bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 register char *input, *output;
1539 const char *table;
1540 register Py_ssize_t i, c, changed = 0;
1541 PyObject *input_obj = (PyObject*)self;
1542 const char *output_start, *del_table=NULL;
1543 Py_ssize_t inlen, tablen, dellen = 0;
1544 PyObject *result;
1545 int trans_table[256];
1546 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1549 &tableobj, &delobj))
1550 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 if (PyBytes_Check(tableobj)) {
1553 table = PyBytes_AS_STRING(tableobj);
1554 tablen = PyBytes_GET_SIZE(tableobj);
1555 }
1556 else if (tableobj == Py_None) {
1557 table = NULL;
1558 tablen = 256;
1559 }
1560 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1561 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 if (tablen != 256) {
1564 PyErr_SetString(PyExc_ValueError,
1565 "translation table must be 256 characters long");
1566 return NULL;
1567 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 if (delobj != NULL) {
1570 if (PyBytes_Check(delobj)) {
1571 del_table = PyBytes_AS_STRING(delobj);
1572 dellen = PyBytes_GET_SIZE(delobj);
1573 }
1574 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1575 return NULL;
1576 }
1577 else {
1578 del_table = NULL;
1579 dellen = 0;
1580 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 inlen = PyBytes_GET_SIZE(input_obj);
1583 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1584 if (result == NULL)
1585 return NULL;
1586 output_start = output = PyBytes_AsString(result);
1587 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 if (dellen == 0 && table != NULL) {
1590 /* If no deletions are required, use faster code */
1591 for (i = inlen; --i >= 0; ) {
1592 c = Py_CHARMASK(*input++);
1593 if (Py_CHARMASK((*output++ = table[c])) != c)
1594 changed = 1;
1595 }
1596 if (changed || !PyBytes_CheckExact(input_obj))
1597 return result;
1598 Py_DECREF(result);
1599 Py_INCREF(input_obj);
1600 return input_obj;
1601 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 if (table == NULL) {
1604 for (i = 0; i < 256; i++)
1605 trans_table[i] = Py_CHARMASK(i);
1606 } else {
1607 for (i = 0; i < 256; i++)
1608 trans_table[i] = Py_CHARMASK(table[i]);
1609 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 for (i = 0; i < dellen; i++)
1612 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 for (i = inlen; --i >= 0; ) {
1615 c = Py_CHARMASK(*input++);
1616 if (trans_table[c] != -1)
1617 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1618 continue;
1619 changed = 1;
1620 }
1621 if (!changed && PyBytes_CheckExact(input_obj)) {
1622 Py_DECREF(result);
1623 Py_INCREF(input_obj);
1624 return input_obj;
1625 }
1626 /* Fix the size of the resulting string */
1627 if (inlen > 0)
1628 _PyBytes_Resize(&result, output - output_start);
1629 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630}
1631
1632
Georg Brandlabc38772009-04-12 15:51:51 +00001633static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001634bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001635{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001637}
1638
/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char* to that byte, or NULL when there is none
   (a cast wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1643
1644/* String ops must return a string. */
1645/* If the object is subclass of string, create a copy */
1646Py_LOCAL(PyBytesObject *)
1647return_self(PyBytesObject *self)
1648{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 if (PyBytes_CheckExact(self)) {
1650 Py_INCREF(self);
1651 return self;
1652 }
1653 return (PyBytesObject *)PyBytes_FromStringAndSize(
1654 PyBytes_AS_STRING(self),
1655 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001656}
1657
1658Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001659countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001660{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 Py_ssize_t count=0;
1662 const char *start=target;
1663 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 while ( (start=findchar(start, end-start, c)) != NULL ) {
1666 count++;
1667 if (count >= maxcount)
1668 break;
1669 start += 1;
1670 }
1671 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672}
1673
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
1675/* Algorithms for different cases of string replacement */
1676
1677/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1678Py_LOCAL(PyBytesObject *)
1679replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 const char *to_s, Py_ssize_t to_len,
1681 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001682{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 char *self_s, *result_s;
1684 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001685 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001690 /* 1 at the end plus 1 after every character;
1691 count = min(maxcount, self_len + 1) */
1692 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001694 else
1695 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1696 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 /* Check for overflow */
1699 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001700 assert(count > 0);
1701 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 PyErr_SetString(PyExc_OverflowError,
1703 "replacement bytes are too long");
1704 return NULL;
1705 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001706 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 if (! (result = (PyBytesObject *)
1709 PyBytes_FromStringAndSize(NULL, result_len)) )
1710 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 self_s = PyBytes_AS_STRING(self);
1713 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 /* Lay the first one down (guaranteed this will occur) */
1718 Py_MEMCPY(result_s, to_s, to_len);
1719 result_s += to_len;
1720 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 for (i=0; i<count; i++) {
1723 *result_s++ = *self_s++;
1724 Py_MEMCPY(result_s, to_s, to_len);
1725 result_s += to_len;
1726 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 /* Copy the rest of the original string */
1729 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001731 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732}
1733
1734/* Special case for deleting a single character */
1735/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1736Py_LOCAL(PyBytesObject *)
1737replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001738 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 char *self_s, *result_s;
1741 char *start, *next, *end;
1742 Py_ssize_t self_len, result_len;
1743 Py_ssize_t count;
1744 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 self_len = PyBytes_GET_SIZE(self);
1747 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 count = countchar(self_s, self_len, from_c, maxcount);
1750 if (count == 0) {
1751 return return_self(self);
1752 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 result_len = self_len - count; /* from_len == 1 */
1755 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 if ( (result = (PyBytesObject *)
1758 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1759 return NULL;
1760 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 start = self_s;
1763 end = self_s + self_len;
1764 while (count-- > 0) {
1765 next = findchar(start, end-start, from_c);
1766 if (next == NULL)
1767 break;
1768 Py_MEMCPY(result_s, start, next-start);
1769 result_s += (next-start);
1770 start = next+1;
1771 }
1772 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001774 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775}
1776
1777/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1778
1779Py_LOCAL(PyBytesObject *)
1780replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 const char *from_s, Py_ssize_t from_len,
1782 Py_ssize_t maxcount) {
1783 char *self_s, *result_s;
1784 char *start, *next, *end;
1785 Py_ssize_t self_len, result_len;
1786 Py_ssize_t count, offset;
1787 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 self_len = PyBytes_GET_SIZE(self);
1790 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 count = stringlib_count(self_s, self_len,
1793 from_s, from_len,
1794 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 if (count == 0) {
1797 /* no matches */
1798 return return_self(self);
1799 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 result_len = self_len - (count * from_len);
1802 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001804 if ( (result = (PyBytesObject *)
1805 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1806 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 start = self_s;
1811 end = self_s + self_len;
1812 while (count-- > 0) {
1813 offset = stringlib_find(start, end-start,
1814 from_s, from_len,
1815 0);
1816 if (offset == -1)
1817 break;
1818 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 result_s += (next-start);
1823 start = next+from_len;
1824 }
1825 Py_MEMCPY(result_s, start, end-start);
1826 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827}
1828
1829/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1830Py_LOCAL(PyBytesObject *)
1831replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 char from_c, char to_c,
1833 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 char *self_s, *result_s, *start, *end, *next;
1836 Py_ssize_t self_len;
1837 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 /* The result string will be the same size */
1840 self_s = PyBytes_AS_STRING(self);
1841 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 if (next == NULL) {
1846 /* No matches; return the original string */
1847 return return_self(self);
1848 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 /* Need to make a new string */
1851 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1852 if (result == NULL)
1853 return NULL;
1854 result_s = PyBytes_AS_STRING(result);
1855 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 /* change everything in-place, starting with this one */
1858 start = result_s + (next-self_s);
1859 *start = to_c;
1860 start++;
1861 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 while (--maxcount > 0) {
1864 next = findchar(start, end-start, from_c);
1865 if (next == NULL)
1866 break;
1867 *next = to_c;
1868 start = next+1;
1869 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872}
1873
1874/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1875Py_LOCAL(PyBytesObject *)
1876replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 const char *from_s, Py_ssize_t from_len,
1878 const char *to_s, Py_ssize_t to_len,
1879 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 char *result_s, *start, *end;
1882 char *self_s;
1883 Py_ssize_t self_len, offset;
1884 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 self_s = PyBytes_AS_STRING(self);
1889 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 offset = stringlib_find(self_s, self_len,
1892 from_s, from_len,
1893 0);
1894 if (offset == -1) {
1895 /* No matches; return the original string */
1896 return return_self(self);
1897 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 /* Need to make a new string */
1900 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1901 if (result == NULL)
1902 return NULL;
1903 result_s = PyBytes_AS_STRING(result);
1904 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 /* change everything in-place, starting with this one */
1907 start = result_s + offset;
1908 Py_MEMCPY(start, to_s, from_len);
1909 start += from_len;
1910 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 while ( --maxcount > 0) {
1913 offset = stringlib_find(start, end-start,
1914 from_s, from_len,
1915 0);
1916 if (offset==-1)
1917 break;
1918 Py_MEMCPY(start+offset, to_s, from_len);
1919 start += offset+from_len;
1920 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923}
1924
1925/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1926Py_LOCAL(PyBytesObject *)
1927replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 char from_c,
1929 const char *to_s, Py_ssize_t to_len,
1930 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 char *self_s, *result_s;
1933 char *start, *next, *end;
1934 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001935 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 self_s = PyBytes_AS_STRING(self);
1939 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 count = countchar(self_s, self_len, from_c, maxcount);
1942 if (count == 0) {
1943 /* no matches, return unchanged */
1944 return return_self(self);
1945 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 /* use the difference between current and new, hence the "-1" */
1948 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001949 assert(count > 0);
1950 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 PyErr_SetString(PyExc_OverflowError,
1952 "replacement bytes are too long");
1953 return NULL;
1954 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001955 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 if ( (result = (PyBytesObject *)
1958 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1959 return NULL;
1960 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 start = self_s;
1963 end = self_s + self_len;
1964 while (count-- > 0) {
1965 next = findchar(start, end-start, from_c);
1966 if (next == NULL)
1967 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 if (next == start) {
1970 /* replace with the 'to' */
1971 Py_MEMCPY(result_s, to_s, to_len);
1972 result_s += to_len;
1973 start += 1;
1974 } else {
1975 /* copy the unchanged old then the 'to' */
1976 Py_MEMCPY(result_s, start, next-start);
1977 result_s += (next-start);
1978 Py_MEMCPY(result_s, to_s, to_len);
1979 result_s += to_len;
1980 start = next+1;
1981 }
1982 }
1983 /* Copy the remainder of the remaining string */
1984 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001987}
1988
1989/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1990Py_LOCAL(PyBytesObject *)
1991replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 const char *from_s, Py_ssize_t from_len,
1993 const char *to_s, Py_ssize_t to_len,
1994 Py_ssize_t maxcount) {
1995 char *self_s, *result_s;
1996 char *start, *next, *end;
1997 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001998 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002001 self_s = PyBytes_AS_STRING(self);
2002 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 count = stringlib_count(self_s, self_len,
2005 from_s, from_len,
2006 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 if (count == 0) {
2009 /* no matches, return unchanged */
2010 return return_self(self);
2011 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 /* Check for overflow */
2014 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002015 assert(count > 0);
2016 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 PyErr_SetString(PyExc_OverflowError,
2018 "replacement bytes are too long");
2019 return NULL;
2020 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002021 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002023 if ( (result = (PyBytesObject *)
2024 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2025 return NULL;
2026 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 start = self_s;
2029 end = self_s + self_len;
2030 while (count-- > 0) {
2031 offset = stringlib_find(start, end-start,
2032 from_s, from_len,
2033 0);
2034 if (offset == -1)
2035 break;
2036 next = start+offset;
2037 if (next == start) {
2038 /* replace with the 'to' */
2039 Py_MEMCPY(result_s, to_s, to_len);
2040 result_s += to_len;
2041 start += from_len;
2042 } else {
2043 /* copy the unchanged old then the 'to' */
2044 Py_MEMCPY(result_s, start, next-start);
2045 result_s += (next-start);
2046 Py_MEMCPY(result_s, to_s, to_len);
2047 result_s += to_len;
2048 start = next+from_len;
2049 }
2050 }
2051 /* Copy the remainder of the remaining string */
2052 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055}
2056
2057
2058Py_LOCAL(PyBytesObject *)
2059replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 const char *from_s, Py_ssize_t from_len,
2061 const char *to_s, Py_ssize_t to_len,
2062 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 if (maxcount < 0) {
2065 maxcount = PY_SSIZE_T_MAX;
2066 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2067 /* nothing to do; return the original string */
2068 return return_self(self);
2069 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 if (maxcount == 0 ||
2072 (from_len == 0 && to_len == 0)) {
2073 /* nothing to do; return the original string */
2074 return return_self(self);
2075 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 if (from_len == 0) {
2080 /* insert the 'to' string everywhere. */
2081 /* >>> "Python".replace("", ".") */
2082 /* '.P.y.t.h.o.n.' */
2083 return replace_interleave(self, to_s, to_len, maxcount);
2084 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002086 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2087 /* point for an empty self string to generate a non-empty string */
2088 /* Special case so the remaining code always gets a non-empty string */
2089 if (PyBytes_GET_SIZE(self) == 0) {
2090 return return_self(self);
2091 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 if (to_len == 0) {
2094 /* delete all occurrences of 'from' string */
2095 if (from_len == 1) {
2096 return replace_delete_single_character(
2097 self, from_s[0], maxcount);
2098 } else {
2099 return replace_delete_substring(self, from_s,
2100 from_len, maxcount);
2101 }
2102 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 if (from_len == to_len) {
2107 if (from_len == 1) {
2108 return replace_single_character_in_place(
2109 self,
2110 from_s[0],
2111 to_s[0],
2112 maxcount);
2113 } else {
2114 return replace_substring_in_place(
2115 self, from_s, from_len, to_s, to_len,
2116 maxcount);
2117 }
2118 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 /* Otherwise use the more generic algorithms */
2121 if (from_len == 1) {
2122 return replace_single_character(self, from_s[0],
2123 to_s, to_len, maxcount);
2124 } else {
2125 /* len('from')>=2, len('to')>=1 */
2126 return replace_substring(self, from_s, from_len, to_s, to_len,
2127 maxcount);
2128 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129}
2130
2131PyDoc_STRVAR(replace__doc__,
2132"B.replace(old, new[, count]) -> bytes\n\
2133\n\
2134Return a copy of B with all occurrences of subsection\n\
2135old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002136given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137
2138static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002139bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 Py_ssize_t count = -1;
2142 PyObject *from, *to;
2143 const char *from_s, *to_s;
2144 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2147 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 if (PyBytes_Check(from)) {
2150 from_s = PyBytes_AS_STRING(from);
2151 from_len = PyBytes_GET_SIZE(from);
2152 }
2153 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2154 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 if (PyBytes_Check(to)) {
2157 to_s = PyBytes_AS_STRING(to);
2158 to_len = PyBytes_GET_SIZE(to);
2159 }
2160 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2161 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 return (PyObject *)replace((PyBytesObject *) self,
2164 from_s, from_len,
2165 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166}
2167
2168/** End DALKE **/
2169
2170/* Matches the end (direction >= 0) or start (direction < 0) of self
2171 * against substr, using the start and end arguments. Returns
2172 * -1 on error, 0 if not found and 1 if found.
2173 */
2174Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002175_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 Py_ssize_t len = PyBytes_GET_SIZE(self);
2179 Py_ssize_t slen;
2180 const char* sub;
2181 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 if (PyBytes_Check(substr)) {
2184 sub = PyBytes_AS_STRING(substr);
2185 slen = PyBytes_GET_SIZE(substr);
2186 }
2187 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2188 return -1;
2189 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002191 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002193 if (direction < 0) {
2194 /* startswith */
2195 if (start+slen > len)
2196 return 0;
2197 } else {
2198 /* endswith */
2199 if (end-start < slen || start > len)
2200 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002202 if (end-slen > start)
2203 start = end - slen;
2204 }
2205 if (end-start >= slen)
2206 return ! memcmp(str+start, sub, slen);
2207 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208}
2209
2210
2211PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002212"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213\n\
2214Return True if B starts with the specified prefix, False otherwise.\n\
2215With optional start, test B beginning at that position.\n\
2216With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002217prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218
2219static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002220bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002221{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002222 Py_ssize_t start = 0;
2223 Py_ssize_t end = PY_SSIZE_T_MAX;
2224 PyObject *subobj;
2225 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226
Jesus Ceaac451502011-04-20 17:09:23 +02002227 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002228 return NULL;
2229 if (PyTuple_Check(subobj)) {
2230 Py_ssize_t i;
2231 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2232 result = _bytes_tailmatch(self,
2233 PyTuple_GET_ITEM(subobj, i),
2234 start, end, -1);
2235 if (result == -1)
2236 return NULL;
2237 else if (result) {
2238 Py_RETURN_TRUE;
2239 }
2240 }
2241 Py_RETURN_FALSE;
2242 }
2243 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002244 if (result == -1) {
2245 if (PyErr_ExceptionMatches(PyExc_TypeError))
2246 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2247 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002248 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002249 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 else
2251 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002252}
2253
2254
2255PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002256"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257\n\
2258Return True if B ends with the specified suffix, False otherwise.\n\
2259With optional start, test B beginning at that position.\n\
2260With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002261suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002262
2263static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002264bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002266 Py_ssize_t start = 0;
2267 Py_ssize_t end = PY_SSIZE_T_MAX;
2268 PyObject *subobj;
2269 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Jesus Ceaac451502011-04-20 17:09:23 +02002271 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002272 return NULL;
2273 if (PyTuple_Check(subobj)) {
2274 Py_ssize_t i;
2275 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2276 result = _bytes_tailmatch(self,
2277 PyTuple_GET_ITEM(subobj, i),
2278 start, end, +1);
2279 if (result == -1)
2280 return NULL;
2281 else if (result) {
2282 Py_RETURN_TRUE;
2283 }
2284 }
2285 Py_RETURN_FALSE;
2286 }
2287 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002288 if (result == -1) {
2289 if (PyErr_ExceptionMatches(PyExc_TypeError))
2290 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2291 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002292 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002293 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002294 else
2295 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002296}
2297
2298
2299PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002300"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002302Decode B using the codec registered for encoding. Default encoding\n\
2303is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002304handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2305a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002306as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002307able to handle UnicodeDecodeErrors.");
2308
2309static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002310bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002312 const char *encoding = NULL;
2313 const char *errors = NULL;
2314 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002316 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2317 return NULL;
2318 if (encoding == NULL)
2319 encoding = PyUnicode_GetDefaultEncoding();
2320 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002321}
2322
Guido van Rossum20188312006-05-05 15:15:40 +00002323
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002324PyDoc_STRVAR(splitlines__doc__,
2325"B.splitlines([keepends]) -> list of lines\n\
2326\n\
2327Return a list of the lines in B, breaking at line boundaries.\n\
2328Line breaks are not included in the resulting list unless keepends\n\
2329is given and true.");
2330
2331static PyObject*
2332bytes_splitlines(PyObject *self, PyObject *args)
2333{
2334 int keepends = 0;
2335
2336 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002337 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002338
2339 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002340 (PyObject*) self, PyBytes_AS_STRING(self),
2341 PyBytes_GET_SIZE(self), keepends
2342 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002343}
2344
2345
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002346PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002348\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002349Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002350Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002351Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002352
2353static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002354hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 if (c >= 128)
2357 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002358 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 return c - '0';
2360 else {
David Malcolm96960882010-11-05 17:23:41 +00002361 if (Py_ISUPPER(c))
2362 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002363 if (c >= 'a' && c <= 'f')
2364 return c - 'a' + 10;
2365 }
2366 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002367}
2368
2369static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002370bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002372 PyObject *newstring, *hexobj;
2373 char *buf;
2374 Py_UNICODE *hex;
2375 Py_ssize_t hexlen, byteslen, i, j;
2376 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2379 return NULL;
2380 assert(PyUnicode_Check(hexobj));
2381 hexlen = PyUnicode_GET_SIZE(hexobj);
2382 hex = PyUnicode_AS_UNICODE(hexobj);
2383 byteslen = hexlen/2; /* This overestimates if there are spaces */
2384 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2385 if (!newstring)
2386 return NULL;
2387 buf = PyBytes_AS_STRING(newstring);
2388 for (i = j = 0; i < hexlen; i += 2) {
2389 /* skip over spaces in the input */
2390 while (hex[i] == ' ')
2391 i++;
2392 if (i >= hexlen)
2393 break;
2394 top = hex_digit_to_int(hex[i]);
2395 bot = hex_digit_to_int(hex[i+1]);
2396 if (top == -1 || bot == -1) {
2397 PyErr_Format(PyExc_ValueError,
2398 "non-hexadecimal number found in "
2399 "fromhex() arg at position %zd", i);
2400 goto error;
2401 }
2402 buf[j++] = (top << 4) + bot;
2403 }
2404 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2405 goto error;
2406 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002407
2408 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002409 Py_XDECREF(newstring);
2410 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002411}
2412
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002413PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002414"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002415
2416static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002417bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002418{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 Py_ssize_t res;
2420 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2421 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002422}
2423
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002424
2425static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002426bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002427{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002429}
2430
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002431
2432static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002433bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002434 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2435 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2436 _Py_capitalize__doc__},
2437 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2438 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2439 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2440 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2441 endswith__doc__},
2442 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2443 expandtabs__doc__},
2444 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2445 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2446 fromhex_doc},
2447 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2448 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2449 _Py_isalnum__doc__},
2450 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2451 _Py_isalpha__doc__},
2452 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2453 _Py_isdigit__doc__},
2454 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2455 _Py_islower__doc__},
2456 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2457 _Py_isspace__doc__},
2458 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2459 _Py_istitle__doc__},
2460 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2461 _Py_isupper__doc__},
2462 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2463 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2464 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2465 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2466 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2467 _Py_maketrans__doc__},
2468 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2469 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2470 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2471 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2472 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2473 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2474 rpartition__doc__},
2475 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2476 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2477 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2478 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2479 splitlines__doc__},
2480 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2481 startswith__doc__},
2482 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2483 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2484 _Py_swapcase__doc__},
2485 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2486 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2487 translate__doc__},
2488 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2489 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2490 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2491 sizeof__doc__},
2492 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002493};
2494
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495static PyObject *
2496str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2497
2498static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002499bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 PyObject *x = NULL;
2502 const char *encoding = NULL;
2503 const char *errors = NULL;
2504 PyObject *new = NULL;
2505 Py_ssize_t size;
2506 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 if (type != &PyBytes_Type)
2509 return str_subtype_new(type, args, kwds);
2510 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2511 &encoding, &errors))
2512 return NULL;
2513 if (x == NULL) {
2514 if (encoding != NULL || errors != NULL) {
2515 PyErr_SetString(PyExc_TypeError,
2516 "encoding or errors without sequence "
2517 "argument");
2518 return NULL;
2519 }
2520 return PyBytes_FromString("");
2521 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002522
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 if (PyUnicode_Check(x)) {
2524 /* Encode via the codec registry */
2525 if (encoding == NULL) {
2526 PyErr_SetString(PyExc_TypeError,
2527 "string argument without an encoding");
2528 return NULL;
2529 }
2530 new = PyUnicode_AsEncodedString(x, encoding, errors);
2531 if (new == NULL)
2532 return NULL;
2533 assert(PyBytes_Check(new));
2534 return new;
2535 }
2536 /* Is it an integer? */
2537 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2538 if (size == -1 && PyErr_Occurred()) {
2539 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2540 return NULL;
2541 PyErr_Clear();
2542 }
2543 else if (size < 0) {
2544 PyErr_SetString(PyExc_ValueError, "negative count");
2545 return NULL;
2546 }
2547 else {
2548 new = PyBytes_FromStringAndSize(NULL, size);
2549 if (new == NULL) {
2550 return NULL;
2551 }
2552 if (size > 0) {
2553 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2554 }
2555 return new;
2556 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 /* If it's not unicode, there can't be encoding or errors */
2559 if (encoding != NULL || errors != NULL) {
2560 PyErr_SetString(PyExc_TypeError,
2561 "encoding or errors without a string argument");
2562 return NULL;
2563 }
2564 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002565}
2566
2567PyObject *
2568PyBytes_FromObject(PyObject *x)
2569{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 PyObject *new, *it;
2571 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 if (x == NULL) {
2574 PyErr_BadInternalCall();
2575 return NULL;
2576 }
2577 /* Use the modern buffer interface */
2578 if (PyObject_CheckBuffer(x)) {
2579 Py_buffer view;
2580 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2581 return NULL;
2582 new = PyBytes_FromStringAndSize(NULL, view.len);
2583 if (!new)
2584 goto fail;
2585 /* XXX(brett.cannon): Better way to get to internal buffer? */
2586 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2587 &view, view.len, 'C') < 0)
2588 goto fail;
2589 PyBuffer_Release(&view);
2590 return new;
2591 fail:
2592 Py_XDECREF(new);
2593 PyBuffer_Release(&view);
2594 return NULL;
2595 }
2596 if (PyUnicode_Check(x)) {
2597 PyErr_SetString(PyExc_TypeError,
2598 "cannot convert unicode object to bytes");
2599 return NULL;
2600 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002602 if (PyList_CheckExact(x)) {
2603 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2604 if (new == NULL)
2605 return NULL;
2606 for (i = 0; i < Py_SIZE(x); i++) {
2607 Py_ssize_t value = PyNumber_AsSsize_t(
2608 PyList_GET_ITEM(x, i), PyExc_ValueError);
2609 if (value == -1 && PyErr_Occurred()) {
2610 Py_DECREF(new);
2611 return NULL;
2612 }
2613 if (value < 0 || value >= 256) {
2614 PyErr_SetString(PyExc_ValueError,
2615 "bytes must be in range(0, 256)");
2616 Py_DECREF(new);
2617 return NULL;
2618 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002619 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 }
2621 return new;
2622 }
2623 if (PyTuple_CheckExact(x)) {
2624 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2625 if (new == NULL)
2626 return NULL;
2627 for (i = 0; i < Py_SIZE(x); i++) {
2628 Py_ssize_t value = PyNumber_AsSsize_t(
2629 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2630 if (value == -1 && PyErr_Occurred()) {
2631 Py_DECREF(new);
2632 return NULL;
2633 }
2634 if (value < 0 || value >= 256) {
2635 PyErr_SetString(PyExc_ValueError,
2636 "bytes must be in range(0, 256)");
2637 Py_DECREF(new);
2638 return NULL;
2639 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002640 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 }
2642 return new;
2643 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002645 /* For iterator version, create a string object and resize as needed */
2646 size = _PyObject_LengthHint(x, 64);
2647 if (size == -1 && PyErr_Occurred())
2648 return NULL;
2649 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2650 returning a shared empty bytes string. This required because we
2651 want to call _PyBytes_Resize() the returned object, which we can
2652 only do on bytes objects with refcount == 1. */
2653 size += 1;
2654 new = PyBytes_FromStringAndSize(NULL, size);
2655 if (new == NULL)
2656 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002658 /* Get the iterator */
2659 it = PyObject_GetIter(x);
2660 if (it == NULL)
2661 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 /* Run the iterator to exhaustion */
2664 for (i = 0; ; i++) {
2665 PyObject *item;
2666 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 /* Get the next item */
2669 item = PyIter_Next(it);
2670 if (item == NULL) {
2671 if (PyErr_Occurred())
2672 goto error;
2673 break;
2674 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 /* Interpret it as an int (__index__) */
2677 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2678 Py_DECREF(item);
2679 if (value == -1 && PyErr_Occurred())
2680 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 /* Range check */
2683 if (value < 0 || value >= 256) {
2684 PyErr_SetString(PyExc_ValueError,
2685 "bytes must be in range(0, 256)");
2686 goto error;
2687 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 /* Append the byte */
2690 if (i >= size) {
2691 size = 2 * size + 1;
2692 if (_PyBytes_Resize(&new, size) < 0)
2693 goto error;
2694 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002695 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 }
2697 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 /* Clean up and return success */
2700 Py_DECREF(it);
2701 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
2703 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 /* Error handling when new != NULL */
2705 Py_XDECREF(it);
2706 Py_DECREF(new);
2707 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708}
2709
2710static PyObject *
2711str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2712{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 PyObject *tmp, *pnew;
2714 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 assert(PyType_IsSubtype(type, &PyBytes_Type));
2717 tmp = bytes_new(&PyBytes_Type, args, kwds);
2718 if (tmp == NULL)
2719 return NULL;
2720 assert(PyBytes_CheckExact(tmp));
2721 n = PyBytes_GET_SIZE(tmp);
2722 pnew = type->tp_alloc(type, n);
2723 if (pnew != NULL) {
2724 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2725 PyBytes_AS_STRING(tmp), n+1);
2726 ((PyBytesObject *)pnew)->ob_shash =
2727 ((PyBytesObject *)tmp)->ob_shash;
2728 }
2729 Py_DECREF(tmp);
2730 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002731}
2732
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002733PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002734"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002736bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002737bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2738bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002739\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002741 - an iterable yielding integers in range(256)\n\
2742 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002743 - any object implementing the buffer API.\n\
2744 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002745
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002746static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002747
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002748PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002749 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2750 "bytes",
2751 PyBytesObject_SIZE,
2752 sizeof(char),
2753 bytes_dealloc, /* tp_dealloc */
2754 0, /* tp_print */
2755 0, /* tp_getattr */
2756 0, /* tp_setattr */
2757 0, /* tp_reserved */
2758 (reprfunc)bytes_repr, /* tp_repr */
2759 0, /* tp_as_number */
2760 &bytes_as_sequence, /* tp_as_sequence */
2761 &bytes_as_mapping, /* tp_as_mapping */
2762 (hashfunc)bytes_hash, /* tp_hash */
2763 0, /* tp_call */
2764 bytes_str, /* tp_str */
2765 PyObject_GenericGetAttr, /* tp_getattro */
2766 0, /* tp_setattro */
2767 &bytes_as_buffer, /* tp_as_buffer */
2768 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2769 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2770 bytes_doc, /* tp_doc */
2771 0, /* tp_traverse */
2772 0, /* tp_clear */
2773 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2774 0, /* tp_weaklistoffset */
2775 bytes_iter, /* tp_iter */
2776 0, /* tp_iternext */
2777 bytes_methods, /* tp_methods */
2778 0, /* tp_members */
2779 0, /* tp_getset */
2780 &PyBaseObject_Type, /* tp_base */
2781 0, /* tp_dict */
2782 0, /* tp_descr_get */
2783 0, /* tp_descr_set */
2784 0, /* tp_dictoffset */
2785 0, /* tp_init */
2786 0, /* tp_alloc */
2787 bytes_new, /* tp_new */
2788 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002789};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002790
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791void
2792PyBytes_Concat(register PyObject **pv, register PyObject *w)
2793{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 register PyObject *v;
2795 assert(pv != NULL);
2796 if (*pv == NULL)
2797 return;
2798 if (w == NULL) {
2799 Py_DECREF(*pv);
2800 *pv = NULL;
2801 return;
2802 }
2803 v = bytes_concat(*pv, w);
2804 Py_DECREF(*pv);
2805 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806}
2807
2808void
2809PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 PyBytes_Concat(pv, w);
2812 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002813}
2814
2815
2816/* The following function breaks the notion that strings are immutable:
2817 it changes the size of a string. We get away with this only if there
2818 is only one module referencing the object. You can also think of it
2819 as creating a new string object and destroying the old one, only
2820 more efficiently. In any case, don't use this if the string may
2821 already be known to some other part of the code...
2822 Note that if there's not enough memory to resize the string, the original
2823 string object at *pv is deallocated, *pv is set to NULL, an "out of
2824 memory" exception is set, and -1 is returned. Else (on success) 0 is
2825 returned, and the value in *pv may or may not be the same as on input.
2826 As always, an extra byte is allocated for a trailing \0 byte (newsize
2827 does *not* include that), and a trailing \0 byte is stored.
2828*/
2829
2830int
2831_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2832{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 register PyObject *v;
2834 register PyBytesObject *sv;
2835 v = *pv;
2836 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2837 *pv = 0;
2838 Py_DECREF(v);
2839 PyErr_BadInternalCall();
2840 return -1;
2841 }
2842 /* XXX UNREF/NEWREF interface should be more symmetrical */
2843 _Py_DEC_REFTOTAL;
2844 _Py_ForgetReference(v);
2845 *pv = (PyObject *)
2846 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2847 if (*pv == NULL) {
2848 PyObject_Del(v);
2849 PyErr_NoMemory();
2850 return -1;
2851 }
2852 _Py_NewReference(*pv);
2853 sv = (PyBytesObject *) *pv;
2854 Py_SIZE(sv) = newsize;
2855 sv->ob_sval[newsize] = '\0';
2856 sv->ob_shash = -1; /* invalidate cached hash value */
2857 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858}
2859
2860/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2861 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2862 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002863 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002864 * . *pbuf is set to point into it,
2865 * *plen set to the # of chars following that.
2866 * Caller must decref it when done using pbuf.
2867 * The string starting at *pbuf is of the form
2868 * "-"? ("0x" | "0X")? digit+
2869 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2870 * set in flags. The case of hex digits will be correct,
2871 * There will be at least prec digits, zero-filled on the left if
2872 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002873 * val object to be converted
2874 * flags bitmask of format flags; only F_ALT is looked at
2875 * prec minimum number of digits; 0-fill on left if needed
2876 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877 *
2878 * CAUTION: o, x and X conversions on regular ints can never
2879 * produce a '-' sign, but can for Python's unbounded ints.
2880 */
2881PyObject*
2882_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 PyObject *result = NULL;
2886 char *buf;
2887 Py_ssize_t i;
2888 int sign; /* 1 if '-', else 0 */
2889 int len; /* number of characters */
2890 Py_ssize_t llen;
2891 int numdigits; /* len == numnondigits + numdigits */
2892 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 /* Avoid exceeding SSIZE_T_MAX */
2895 if (prec > INT_MAX-3) {
2896 PyErr_SetString(PyExc_OverflowError,
2897 "precision too large");
2898 return NULL;
2899 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002901 switch (type) {
2902 case 'd':
2903 case 'u':
2904 /* Special-case boolean: we want 0/1 */
2905 if (PyBool_Check(val))
2906 result = PyNumber_ToBase(val, 10);
2907 else
2908 result = Py_TYPE(val)->tp_str(val);
2909 break;
2910 case 'o':
2911 numnondigits = 2;
2912 result = PyNumber_ToBase(val, 8);
2913 break;
2914 case 'x':
2915 case 'X':
2916 numnondigits = 2;
2917 result = PyNumber_ToBase(val, 16);
2918 break;
2919 default:
2920 assert(!"'type' not in [duoxX]");
2921 }
2922 if (!result)
2923 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 buf = _PyUnicode_AsString(result);
2926 if (!buf) {
2927 Py_DECREF(result);
2928 return NULL;
2929 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 /* To modify the string in-place, there can only be one reference. */
2932 if (Py_REFCNT(result) != 1) {
2933 PyErr_BadInternalCall();
2934 return NULL;
2935 }
2936 llen = PyUnicode_GetSize(result);
2937 if (llen > INT_MAX) {
2938 PyErr_SetString(PyExc_ValueError,
2939 "string too large in _PyBytes_FormatLong");
2940 return NULL;
2941 }
2942 len = (int)llen;
2943 if (buf[len-1] == 'L') {
2944 --len;
2945 buf[len] = '\0';
2946 }
2947 sign = buf[0] == '-';
2948 numnondigits += sign;
2949 numdigits = len - numnondigits;
2950 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002952 /* Get rid of base marker unless F_ALT */
2953 if (((flags & F_ALT) == 0 &&
2954 (type == 'o' || type == 'x' || type == 'X'))) {
2955 assert(buf[sign] == '0');
2956 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2957 buf[sign+1] == 'o');
2958 numnondigits -= 2;
2959 buf += 2;
2960 len -= 2;
2961 if (sign)
2962 buf[0] = '-';
2963 assert(len == numnondigits + numdigits);
2964 assert(numdigits > 0);
2965 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 /* Fill with leading zeroes to meet minimum width. */
2968 if (prec > numdigits) {
2969 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2970 numnondigits + prec);
2971 char *b1;
2972 if (!r1) {
2973 Py_DECREF(result);
2974 return NULL;
2975 }
2976 b1 = PyBytes_AS_STRING(r1);
2977 for (i = 0; i < numnondigits; ++i)
2978 *b1++ = *buf++;
2979 for (i = 0; i < prec - numdigits; i++)
2980 *b1++ = '0';
2981 for (i = 0; i < numdigits; i++)
2982 *b1++ = *buf++;
2983 *b1 = '\0';
2984 Py_DECREF(result);
2985 result = r1;
2986 buf = PyBytes_AS_STRING(result);
2987 len = numnondigits + prec;
2988 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 /* Fix up case for hex conversions. */
2991 if (type == 'X') {
2992 /* Need to convert all lower case letters to upper case.
2993 and need to convert 0x to 0X (and -0x to -0X). */
2994 for (i = 0; i < len; i++)
2995 if (buf[i] >= 'a' && buf[i] <= 'x')
2996 buf[i] -= 'a'-'A';
2997 }
2998 *pbuf = buf;
2999 *plen = len;
3000 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003001}
3002
3003void
3004PyBytes_Fini(void)
3005{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 int i;
3007 for (i = 0; i < UCHAR_MAX + 1; i++) {
3008 Py_XDECREF(characters[i]);
3009 characters[i] = NULL;
3010 }
3011 Py_XDECREF(nullstring);
3012 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003013}
3014
Benjamin Peterson4116f362008-05-27 00:36:20 +00003015/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016
3017typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 PyObject_HEAD
3019 Py_ssize_t it_index;
3020 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022
3023static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003024striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003025{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 _PyObject_GC_UNTRACK(it);
3027 Py_XDECREF(it->it_seq);
3028 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029}
3030
3031static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 Py_VISIT(it->it_seq);
3035 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036}
3037
3038static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003040{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003041 PyBytesObject *seq;
3042 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 assert(it != NULL);
3045 seq = it->it_seq;
3046 if (seq == NULL)
3047 return NULL;
3048 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003050 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3051 item = PyLong_FromLong(
3052 (unsigned char)seq->ob_sval[it->it_index]);
3053 if (item != NULL)
3054 ++it->it_index;
3055 return item;
3056 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 Py_DECREF(seq);
3059 it->it_seq = NULL;
3060 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061}
3062
3063static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003066 Py_ssize_t len = 0;
3067 if (it->it_seq)
3068 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3069 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003070}
3071
3072PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003074
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003076 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3077 length_hint_doc},
3078 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003079};
3080
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003081PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003082 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3083 "bytes_iterator", /* tp_name */
3084 sizeof(striterobject), /* tp_basicsize */
3085 0, /* tp_itemsize */
3086 /* methods */
3087 (destructor)striter_dealloc, /* tp_dealloc */
3088 0, /* tp_print */
3089 0, /* tp_getattr */
3090 0, /* tp_setattr */
3091 0, /* tp_reserved */
3092 0, /* tp_repr */
3093 0, /* tp_as_number */
3094 0, /* tp_as_sequence */
3095 0, /* tp_as_mapping */
3096 0, /* tp_hash */
3097 0, /* tp_call */
3098 0, /* tp_str */
3099 PyObject_GenericGetAttr, /* tp_getattro */
3100 0, /* tp_setattro */
3101 0, /* tp_as_buffer */
3102 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3103 0, /* tp_doc */
3104 (traverseproc)striter_traverse, /* tp_traverse */
3105 0, /* tp_clear */
3106 0, /* tp_richcompare */
3107 0, /* tp_weaklistoffset */
3108 PyObject_SelfIter, /* tp_iter */
3109 (iternextfunc)striter_next, /* tp_iternext */
3110 striter_methods, /* tp_methods */
3111 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003112};
3113
3114static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003115bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003117 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003119 if (!PyBytes_Check(seq)) {
3120 PyErr_BadInternalCall();
3121 return NULL;
3122 }
3123 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3124 if (it == NULL)
3125 return NULL;
3126 it->it_index = 0;
3127 Py_INCREF(seq);
3128 it->it_seq = (PyBytesObject *)seq;
3129 _PyObject_GC_TRACK(it);
3130 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003131}