blob: 8e35fa927697c505835eea221718de3d41457ea2 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
   For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000176 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000181 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
185 * they don't affect the amount of space we reserve.
186 */
187 if ((*f == 'l' || *f == 'z') &&
188 (f[1] == 'd' || f[1] == 'u'))
189 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
198 case 'd': case 'u': case 'i': case 'x':
199 (void) va_arg(count, int);
200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
214 * XXX I count 18 -- what's the extra for?
215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
224 what's in the argument list) */
225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000231 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 /* step 2: fill the buffer */
233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
235 string = PyBytes_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
244 Py_ssize_t i;
245 int longflag = 0;
246 int size_tflag = 0;
247 /* parse the width.precision part (we're only
248 interested in the precision value, if any) */
249 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000250 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 n = (n*10) + *f++ - '0';
252 if (*f == '.') {
253 f++;
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 }
David Malcolm96960882010-11-05 17:23:41 +0000258 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 f++;
260 /* handle the long flag, but only for %ld and %lu.
261 others can be added when necessary. */
262 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
263 longflag = 1;
264 ++f;
265 }
266 /* handle the size_t flag. */
267 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
268 size_tflag = 1;
269 ++f;
270 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 switch (*f) {
273 case 'c':
274 *s++ = va_arg(vargs, int);
275 break;
276 case 'd':
277 if (longflag)
278 sprintf(s, "%ld", va_arg(vargs, long));
279 else if (size_tflag)
280 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
281 va_arg(vargs, Py_ssize_t));
282 else
283 sprintf(s, "%d", va_arg(vargs, int));
284 s += strlen(s);
285 break;
286 case 'u':
287 if (longflag)
288 sprintf(s, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(s, "%u",
295 va_arg(vargs, unsigned int));
296 s += strlen(s);
297 break;
298 case 'i':
299 sprintf(s, "%i", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 'x':
303 sprintf(s, "%x", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 's':
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (n > 0 && i > n)
310 i = n;
311 Py_MEMCPY(s, p, i);
312 s += i;
313 break;
314 case 'p':
315 sprintf(s, "%p", va_arg(vargs, void*));
316 /* %p is ill-defined: ensure leading 0x. */
317 if (s[1] == 'X')
318 s[1] = 'x';
319 else if (s[1] != 'x') {
320 memmove(s+2, s, strlen(s)+1);
321 s[0] = '0';
322 s[1] = 'x';
323 }
324 s += strlen(s);
325 break;
326 case '%':
327 *s++ = '%';
328 break;
329 default:
330 strcpy(s, p);
331 s += strlen(s);
332 goto end;
333 }
334 } else
335 *s++ = *f;
336 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337
338 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
340 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000341}
342
343PyObject *
344PyBytes_FromFormat(const char *format, ...)
345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 PyObject* ret;
347 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000348
349#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 ret = PyBytes_FromFormatV(format, vargs);
355 va_end(vargs);
356 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357}
358
359static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000360bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000363}
364
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365/* Unescape a backslash-escaped string. If unicode is non-zero,
366 the string is a u-literal. If recode_encoding is non-zero,
367 the string is UTF-8 encoded and should be re-encoded in the
368 specified encoding. */
369
370PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_ssize_t len,
372 const char *errors,
373 Py_ssize_t unicode,
374 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 int c;
377 char *p, *buf;
378 const char *end;
379 PyObject *v;
380 Py_ssize_t newlen = recode_encoding ? 4*len:len;
381 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
382 if (v == NULL)
383 return NULL;
384 p = buf = PyBytes_AsString(v);
385 end = s + len;
386 while (s < end) {
387 if (*s != '\\') {
388 non_esc:
389 if (recode_encoding && (*s & 0x80)) {
390 PyObject *u, *w;
391 char *r;
392 const char* t;
393 Py_ssize_t rn;
394 t = s;
395 /* Decode non-ASCII bytes as UTF-8. */
396 while (t < end && (*t & 0x80)) t++;
397 u = PyUnicode_DecodeUTF8(s, t - s, errors);
398 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 /* Recode them in target encoding. */
401 w = PyUnicode_AsEncodedString(
402 u, recode_encoding, errors);
403 Py_DECREF(u);
404 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 /* Append bytes to output buffer. */
407 assert(PyBytes_Check(w));
408 r = PyBytes_AS_STRING(w);
409 rn = PyBytes_GET_SIZE(w);
410 Py_MEMCPY(p, r, rn);
411 p += rn;
412 Py_DECREF(w);
413 s = t;
414 } else {
415 *p++ = *s++;
416 }
417 continue;
418 }
419 s++;
420 if (s==end) {
421 PyErr_SetString(PyExc_ValueError,
422 "Trailing \\ in string");
423 goto failed;
424 }
425 switch (*s++) {
426 /* XXX This assumes ASCII! */
427 case '\n': break;
428 case '\\': *p++ = '\\'; break;
429 case '\'': *p++ = '\''; break;
430 case '\"': *p++ = '\"'; break;
431 case 'b': *p++ = '\b'; break;
432 case 'f': *p++ = '\014'; break; /* FF */
433 case 't': *p++ = '\t'; break;
434 case 'n': *p++ = '\n'; break;
435 case 'r': *p++ = '\r'; break;
436 case 'v': *p++ = '\013'; break; /* VT */
437 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
438 case '0': case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 c = s[-1] - '0';
441 if (s < end && '0' <= *s && *s <= '7') {
442 c = (c<<3) + *s++ - '0';
443 if (s < end && '0' <= *s && *s <= '7')
444 c = (c<<3) + *s++ - '0';
445 }
446 *p++ = c;
447 break;
448 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000449 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 unsigned int x = 0;
451 c = Py_CHARMASK(*s);
452 s++;
David Malcolm96960882010-11-05 17:23:41 +0000453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000455 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 x = 10 + c - 'a';
457 else
458 x = 10 + c - 'A';
459 x = x << 4;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x += 10 + c - 'a';
466 else
467 x += 10 + c - 'A';
468 *p++ = x;
469 break;
470 }
471 if (!errors || strcmp(errors, "strict") == 0) {
472 PyErr_SetString(PyExc_ValueError,
473 "invalid \\x escape");
474 goto failed;
475 }
476 if (strcmp(errors, "replace") == 0) {
477 *p++ = '?';
478 } else if (strcmp(errors, "ignore") == 0)
479 /* do nothing */;
480 else {
481 PyErr_Format(PyExc_ValueError,
482 "decoding error; unknown "
483 "error handling code: %.400s",
484 errors);
485 goto failed;
486 }
487 default:
488 *p++ = '\\';
489 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200490 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 UTF-8 bytes may follow. */
492 }
493 }
494 if (p-buf < newlen)
495 _PyBytes_Resize(&v, p - buf);
496 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000497 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 Py_DECREF(v);
499 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000500}
501
502/* -------------------------------------------------------------------- */
503/* object api */
504
505Py_ssize_t
506PyBytes_Size(register PyObject *op)
507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 if (!PyBytes_Check(op)) {
509 PyErr_Format(PyExc_TypeError,
510 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
511 return -1;
512 }
513 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514}
515
516char *
517PyBytes_AsString(register PyObject *op)
518{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (!PyBytes_Check(op)) {
520 PyErr_Format(PyExc_TypeError,
521 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
522 return NULL;
523 }
524 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000525}
526
527int
528PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 register char **s,
530 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (s == NULL) {
533 PyErr_BadInternalCall();
534 return -1;
535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (!PyBytes_Check(obj)) {
538 PyErr_Format(PyExc_TypeError,
539 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
540 return -1;
541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 *s = PyBytes_AS_STRING(obj);
544 if (len != NULL)
545 *len = PyBytes_GET_SIZE(obj);
546 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
547 PyErr_SetString(PyExc_TypeError,
548 "expected bytes with no null");
549 return -1;
550 }
551 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000552}
Neal Norwitz6968b052007-02-27 19:02:19 +0000553
554/* -------------------------------------------------------------------- */
555/* Methods */
556
Eric Smith0923d1d2009-04-16 20:16:10 +0000557#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000558
559#include "stringlib/fastsearch.h"
560#include "stringlib/count.h"
561#include "stringlib/find.h"
562#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000563#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000564#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000565
Eric Smith0f78bff2009-11-30 01:01:42 +0000566#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568PyObject *
569PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000570{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 static const char *hexdigits = "0123456789abcdef";
572 register PyBytesObject* op = (PyBytesObject*) obj;
573 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000574 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000576 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 PyErr_SetString(PyExc_OverflowError,
578 "bytes object is too large to make repr");
579 return NULL;
580 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000581 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 v = PyUnicode_FromUnicode(NULL, newsize);
583 if (v == NULL) {
584 return NULL;
585 }
586 else {
587 register Py_ssize_t i;
588 register Py_UNICODE c;
589 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
590 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 /* Figure out which quote to use; single is preferred */
593 quote = '\'';
594 if (smartquotes) {
595 char *test, *start;
596 start = PyBytes_AS_STRING(op);
597 for (test = start; test < start+length; ++test) {
598 if (*test == '"') {
599 quote = '\''; /* back to single */
600 goto decided;
601 }
602 else if (*test == '\'')
603 quote = '"';
604 }
605 decided:
606 ;
607 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 *p++ = 'b', *p++ = quote;
610 for (i = 0; i < length; i++) {
611 /* There's at least enough room for a hex escape
612 and a closing quote. */
613 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
614 c = op->ob_sval[i];
615 if (c == quote || c == '\\')
616 *p++ = '\\', *p++ = c;
617 else if (c == '\t')
618 *p++ = '\\', *p++ = 't';
619 else if (c == '\n')
620 *p++ = '\\', *p++ = 'n';
621 else if (c == '\r')
622 *p++ = '\\', *p++ = 'r';
623 else if (c < ' ' || c >= 0x7f) {
624 *p++ = '\\';
625 *p++ = 'x';
626 *p++ = hexdigits[(c & 0xf0) >> 4];
627 *p++ = hexdigits[c & 0xf];
628 }
629 else
630 *p++ = c;
631 }
632 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
633 *p++ = quote;
634 *p = '\0';
635 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
636 Py_DECREF(v);
637 return NULL;
638 }
639 return v;
640 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000641}
642
Neal Norwitz6968b052007-02-27 19:02:19 +0000643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000644bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Neal Norwitz6968b052007-02-27 19:02:19 +0000649static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 if (Py_BytesWarningFlag) {
653 if (PyErr_WarnEx(PyExc_BytesWarning,
654 "str() on a bytes instance", 1))
655 return NULL;
656 }
657 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000658}
659
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000661bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664}
Neal Norwitz6968b052007-02-27 19:02:19 +0000665
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666/* This is also used by PyBytes_Concat() */
667static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 Py_ssize_t size;
671 Py_buffer va, vb;
672 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 va.len = -1;
675 vb.len = -1;
676 if (_getbuffer(a, &va) < 0 ||
677 _getbuffer(b, &vb) < 0) {
678 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
679 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
680 goto done;
681 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 /* Optimize end cases */
684 if (va.len == 0 && PyBytes_CheckExact(b)) {
685 result = b;
686 Py_INCREF(result);
687 goto done;
688 }
689 if (vb.len == 0 && PyBytes_CheckExact(a)) {
690 result = a;
691 Py_INCREF(result);
692 goto done;
693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 size = va.len + vb.len;
696 if (size < 0) {
697 PyErr_NoMemory();
698 goto done;
699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 result = PyBytes_FromStringAndSize(NULL, size);
702 if (result != NULL) {
703 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
704 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000706
707 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 if (va.len != -1)
709 PyBuffer_Release(&va);
710 if (vb.len != -1)
711 PyBuffer_Release(&vb);
712 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000713}
Neal Norwitz6968b052007-02-27 19:02:19 +0000714
715static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000716bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 register Py_ssize_t i;
719 register Py_ssize_t j;
720 register Py_ssize_t size;
721 register PyBytesObject *op;
722 size_t nbytes;
723 if (n < 0)
724 n = 0;
725 /* watch out for overflows: the size can overflow int,
726 * and the # of bytes needed can overflow size_t
727 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000728 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
735 Py_INCREF(a);
736 return (PyObject *)a;
737 }
738 nbytes = (size_t)size;
739 if (nbytes + PyBytesObject_SIZE <= nbytes) {
740 PyErr_SetString(PyExc_OverflowError,
741 "repeated bytes are too long");
742 return NULL;
743 }
744 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
745 if (op == NULL)
746 return PyErr_NoMemory();
747 PyObject_INIT_VAR(op, &PyBytes_Type, size);
748 op->ob_shash = -1;
749 op->ob_sval[size] = '\0';
750 if (Py_SIZE(a) == 1 && n > 0) {
751 memset(op->ob_sval, a->ob_sval[0] , n);
752 return (PyObject *) op;
753 }
754 i = 0;
755 if (i < size) {
756 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
757 i = Py_SIZE(a);
758 }
759 while (i < size) {
760 j = (i <= size-i) ? i : size-i;
761 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
762 i += j;
763 }
764 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000765}
766
Guido van Rossum98297ee2007-11-06 21:34:58 +0000767static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000768bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000769{
770 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
771 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000773 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000774 PyErr_Clear();
775 if (_getbuffer(arg, &varg) < 0)
776 return -1;
777 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
778 varg.buf, varg.len, 0);
779 PyBuffer_Release(&varg);
780 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000783 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
784 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 }
786
Antoine Pitrou0010d372010-08-15 17:12:55 +0000787 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788}
789
Neal Norwitz6968b052007-02-27 19:02:19 +0000790static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000791bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 if (i < 0 || i >= Py_SIZE(a)) {
794 PyErr_SetString(PyExc_IndexError, "index out of range");
795 return NULL;
796 }
797 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000798}
799
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000801bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000802{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 int c;
804 Py_ssize_t len_a, len_b;
805 Py_ssize_t min_len;
806 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /* Make sure both arguments are strings. */
809 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
810 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
811 (PyObject_IsInstance((PyObject*)a,
812 (PyObject*)&PyUnicode_Type) ||
813 PyObject_IsInstance((PyObject*)b,
814 (PyObject*)&PyUnicode_Type))) {
815 if (PyErr_WarnEx(PyExc_BytesWarning,
816 "Comparison between bytes and string", 1))
817 return NULL;
818 }
819 result = Py_NotImplemented;
820 goto out;
821 }
822 if (a == b) {
823 switch (op) {
824 case Py_EQ:case Py_LE:case Py_GE:
825 result = Py_True;
826 goto out;
827 case Py_NE:case Py_LT:case Py_GT:
828 result = Py_False;
829 goto out;
830 }
831 }
832 if (op == Py_EQ) {
833 /* Supporting Py_NE here as well does not save
834 much time, since Py_NE is rarely used. */
835 if (Py_SIZE(a) == Py_SIZE(b)
836 && (a->ob_sval[0] == b->ob_sval[0]
837 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
838 result = Py_True;
839 } else {
840 result = Py_False;
841 }
842 goto out;
843 }
844 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
845 min_len = (len_a < len_b) ? len_a : len_b;
846 if (min_len > 0) {
847 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
848 if (c==0)
849 c = memcmp(a->ob_sval, b->ob_sval, min_len);
850 } else
851 c = 0;
852 if (c == 0)
853 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
854 switch (op) {
855 case Py_LT: c = c < 0; break;
856 case Py_LE: c = c <= 0; break;
857 case Py_EQ: assert(0); break; /* unreachable */
858 case Py_NE: c = c != 0; break;
859 case Py_GT: c = c > 0; break;
860 case Py_GE: c = c >= 0; break;
861 default:
862 result = Py_NotImplemented;
863 goto out;
864 }
865 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000866 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 Py_INCREF(result);
868 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000869}
870
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000871static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000872bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000873{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 register Py_ssize_t len;
875 register unsigned char *p;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000876 register Py_hash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 if (a->ob_shash != -1)
879 return a->ob_shash;
880 len = Py_SIZE(a);
881 p = (unsigned char *) a->ob_sval;
882 x = *p << 7;
883 while (--len >= 0)
884 x = (1000003*x) ^ *p++;
885 x ^= Py_SIZE(a);
886 if (x == -1)
887 x = -2;
888 a->ob_shash = x;
889 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000890}
891
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000892static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000893bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000894{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 if (PyIndex_Check(item)) {
896 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
897 if (i == -1 && PyErr_Occurred())
898 return NULL;
899 if (i < 0)
900 i += PyBytes_GET_SIZE(self);
901 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
902 PyErr_SetString(PyExc_IndexError,
903 "index out of range");
904 return NULL;
905 }
906 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
907 }
908 else if (PySlice_Check(item)) {
909 Py_ssize_t start, stop, step, slicelength, cur, i;
910 char* source_buf;
911 char* result_buf;
912 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000913
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000914 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 PyBytes_GET_SIZE(self),
916 &start, &stop, &step, &slicelength) < 0) {
917 return NULL;
918 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 if (slicelength <= 0) {
921 return PyBytes_FromStringAndSize("", 0);
922 }
923 else if (start == 0 && step == 1 &&
924 slicelength == PyBytes_GET_SIZE(self) &&
925 PyBytes_CheckExact(self)) {
926 Py_INCREF(self);
927 return (PyObject *)self;
928 }
929 else if (step == 1) {
930 return PyBytes_FromStringAndSize(
931 PyBytes_AS_STRING(self) + start,
932 slicelength);
933 }
934 else {
935 source_buf = PyBytes_AS_STRING(self);
936 result = PyBytes_FromStringAndSize(NULL, slicelength);
937 if (result == NULL)
938 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 result_buf = PyBytes_AS_STRING(result);
941 for (cur = start, i = 0; i < slicelength;
942 cur += step, i++) {
943 result_buf[i] = source_buf[cur];
944 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 return result;
947 }
948 }
949 else {
950 PyErr_Format(PyExc_TypeError,
951 "byte indices must be integers, not %.200s",
952 Py_TYPE(item)->tp_name);
953 return NULL;
954 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000955}
956
957static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000958bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
961 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962}
963
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000964static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 (lenfunc)bytes_length, /*sq_length*/
966 (binaryfunc)bytes_concat, /*sq_concat*/
967 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
968 (ssizeargfunc)bytes_item, /*sq_item*/
969 0, /*sq_slice*/
970 0, /*sq_ass_item*/
971 0, /*sq_ass_slice*/
972 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973};
974
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000975static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 (lenfunc)bytes_length,
977 (binaryfunc)bytes_subscript,
978 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000979};
980
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000981static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 (getbufferproc)bytes_buffer_getbuffer,
983 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000984};
985
986
/* Strip modes.  Each value doubles as an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip mode above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
995
Neal Norwitz6968b052007-02-27 19:02:19 +0000996PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000997"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000998\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000999Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001000If sep is not specified or is None, B is split on ASCII whitespace\n\
1001characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001002If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001003
1004static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001005bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001006{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1008 Py_ssize_t maxsplit = -1;
1009 const char *s = PyBytes_AS_STRING(self), *sub;
1010 Py_buffer vsub;
1011 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1014 return NULL;
1015 if (maxsplit < 0)
1016 maxsplit = PY_SSIZE_T_MAX;
1017 if (subobj == Py_None)
1018 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1019 if (_getbuffer(subobj, &vsub) < 0)
1020 return NULL;
1021 sub = vsub.buf;
1022 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1025 PyBuffer_Release(&vsub);
1026 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001027}
1028
Neal Norwitz6968b052007-02-27 19:02:19 +00001029PyDoc_STRVAR(partition__doc__,
1030"B.partition(sep) -> (head, sep, tail)\n\
1031\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001032Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001033the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001034found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001035
1036static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001037bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001038{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 const char *sep;
1040 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 if (PyBytes_Check(sep_obj)) {
1043 sep = PyBytes_AS_STRING(sep_obj);
1044 sep_len = PyBytes_GET_SIZE(sep_obj);
1045 }
1046 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1047 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 return stringlib_partition(
1050 (PyObject*) self,
1051 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1052 sep_obj, sep, sep_len
1053 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001054}
1055
1056PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001057"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001058\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001059Search for the separator sep in B, starting at the end of B,\n\
1060and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001061part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001063
1064static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001065bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 const char *sep;
1068 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 if (PyBytes_Check(sep_obj)) {
1071 sep = PyBytes_AS_STRING(sep_obj);
1072 sep_len = PyBytes_GET_SIZE(sep_obj);
1073 }
1074 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1075 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return stringlib_rpartition(
1078 (PyObject*) self,
1079 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1080 sep_obj, sep, sep_len
1081 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001082}
1083
Neal Norwitz6968b052007-02-27 19:02:19 +00001084PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001085"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001086\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001087Return a list of the sections in B, using sep as the delimiter,\n\
1088starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001089If sep is not given, B is split on ASCII whitespace characters\n\
1090(space, tab, return, newline, formfeed, vertical tab).\n\
1091If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001092
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001093
Neal Norwitz6968b052007-02-27 19:02:19 +00001094static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001095bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001096{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1098 Py_ssize_t maxsplit = -1;
1099 const char *s = PyBytes_AS_STRING(self), *sub;
1100 Py_buffer vsub;
1101 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1104 return NULL;
1105 if (maxsplit < 0)
1106 maxsplit = PY_SSIZE_T_MAX;
1107 if (subobj == Py_None)
1108 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1109 if (_getbuffer(subobj, &vsub) < 0)
1110 return NULL;
1111 sub = vsub.buf;
1112 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1115 PyBuffer_Release(&vsub);
1116 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001117}
1118
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001119
1120PyDoc_STRVAR(join__doc__,
1121"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001122\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001123Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1125
Neal Norwitz6968b052007-02-27 19:02:19 +00001126static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001127bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 char *sep = PyBytes_AS_STRING(self);
1130 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1131 PyObject *res = NULL;
1132 char *p;
1133 Py_ssize_t seqlen = 0;
1134 size_t sz = 0;
1135 Py_ssize_t i;
1136 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 seq = PySequence_Fast(orig, "");
1139 if (seq == NULL) {
1140 return NULL;
1141 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 seqlen = PySequence_Size(seq);
1144 if (seqlen == 0) {
1145 Py_DECREF(seq);
1146 return PyBytes_FromString("");
1147 }
1148 if (seqlen == 1) {
1149 item = PySequence_Fast_GET_ITEM(seq, 0);
1150 if (PyBytes_CheckExact(item)) {
1151 Py_INCREF(item);
1152 Py_DECREF(seq);
1153 return item;
1154 }
1155 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 /* There are at least two things to join, or else we have a subclass
1158 * of the builtin types in the sequence.
1159 * Do a pre-pass to figure out the total amount of space we'll
1160 * need (sz), and see whether all argument are bytes.
1161 */
1162 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1163 for (i = 0; i < seqlen; i++) {
1164 const size_t old_sz = sz;
1165 item = PySequence_Fast_GET_ITEM(seq, i);
1166 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1167 PyErr_Format(PyExc_TypeError,
1168 "sequence item %zd: expected bytes,"
1169 " %.80s found",
1170 i, Py_TYPE(item)->tp_name);
1171 Py_DECREF(seq);
1172 return NULL;
1173 }
1174 sz += Py_SIZE(item);
1175 if (i != 0)
1176 sz += seplen;
1177 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1178 PyErr_SetString(PyExc_OverflowError,
1179 "join() result is too long for bytes");
1180 Py_DECREF(seq);
1181 return NULL;
1182 }
1183 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 /* Allocate result space. */
1186 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1187 if (res == NULL) {
1188 Py_DECREF(seq);
1189 return NULL;
1190 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 /* Catenate everything. */
1193 /* I'm not worried about a PyByteArray item growing because there's
1194 nowhere in this function where we release the GIL. */
1195 p = PyBytes_AS_STRING(res);
1196 for (i = 0; i < seqlen; ++i) {
1197 size_t n;
1198 char *q;
1199 if (i) {
1200 Py_MEMCPY(p, sep, seplen);
1201 p += seplen;
1202 }
1203 item = PySequence_Fast_GET_ITEM(seq, i);
1204 n = Py_SIZE(item);
1205 if (PyBytes_Check(item))
1206 q = PyBytes_AS_STRING(item);
1207 else
1208 q = PyByteArray_AS_STRING(item);
1209 Py_MEMCPY(p, q, n);
1210 p += n;
1211 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 Py_DECREF(seq);
1214 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001215}
1216
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217PyObject *
1218_PyBytes_Join(PyObject *sep, PyObject *x)
1219{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 assert(sep != NULL && PyBytes_Check(sep));
1221 assert(x != NULL);
1222 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223}
1224
/* Helper macro to fix up start/end slice values in place.

   start and end must be modifiable lvalues; len is the sequence
   length.  end is clamped to len; negative values are taken relative
   to len and clamped at 0.  Every argument is parenthesized so that an
   expression such as `x & mask` can be passed as len.  Arguments are
   evaluated more than once, so they must be free of side effects.
   The expansion is a pair of plain `if` statements (not wrapped in
   do/while), so do not use it as the unbraced body of an if/else. */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239
1240Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001241bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 PyObject *subobj;
1244 const char *sub;
1245 Py_ssize_t sub_len;
1246 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001247
Jesus Ceaac451502011-04-20 17:09:23 +02001248 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1249 args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 if (PyBytes_Check(subobj)) {
1253 sub = PyBytes_AS_STRING(subobj);
1254 sub_len = PyBytes_GET_SIZE(subobj);
1255 }
1256 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1257 /* XXX - the "expected a character buffer object" is pretty
1258 confusing for a non-expert. remap to something else ? */
1259 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 if (dir > 0)
1262 return stringlib_find_slice(
1263 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1264 sub, sub_len, start, end);
1265 else
1266 return stringlib_rfind_slice(
1267 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1268 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269}
1270
1271
1272PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001273"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001274\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001275Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001276such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001278\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279Return -1 on failure.");
1280
Neal Norwitz6968b052007-02-27 19:02:19 +00001281static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001282bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001283{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 Py_ssize_t result = bytes_find_internal(self, args, +1);
1285 if (result == -2)
1286 return NULL;
1287 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001288}
1289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290
1291PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001292"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001293\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294Like B.find() but raise ValueError when the substring is not found.");
1295
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001296static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001297bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 Py_ssize_t result = bytes_find_internal(self, args, +1);
1300 if (result == -2)
1301 return NULL;
1302 if (result == -1) {
1303 PyErr_SetString(PyExc_ValueError,
1304 "substring not found");
1305 return NULL;
1306 }
1307 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001308}
1309
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310
1311PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001312"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001313\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001315such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001317\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318Return -1 on failure.");
1319
Neal Norwitz6968b052007-02-27 19:02:19 +00001320static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001321bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001322{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 Py_ssize_t result = bytes_find_internal(self, args, -1);
1324 if (result == -2)
1325 return NULL;
1326 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001327}
1328
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001329
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001330PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001331"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332\n\
1333Like B.rfind() but raise ValueError when the substring is not found.");
1334
1335static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001336bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001337{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 Py_ssize_t result = bytes_find_internal(self, args, -1);
1339 if (result == -2)
1340 return NULL;
1341 if (result == -1) {
1342 PyErr_SetString(PyExc_ValueError,
1343 "substring not found");
1344 return NULL;
1345 }
1346 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001347}
1348
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349
1350Py_LOCAL_INLINE(PyObject *)
1351do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001352{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 Py_buffer vsep;
1354 char *s = PyBytes_AS_STRING(self);
1355 Py_ssize_t len = PyBytes_GET_SIZE(self);
1356 char *sep;
1357 Py_ssize_t seplen;
1358 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 if (_getbuffer(sepobj, &vsep) < 0)
1361 return NULL;
1362 sep = vsep.buf;
1363 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 i = 0;
1366 if (striptype != RIGHTSTRIP) {
1367 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1368 i++;
1369 }
1370 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 j = len;
1373 if (striptype != LEFTSTRIP) {
1374 do {
1375 j--;
1376 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1377 j++;
1378 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1383 Py_INCREF(self);
1384 return (PyObject*)self;
1385 }
1386 else
1387 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001388}
1389
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001390
1391Py_LOCAL_INLINE(PyObject *)
1392do_strip(PyBytesObject *self, int striptype)
1393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 char *s = PyBytes_AS_STRING(self);
1395 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 i = 0;
1398 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001399 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 i++;
1401 }
1402 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 j = len;
1405 if (striptype != LEFTSTRIP) {
1406 do {
1407 j--;
David Malcolm96960882010-11-05 17:23:41 +00001408 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 j++;
1410 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1413 Py_INCREF(self);
1414 return (PyObject*)self;
1415 }
1416 else
1417 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418}
1419
1420
1421Py_LOCAL_INLINE(PyObject *)
1422do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1423{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1427 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (sep != NULL && sep != Py_None) {
1430 return do_xstrip(self, striptype, sep);
1431 }
1432 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433}
1434
1435
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001436PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001437"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001438\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001439Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001441static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001442bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001443{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 if (PyTuple_GET_SIZE(args) == 0)
1445 return do_strip(self, BOTHSTRIP); /* Common case */
1446 else
1447 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001448}
1449
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001451PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001452"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001454Strip leading bytes contained in the argument.\n\
1455If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001456static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001457bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 if (PyTuple_GET_SIZE(args) == 0)
1460 return do_strip(self, LEFTSTRIP); /* Common case */
1461 else
1462 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001463}
1464
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001465
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001467"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001469Strip trailing bytes contained in the argument.\n\
1470If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001471static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001472bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (PyTuple_GET_SIZE(args) == 0)
1475 return do_strip(self, RIGHTSTRIP); /* Common case */
1476 else
1477 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001478}
Neal Norwitz6968b052007-02-27 19:02:19 +00001479
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001480
1481PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001482"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001483\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001485string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001486as in slice notation.");
1487
1488static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001489bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 PyObject *sub_obj;
1492 const char *str = PyBytes_AS_STRING(self), *sub;
1493 Py_ssize_t sub_len;
1494 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001495
Jesus Ceaac451502011-04-20 17:09:23 +02001496 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 if (PyBytes_Check(sub_obj)) {
1500 sub = PyBytes_AS_STRING(sub_obj);
1501 sub_len = PyBytes_GET_SIZE(sub_obj);
1502 }
1503 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1504 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 return PyLong_FromSsize_t(
1509 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1510 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001511}
1512
1513
1514PyDoc_STRVAR(translate__doc__,
1515"B.translate(table[, deletechars]) -> bytes\n\
1516\n\
1517Return a copy of B, where all characters occurring in the\n\
1518optional argument deletechars are removed, and the remaining\n\
1519characters have been mapped through the given translation\n\
1520table, which must be a bytes object of length 256.");
1521
1522static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001523bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 register char *input, *output;
1526 const char *table;
1527 register Py_ssize_t i, c, changed = 0;
1528 PyObject *input_obj = (PyObject*)self;
1529 const char *output_start, *del_table=NULL;
1530 Py_ssize_t inlen, tablen, dellen = 0;
1531 PyObject *result;
1532 int trans_table[256];
1533 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1536 &tableobj, &delobj))
1537 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 if (PyBytes_Check(tableobj)) {
1540 table = PyBytes_AS_STRING(tableobj);
1541 tablen = PyBytes_GET_SIZE(tableobj);
1542 }
1543 else if (tableobj == Py_None) {
1544 table = NULL;
1545 tablen = 256;
1546 }
1547 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1548 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001549
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 if (tablen != 256) {
1551 PyErr_SetString(PyExc_ValueError,
1552 "translation table must be 256 characters long");
1553 return NULL;
1554 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 if (delobj != NULL) {
1557 if (PyBytes_Check(delobj)) {
1558 del_table = PyBytes_AS_STRING(delobj);
1559 dellen = PyBytes_GET_SIZE(delobj);
1560 }
1561 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1562 return NULL;
1563 }
1564 else {
1565 del_table = NULL;
1566 dellen = 0;
1567 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 inlen = PyBytes_GET_SIZE(input_obj);
1570 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1571 if (result == NULL)
1572 return NULL;
1573 output_start = output = PyBytes_AsString(result);
1574 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 if (dellen == 0 && table != NULL) {
1577 /* If no deletions are required, use faster code */
1578 for (i = inlen; --i >= 0; ) {
1579 c = Py_CHARMASK(*input++);
1580 if (Py_CHARMASK((*output++ = table[c])) != c)
1581 changed = 1;
1582 }
1583 if (changed || !PyBytes_CheckExact(input_obj))
1584 return result;
1585 Py_DECREF(result);
1586 Py_INCREF(input_obj);
1587 return input_obj;
1588 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 if (table == NULL) {
1591 for (i = 0; i < 256; i++)
1592 trans_table[i] = Py_CHARMASK(i);
1593 } else {
1594 for (i = 0; i < 256; i++)
1595 trans_table[i] = Py_CHARMASK(table[i]);
1596 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 for (i = 0; i < dellen; i++)
1599 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 for (i = inlen; --i >= 0; ) {
1602 c = Py_CHARMASK(*input++);
1603 if (trans_table[c] != -1)
1604 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1605 continue;
1606 changed = 1;
1607 }
1608 if (!changed && PyBytes_CheckExact(input_obj)) {
1609 Py_DECREF(result);
1610 Py_INCREF(input_obj);
1611 return input_obj;
1612 }
1613 /* Fix the size of the resulting string */
1614 if (inlen > 0)
1615 _PyBytes_Resize(&result, output - output_start);
1616 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001617}
1618
1619
Georg Brandlabc38772009-04-12 15:51:51 +00001620static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001621bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001622{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001624}
1625
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626/* find and count characters and substrings */
1627
/* Find the first byte equal to c within the target_len bytes that start at
   target.  Evaluates to a char * pointing at the match, or to NULL when c
   does not occur.  Every parameter is parenthesized so that arbitrary
   expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1630
1631/* String ops must return a string. */
1632/* If the object is subclass of string, create a copy */
1633Py_LOCAL(PyBytesObject *)
1634return_self(PyBytesObject *self)
1635{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 if (PyBytes_CheckExact(self)) {
1637 Py_INCREF(self);
1638 return self;
1639 }
1640 return (PyBytesObject *)PyBytes_FromStringAndSize(
1641 PyBytes_AS_STRING(self),
1642 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001643}
1644
1645Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001646countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 Py_ssize_t count=0;
1649 const char *start=target;
1650 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 while ( (start=findchar(start, end-start, c)) != NULL ) {
1653 count++;
1654 if (count >= maxcount)
1655 break;
1656 start += 1;
1657 }
1658 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001659}
1660
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001661
1662/* Algorithms for different cases of string replacement */
1663
1664/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1665Py_LOCAL(PyBytesObject *)
1666replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 const char *to_s, Py_ssize_t to_len,
1668 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 char *self_s, *result_s;
1671 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001672 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001677 /* 1 at the end plus 1 after every character;
1678 count = min(maxcount, self_len + 1) */
1679 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001681 else
1682 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1683 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 /* Check for overflow */
1686 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001687 assert(count > 0);
1688 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 PyErr_SetString(PyExc_OverflowError,
1690 "replacement bytes are too long");
1691 return NULL;
1692 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001693 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 if (! (result = (PyBytesObject *)
1696 PyBytes_FromStringAndSize(NULL, result_len)) )
1697 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 self_s = PyBytes_AS_STRING(self);
1700 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 /* Lay the first one down (guaranteed this will occur) */
1705 Py_MEMCPY(result_s, to_s, to_len);
1706 result_s += to_len;
1707 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 for (i=0; i<count; i++) {
1710 *result_s++ = *self_s++;
1711 Py_MEMCPY(result_s, to_s, to_len);
1712 result_s += to_len;
1713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 /* Copy the rest of the original string */
1716 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001719}
1720
1721/* Special case for deleting a single character */
1722/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1723Py_LOCAL(PyBytesObject *)
1724replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 char *self_s, *result_s;
1728 char *start, *next, *end;
1729 Py_ssize_t self_len, result_len;
1730 Py_ssize_t count;
1731 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 self_len = PyBytes_GET_SIZE(self);
1734 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 count = countchar(self_s, self_len, from_c, maxcount);
1737 if (count == 0) {
1738 return return_self(self);
1739 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 result_len = self_len - count; /* from_len == 1 */
1742 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 if ( (result = (PyBytesObject *)
1745 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1746 return NULL;
1747 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 start = self_s;
1750 end = self_s + self_len;
1751 while (count-- > 0) {
1752 next = findchar(start, end-start, from_c);
1753 if (next == NULL)
1754 break;
1755 Py_MEMCPY(result_s, start, next-start);
1756 result_s += (next-start);
1757 start = next+1;
1758 }
1759 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001761 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762}
1763
1764/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1765
1766Py_LOCAL(PyBytesObject *)
1767replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 const char *from_s, Py_ssize_t from_len,
1769 Py_ssize_t maxcount) {
1770 char *self_s, *result_s;
1771 char *start, *next, *end;
1772 Py_ssize_t self_len, result_len;
1773 Py_ssize_t count, offset;
1774 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 self_len = PyBytes_GET_SIZE(self);
1777 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 count = stringlib_count(self_s, self_len,
1780 from_s, from_len,
1781 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001782
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 if (count == 0) {
1784 /* no matches */
1785 return return_self(self);
1786 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 result_len = self_len - (count * from_len);
1789 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 if ( (result = (PyBytesObject *)
1792 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1793 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 start = self_s;
1798 end = self_s + self_len;
1799 while (count-- > 0) {
1800 offset = stringlib_find(start, end-start,
1801 from_s, from_len,
1802 0);
1803 if (offset == -1)
1804 break;
1805 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001808
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001809 result_s += (next-start);
1810 start = next+from_len;
1811 }
1812 Py_MEMCPY(result_s, start, end-start);
1813 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814}
1815
1816/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1817Py_LOCAL(PyBytesObject *)
1818replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001819 char from_c, char to_c,
1820 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 char *self_s, *result_s, *start, *end, *next;
1823 Py_ssize_t self_len;
1824 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 /* The result string will be the same size */
1827 self_s = PyBytes_AS_STRING(self);
1828 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 if (next == NULL) {
1833 /* No matches; return the original string */
1834 return return_self(self);
1835 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 /* Need to make a new string */
1838 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1839 if (result == NULL)
1840 return NULL;
1841 result_s = PyBytes_AS_STRING(result);
1842 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 /* change everything in-place, starting with this one */
1845 start = result_s + (next-self_s);
1846 *start = to_c;
1847 start++;
1848 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 while (--maxcount > 0) {
1851 next = findchar(start, end-start, from_c);
1852 if (next == NULL)
1853 break;
1854 *next = to_c;
1855 start = next+1;
1856 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001859}
1860
1861/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1862Py_LOCAL(PyBytesObject *)
1863replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 const char *from_s, Py_ssize_t from_len,
1865 const char *to_s, Py_ssize_t to_len,
1866 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 char *result_s, *start, *end;
1869 char *self_s;
1870 Py_ssize_t self_len, offset;
1871 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 self_s = PyBytes_AS_STRING(self);
1876 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 offset = stringlib_find(self_s, self_len,
1879 from_s, from_len,
1880 0);
1881 if (offset == -1) {
1882 /* No matches; return the original string */
1883 return return_self(self);
1884 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 /* Need to make a new string */
1887 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1888 if (result == NULL)
1889 return NULL;
1890 result_s = PyBytes_AS_STRING(result);
1891 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 /* change everything in-place, starting with this one */
1894 start = result_s + offset;
1895 Py_MEMCPY(start, to_s, from_len);
1896 start += from_len;
1897 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 while ( --maxcount > 0) {
1900 offset = stringlib_find(start, end-start,
1901 from_s, from_len,
1902 0);
1903 if (offset==-1)
1904 break;
1905 Py_MEMCPY(start+offset, to_s, from_len);
1906 start += offset+from_len;
1907 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001910}
1911
1912/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1913Py_LOCAL(PyBytesObject *)
1914replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 char from_c,
1916 const char *to_s, Py_ssize_t to_len,
1917 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001918{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 char *self_s, *result_s;
1920 char *start, *next, *end;
1921 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001922 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 self_s = PyBytes_AS_STRING(self);
1926 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001927
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 count = countchar(self_s, self_len, from_c, maxcount);
1929 if (count == 0) {
1930 /* no matches, return unchanged */
1931 return return_self(self);
1932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 /* use the difference between current and new, hence the "-1" */
1935 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001936 assert(count > 0);
1937 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 PyErr_SetString(PyExc_OverflowError,
1939 "replacement bytes are too long");
1940 return NULL;
1941 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001942 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 if ( (result = (PyBytesObject *)
1945 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1946 return NULL;
1947 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 start = self_s;
1950 end = self_s + self_len;
1951 while (count-- > 0) {
1952 next = findchar(start, end-start, from_c);
1953 if (next == NULL)
1954 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 if (next == start) {
1957 /* replace with the 'to' */
1958 Py_MEMCPY(result_s, to_s, to_len);
1959 result_s += to_len;
1960 start += 1;
1961 } else {
1962 /* copy the unchanged old then the 'to' */
1963 Py_MEMCPY(result_s, start, next-start);
1964 result_s += (next-start);
1965 Py_MEMCPY(result_s, to_s, to_len);
1966 result_s += to_len;
1967 start = next+1;
1968 }
1969 }
1970 /* Copy the remainder of the remaining string */
1971 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974}
1975
1976/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1977Py_LOCAL(PyBytesObject *)
1978replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 const char *from_s, Py_ssize_t from_len,
1980 const char *to_s, Py_ssize_t to_len,
1981 Py_ssize_t maxcount) {
1982 char *self_s, *result_s;
1983 char *start, *next, *end;
1984 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001985 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001988 self_s = PyBytes_AS_STRING(self);
1989 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 count = stringlib_count(self_s, self_len,
1992 from_s, from_len,
1993 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001994
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 if (count == 0) {
1996 /* no matches, return unchanged */
1997 return return_self(self);
1998 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 /* Check for overflow */
2001 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002002 assert(count > 0);
2003 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002004 PyErr_SetString(PyExc_OverflowError,
2005 "replacement bytes are too long");
2006 return NULL;
2007 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002008 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 if ( (result = (PyBytesObject *)
2011 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2012 return NULL;
2013 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002015 start = self_s;
2016 end = self_s + self_len;
2017 while (count-- > 0) {
2018 offset = stringlib_find(start, end-start,
2019 from_s, from_len,
2020 0);
2021 if (offset == -1)
2022 break;
2023 next = start+offset;
2024 if (next == start) {
2025 /* replace with the 'to' */
2026 Py_MEMCPY(result_s, to_s, to_len);
2027 result_s += to_len;
2028 start += from_len;
2029 } else {
2030 /* copy the unchanged old then the 'to' */
2031 Py_MEMCPY(result_s, start, next-start);
2032 result_s += (next-start);
2033 Py_MEMCPY(result_s, to_s, to_len);
2034 result_s += to_len;
2035 start = next+from_len;
2036 }
2037 }
2038 /* Copy the remainder of the remaining string */
2039 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002042}
2043
2044
2045Py_LOCAL(PyBytesObject *)
2046replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 const char *from_s, Py_ssize_t from_len,
2048 const char *to_s, Py_ssize_t to_len,
2049 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 if (maxcount < 0) {
2052 maxcount = PY_SSIZE_T_MAX;
2053 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2054 /* nothing to do; return the original string */
2055 return return_self(self);
2056 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 if (maxcount == 0 ||
2059 (from_len == 0 && to_len == 0)) {
2060 /* nothing to do; return the original string */
2061 return return_self(self);
2062 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 if (from_len == 0) {
2067 /* insert the 'to' string everywhere. */
2068 /* >>> "Python".replace("", ".") */
2069 /* '.P.y.t.h.o.n.' */
2070 return replace_interleave(self, to_s, to_len, maxcount);
2071 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2074 /* point for an empty self string to generate a non-empty string */
2075 /* Special case so the remaining code always gets a non-empty string */
2076 if (PyBytes_GET_SIZE(self) == 0) {
2077 return return_self(self);
2078 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 if (to_len == 0) {
2081 /* delete all occurrences of 'from' string */
2082 if (from_len == 1) {
2083 return replace_delete_single_character(
2084 self, from_s[0], maxcount);
2085 } else {
2086 return replace_delete_substring(self, from_s,
2087 from_len, maxcount);
2088 }
2089 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 if (from_len == to_len) {
2094 if (from_len == 1) {
2095 return replace_single_character_in_place(
2096 self,
2097 from_s[0],
2098 to_s[0],
2099 maxcount);
2100 } else {
2101 return replace_substring_in_place(
2102 self, from_s, from_len, to_s, to_len,
2103 maxcount);
2104 }
2105 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 /* Otherwise use the more generic algorithms */
2108 if (from_len == 1) {
2109 return replace_single_character(self, from_s[0],
2110 to_s, to_len, maxcount);
2111 } else {
2112 /* len('from')>=2, len('to')>=1 */
2113 return replace_substring(self, from_s, from_len, to_s, to_len,
2114 maxcount);
2115 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116}
2117
2118PyDoc_STRVAR(replace__doc__,
2119"B.replace(old, new[, count]) -> bytes\n\
2120\n\
2121Return a copy of B with all occurrences of subsection\n\
2122old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002123given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
2125static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002126bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 Py_ssize_t count = -1;
2129 PyObject *from, *to;
2130 const char *from_s, *to_s;
2131 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002133 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2134 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 if (PyBytes_Check(from)) {
2137 from_s = PyBytes_AS_STRING(from);
2138 from_len = PyBytes_GET_SIZE(from);
2139 }
2140 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2141 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 if (PyBytes_Check(to)) {
2144 to_s = PyBytes_AS_STRING(to);
2145 to_len = PyBytes_GET_SIZE(to);
2146 }
2147 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2148 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 return (PyObject *)replace((PyBytesObject *) self,
2151 from_s, from_len,
2152 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153}
2154
2155/** End DALKE **/
2156
2157/* Matches the end (direction >= 0) or start (direction < 0) of self
2158 * against substr, using the start and end arguments. Returns
2159 * -1 on error, 0 if not found and 1 if found.
2160 */
2161Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002162_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002165 Py_ssize_t len = PyBytes_GET_SIZE(self);
2166 Py_ssize_t slen;
2167 const char* sub;
2168 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 if (PyBytes_Check(substr)) {
2171 sub = PyBytes_AS_STRING(substr);
2172 slen = PyBytes_GET_SIZE(substr);
2173 }
2174 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2175 return -1;
2176 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 if (direction < 0) {
2181 /* startswith */
2182 if (start+slen > len)
2183 return 0;
2184 } else {
2185 /* endswith */
2186 if (end-start < slen || start > len)
2187 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 if (end-slen > start)
2190 start = end - slen;
2191 }
2192 if (end-start >= slen)
2193 return ! memcmp(str+start, sub, slen);
2194 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195}
2196
2197
2198PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002199"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002200\n\
2201Return True if B starts with the specified prefix, False otherwise.\n\
2202With optional start, test B beginning at that position.\n\
2203With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002204prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002205
2206static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002207bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 Py_ssize_t start = 0;
2210 Py_ssize_t end = PY_SSIZE_T_MAX;
2211 PyObject *subobj;
2212 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213
Jesus Ceaac451502011-04-20 17:09:23 +02002214 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 return NULL;
2216 if (PyTuple_Check(subobj)) {
2217 Py_ssize_t i;
2218 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2219 result = _bytes_tailmatch(self,
2220 PyTuple_GET_ITEM(subobj, i),
2221 start, end, -1);
2222 if (result == -1)
2223 return NULL;
2224 else if (result) {
2225 Py_RETURN_TRUE;
2226 }
2227 }
2228 Py_RETURN_FALSE;
2229 }
2230 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002231 if (result == -1) {
2232 if (PyErr_ExceptionMatches(PyExc_TypeError))
2233 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2234 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002235 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002236 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002237 else
2238 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002239}
2240
2241
2242PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002243"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002244\n\
2245Return True if B ends with the specified suffix, False otherwise.\n\
2246With optional start, test B beginning at that position.\n\
2247With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002248suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002249
2250static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002251bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002253 Py_ssize_t start = 0;
2254 Py_ssize_t end = PY_SSIZE_T_MAX;
2255 PyObject *subobj;
2256 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257
Jesus Ceaac451502011-04-20 17:09:23 +02002258 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002259 return NULL;
2260 if (PyTuple_Check(subobj)) {
2261 Py_ssize_t i;
2262 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2263 result = _bytes_tailmatch(self,
2264 PyTuple_GET_ITEM(subobj, i),
2265 start, end, +1);
2266 if (result == -1)
2267 return NULL;
2268 else if (result) {
2269 Py_RETURN_TRUE;
2270 }
2271 }
2272 Py_RETURN_FALSE;
2273 }
2274 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002275 if (result == -1) {
2276 if (PyErr_ExceptionMatches(PyExc_TypeError))
2277 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2278 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002279 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002280 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002281 else
2282 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002283}
2284
2285
2286PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002287"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002289Decode B using the codec registered for encoding. Default encoding\n\
2290is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002291handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2292a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002293as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002294able to handle UnicodeDecodeErrors.");
2295
2296static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002297bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 const char *encoding = NULL;
2300 const char *errors = NULL;
2301 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002303 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2304 return NULL;
2305 if (encoding == NULL)
2306 encoding = PyUnicode_GetDefaultEncoding();
2307 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002308}
2309
Guido van Rossum20188312006-05-05 15:15:40 +00002310
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002311PyDoc_STRVAR(splitlines__doc__,
2312"B.splitlines([keepends]) -> list of lines\n\
2313\n\
2314Return a list of the lines in B, breaking at line boundaries.\n\
2315Line breaks are not included in the resulting list unless keepends\n\
2316is given and true.");
2317
2318static PyObject*
2319bytes_splitlines(PyObject *self, PyObject *args)
2320{
2321 int keepends = 0;
2322
2323 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002324 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002325
2326 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002327 (PyObject*) self, PyBytes_AS_STRING(self),
2328 PyBytes_GET_SIZE(self), keepends
2329 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002330}
2331
2332
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002333PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002335\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002336Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002337Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002338Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002339
2340static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002341hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002342{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002343 if (c >= 128)
2344 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002345 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 return c - '0';
2347 else {
David Malcolm96960882010-11-05 17:23:41 +00002348 if (Py_ISUPPER(c))
2349 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 if (c >= 'a' && c <= 'f')
2351 return c - 'a' + 10;
2352 }
2353 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002354}
2355
2356static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002357bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 PyObject *newstring, *hexobj;
2360 char *buf;
2361 Py_UNICODE *hex;
2362 Py_ssize_t hexlen, byteslen, i, j;
2363 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2366 return NULL;
2367 assert(PyUnicode_Check(hexobj));
2368 hexlen = PyUnicode_GET_SIZE(hexobj);
2369 hex = PyUnicode_AS_UNICODE(hexobj);
2370 byteslen = hexlen/2; /* This overestimates if there are spaces */
2371 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2372 if (!newstring)
2373 return NULL;
2374 buf = PyBytes_AS_STRING(newstring);
2375 for (i = j = 0; i < hexlen; i += 2) {
2376 /* skip over spaces in the input */
2377 while (hex[i] == ' ')
2378 i++;
2379 if (i >= hexlen)
2380 break;
2381 top = hex_digit_to_int(hex[i]);
2382 bot = hex_digit_to_int(hex[i+1]);
2383 if (top == -1 || bot == -1) {
2384 PyErr_Format(PyExc_ValueError,
2385 "non-hexadecimal number found in "
2386 "fromhex() arg at position %zd", i);
2387 goto error;
2388 }
2389 buf[j++] = (top << 4) + bot;
2390 }
2391 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2392 goto error;
2393 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002394
2395 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002396 Py_XDECREF(newstring);
2397 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002398}
2399
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002400PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002401"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002402
2403static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002404bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002405{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 Py_ssize_t res;
2407 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2408 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002409}
2410
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002411
2412static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002413bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002414{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002416}
2417
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002418
2419static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002420bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002421 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2422 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2423 _Py_capitalize__doc__},
2424 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2425 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2426 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2427 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2428 endswith__doc__},
2429 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2430 expandtabs__doc__},
2431 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2432 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2433 fromhex_doc},
2434 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2435 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2436 _Py_isalnum__doc__},
2437 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2438 _Py_isalpha__doc__},
2439 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2440 _Py_isdigit__doc__},
2441 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2442 _Py_islower__doc__},
2443 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2444 _Py_isspace__doc__},
2445 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2446 _Py_istitle__doc__},
2447 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2448 _Py_isupper__doc__},
2449 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2450 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2451 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2452 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2453 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2454 _Py_maketrans__doc__},
2455 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2456 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2457 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2458 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2459 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2460 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2461 rpartition__doc__},
2462 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2463 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2464 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2465 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2466 splitlines__doc__},
2467 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2468 startswith__doc__},
2469 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2470 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2471 _Py_swapcase__doc__},
2472 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2473 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2474 translate__doc__},
2475 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2476 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2477 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2478 sizeof__doc__},
2479 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002480};
2481
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002482static PyObject *
2483str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2484
2485static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002486bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 PyObject *x = NULL;
2489 const char *encoding = NULL;
2490 const char *errors = NULL;
2491 PyObject *new = NULL;
2492 Py_ssize_t size;
2493 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002495 if (type != &PyBytes_Type)
2496 return str_subtype_new(type, args, kwds);
2497 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2498 &encoding, &errors))
2499 return NULL;
2500 if (x == NULL) {
2501 if (encoding != NULL || errors != NULL) {
2502 PyErr_SetString(PyExc_TypeError,
2503 "encoding or errors without sequence "
2504 "argument");
2505 return NULL;
2506 }
2507 return PyBytes_FromString("");
2508 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 if (PyUnicode_Check(x)) {
2511 /* Encode via the codec registry */
2512 if (encoding == NULL) {
2513 PyErr_SetString(PyExc_TypeError,
2514 "string argument without an encoding");
2515 return NULL;
2516 }
2517 new = PyUnicode_AsEncodedString(x, encoding, errors);
2518 if (new == NULL)
2519 return NULL;
2520 assert(PyBytes_Check(new));
2521 return new;
2522 }
2523 /* Is it an integer? */
2524 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2525 if (size == -1 && PyErr_Occurred()) {
2526 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2527 return NULL;
2528 PyErr_Clear();
2529 }
2530 else if (size < 0) {
2531 PyErr_SetString(PyExc_ValueError, "negative count");
2532 return NULL;
2533 }
2534 else {
2535 new = PyBytes_FromStringAndSize(NULL, size);
2536 if (new == NULL) {
2537 return NULL;
2538 }
2539 if (size > 0) {
2540 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2541 }
2542 return new;
2543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 /* If it's not unicode, there can't be encoding or errors */
2546 if (encoding != NULL || errors != NULL) {
2547 PyErr_SetString(PyExc_TypeError,
2548 "encoding or errors without a string argument");
2549 return NULL;
2550 }
2551 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002552}
2553
2554PyObject *
2555PyBytes_FromObject(PyObject *x)
2556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 PyObject *new, *it;
2558 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 if (x == NULL) {
2561 PyErr_BadInternalCall();
2562 return NULL;
2563 }
2564 /* Use the modern buffer interface */
2565 if (PyObject_CheckBuffer(x)) {
2566 Py_buffer view;
2567 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2568 return NULL;
2569 new = PyBytes_FromStringAndSize(NULL, view.len);
2570 if (!new)
2571 goto fail;
2572 /* XXX(brett.cannon): Better way to get to internal buffer? */
2573 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2574 &view, view.len, 'C') < 0)
2575 goto fail;
2576 PyBuffer_Release(&view);
2577 return new;
2578 fail:
2579 Py_XDECREF(new);
2580 PyBuffer_Release(&view);
2581 return NULL;
2582 }
2583 if (PyUnicode_Check(x)) {
2584 PyErr_SetString(PyExc_TypeError,
2585 "cannot convert unicode object to bytes");
2586 return NULL;
2587 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002589 if (PyList_CheckExact(x)) {
2590 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2591 if (new == NULL)
2592 return NULL;
2593 for (i = 0; i < Py_SIZE(x); i++) {
2594 Py_ssize_t value = PyNumber_AsSsize_t(
2595 PyList_GET_ITEM(x, i), PyExc_ValueError);
2596 if (value == -1 && PyErr_Occurred()) {
2597 Py_DECREF(new);
2598 return NULL;
2599 }
2600 if (value < 0 || value >= 256) {
2601 PyErr_SetString(PyExc_ValueError,
2602 "bytes must be in range(0, 256)");
2603 Py_DECREF(new);
2604 return NULL;
2605 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002606 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002607 }
2608 return new;
2609 }
2610 if (PyTuple_CheckExact(x)) {
2611 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2612 if (new == NULL)
2613 return NULL;
2614 for (i = 0; i < Py_SIZE(x); i++) {
2615 Py_ssize_t value = PyNumber_AsSsize_t(
2616 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2617 if (value == -1 && PyErr_Occurred()) {
2618 Py_DECREF(new);
2619 return NULL;
2620 }
2621 if (value < 0 || value >= 256) {
2622 PyErr_SetString(PyExc_ValueError,
2623 "bytes must be in range(0, 256)");
2624 Py_DECREF(new);
2625 return NULL;
2626 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002627 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 }
2629 return new;
2630 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 /* For iterator version, create a string object and resize as needed */
2633 size = _PyObject_LengthHint(x, 64);
2634 if (size == -1 && PyErr_Occurred())
2635 return NULL;
2636 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2637 returning a shared empty bytes string. This required because we
2638 want to call _PyBytes_Resize() the returned object, which we can
2639 only do on bytes objects with refcount == 1. */
2640 size += 1;
2641 new = PyBytes_FromStringAndSize(NULL, size);
2642 if (new == NULL)
2643 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002645 /* Get the iterator */
2646 it = PyObject_GetIter(x);
2647 if (it == NULL)
2648 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 /* Run the iterator to exhaustion */
2651 for (i = 0; ; i++) {
2652 PyObject *item;
2653 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 /* Get the next item */
2656 item = PyIter_Next(it);
2657 if (item == NULL) {
2658 if (PyErr_Occurred())
2659 goto error;
2660 break;
2661 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 /* Interpret it as an int (__index__) */
2664 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2665 Py_DECREF(item);
2666 if (value == -1 && PyErr_Occurred())
2667 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 /* Range check */
2670 if (value < 0 || value >= 256) {
2671 PyErr_SetString(PyExc_ValueError,
2672 "bytes must be in range(0, 256)");
2673 goto error;
2674 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 /* Append the byte */
2677 if (i >= size) {
2678 size = 2 * size + 1;
2679 if (_PyBytes_Resize(&new, size) < 0)
2680 goto error;
2681 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002682 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002683 }
2684 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 /* Clean up and return success */
2687 Py_DECREF(it);
2688 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689
2690 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* Error handling when new != NULL */
2692 Py_XDECREF(it);
2693 Py_DECREF(new);
2694 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002695}
2696
2697static PyObject *
2698str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2699{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 PyObject *tmp, *pnew;
2701 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 assert(PyType_IsSubtype(type, &PyBytes_Type));
2704 tmp = bytes_new(&PyBytes_Type, args, kwds);
2705 if (tmp == NULL)
2706 return NULL;
2707 assert(PyBytes_CheckExact(tmp));
2708 n = PyBytes_GET_SIZE(tmp);
2709 pnew = type->tp_alloc(type, n);
2710 if (pnew != NULL) {
2711 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2712 PyBytes_AS_STRING(tmp), n+1);
2713 ((PyBytesObject *)pnew)->ob_shash =
2714 ((PyBytesObject *)tmp)->ob_shash;
2715 }
2716 Py_DECREF(tmp);
2717 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718}
2719
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002720PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002721"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002723bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2724bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002725\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002726Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002727 - an iterable yielding integers in range(256)\n\
2728 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002729 - a bytes or a buffer object\n\
2730 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002731
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002732static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002733
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2736 "bytes",
2737 PyBytesObject_SIZE,
2738 sizeof(char),
2739 bytes_dealloc, /* tp_dealloc */
2740 0, /* tp_print */
2741 0, /* tp_getattr */
2742 0, /* tp_setattr */
2743 0, /* tp_reserved */
2744 (reprfunc)bytes_repr, /* tp_repr */
2745 0, /* tp_as_number */
2746 &bytes_as_sequence, /* tp_as_sequence */
2747 &bytes_as_mapping, /* tp_as_mapping */
2748 (hashfunc)bytes_hash, /* tp_hash */
2749 0, /* tp_call */
2750 bytes_str, /* tp_str */
2751 PyObject_GenericGetAttr, /* tp_getattro */
2752 0, /* tp_setattro */
2753 &bytes_as_buffer, /* tp_as_buffer */
2754 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2755 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2756 bytes_doc, /* tp_doc */
2757 0, /* tp_traverse */
2758 0, /* tp_clear */
2759 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2760 0, /* tp_weaklistoffset */
2761 bytes_iter, /* tp_iter */
2762 0, /* tp_iternext */
2763 bytes_methods, /* tp_methods */
2764 0, /* tp_members */
2765 0, /* tp_getset */
2766 &PyBaseObject_Type, /* tp_base */
2767 0, /* tp_dict */
2768 0, /* tp_descr_get */
2769 0, /* tp_descr_set */
2770 0, /* tp_dictoffset */
2771 0, /* tp_init */
2772 0, /* tp_alloc */
2773 bytes_new, /* tp_new */
2774 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002775};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002776
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002777void
2778PyBytes_Concat(register PyObject **pv, register PyObject *w)
2779{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002780 register PyObject *v;
2781 assert(pv != NULL);
2782 if (*pv == NULL)
2783 return;
2784 if (w == NULL) {
2785 Py_DECREF(*pv);
2786 *pv = NULL;
2787 return;
2788 }
2789 v = bytes_concat(*pv, w);
2790 Py_DECREF(*pv);
2791 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792}
2793
2794void
2795PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2796{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002797 PyBytes_Concat(pv, w);
2798 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799}
2800
2801
2802/* The following function breaks the notion that strings are immutable:
2803 it changes the size of a string. We get away with this only if there
2804 is only one module referencing the object. You can also think of it
2805 as creating a new string object and destroying the old one, only
2806 more efficiently. In any case, don't use this if the string may
2807 already be known to some other part of the code...
2808 Note that if there's not enough memory to resize the string, the original
2809 string object at *pv is deallocated, *pv is set to NULL, an "out of
2810 memory" exception is set, and -1 is returned. Else (on success) 0 is
2811 returned, and the value in *pv may or may not be the same as on input.
2812 As always, an extra byte is allocated for a trailing \0 byte (newsize
2813 does *not* include that), and a trailing \0 byte is stored.
2814*/
2815
2816int
2817_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2818{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002819 register PyObject *v;
2820 register PyBytesObject *sv;
2821 v = *pv;
2822 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2823 *pv = 0;
2824 Py_DECREF(v);
2825 PyErr_BadInternalCall();
2826 return -1;
2827 }
2828 /* XXX UNREF/NEWREF interface should be more symmetrical */
2829 _Py_DEC_REFTOTAL;
2830 _Py_ForgetReference(v);
2831 *pv = (PyObject *)
2832 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2833 if (*pv == NULL) {
2834 PyObject_Del(v);
2835 PyErr_NoMemory();
2836 return -1;
2837 }
2838 _Py_NewReference(*pv);
2839 sv = (PyBytesObject *) *pv;
2840 Py_SIZE(sv) = newsize;
2841 sv->ob_sval[newsize] = '\0';
2842 sv->ob_shash = -1; /* invalidate cached hash value */
2843 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844}
2845
2846/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2847 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2848 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002849 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002850 * . *pbuf is set to point into it,
2851 * *plen set to the # of chars following that.
2852 * Caller must decref it when done using pbuf.
2853 * The string starting at *pbuf is of the form
2854 * "-"? ("0x" | "0X")? digit+
2855 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2856 * set in flags. The case of hex digits will be correct,
2857 * There will be at least prec digits, zero-filled on the left if
2858 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 * val object to be converted
2860 * flags bitmask of format flags; only F_ALT is looked at
2861 * prec minimum number of digits; 0-fill on left if needed
2862 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002863 *
2864 * CAUTION: o, x and X conversions on regular ints can never
2865 * produce a '-' sign, but can for Python's unbounded ints.
2866 */
2867PyObject*
2868_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002870{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002871 PyObject *result = NULL;
2872 char *buf;
2873 Py_ssize_t i;
2874 int sign; /* 1 if '-', else 0 */
2875 int len; /* number of characters */
2876 Py_ssize_t llen;
2877 int numdigits; /* len == numnondigits + numdigits */
2878 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002880 /* Avoid exceeding SSIZE_T_MAX */
2881 if (prec > INT_MAX-3) {
2882 PyErr_SetString(PyExc_OverflowError,
2883 "precision too large");
2884 return NULL;
2885 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 switch (type) {
2888 case 'd':
2889 case 'u':
2890 /* Special-case boolean: we want 0/1 */
2891 if (PyBool_Check(val))
2892 result = PyNumber_ToBase(val, 10);
2893 else
2894 result = Py_TYPE(val)->tp_str(val);
2895 break;
2896 case 'o':
2897 numnondigits = 2;
2898 result = PyNumber_ToBase(val, 8);
2899 break;
2900 case 'x':
2901 case 'X':
2902 numnondigits = 2;
2903 result = PyNumber_ToBase(val, 16);
2904 break;
2905 default:
2906 assert(!"'type' not in [duoxX]");
2907 }
2908 if (!result)
2909 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 buf = _PyUnicode_AsString(result);
2912 if (!buf) {
2913 Py_DECREF(result);
2914 return NULL;
2915 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002917 /* To modify the string in-place, there can only be one reference. */
2918 if (Py_REFCNT(result) != 1) {
2919 PyErr_BadInternalCall();
2920 return NULL;
2921 }
2922 llen = PyUnicode_GetSize(result);
2923 if (llen > INT_MAX) {
2924 PyErr_SetString(PyExc_ValueError,
2925 "string too large in _PyBytes_FormatLong");
2926 return NULL;
2927 }
2928 len = (int)llen;
2929 if (buf[len-1] == 'L') {
2930 --len;
2931 buf[len] = '\0';
2932 }
2933 sign = buf[0] == '-';
2934 numnondigits += sign;
2935 numdigits = len - numnondigits;
2936 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002938 /* Get rid of base marker unless F_ALT */
2939 if (((flags & F_ALT) == 0 &&
2940 (type == 'o' || type == 'x' || type == 'X'))) {
2941 assert(buf[sign] == '0');
2942 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2943 buf[sign+1] == 'o');
2944 numnondigits -= 2;
2945 buf += 2;
2946 len -= 2;
2947 if (sign)
2948 buf[0] = '-';
2949 assert(len == numnondigits + numdigits);
2950 assert(numdigits > 0);
2951 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002953 /* Fill with leading zeroes to meet minimum width. */
2954 if (prec > numdigits) {
2955 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2956 numnondigits + prec);
2957 char *b1;
2958 if (!r1) {
2959 Py_DECREF(result);
2960 return NULL;
2961 }
2962 b1 = PyBytes_AS_STRING(r1);
2963 for (i = 0; i < numnondigits; ++i)
2964 *b1++ = *buf++;
2965 for (i = 0; i < prec - numdigits; i++)
2966 *b1++ = '0';
2967 for (i = 0; i < numdigits; i++)
2968 *b1++ = *buf++;
2969 *b1 = '\0';
2970 Py_DECREF(result);
2971 result = r1;
2972 buf = PyBytes_AS_STRING(result);
2973 len = numnondigits + prec;
2974 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002976 /* Fix up case for hex conversions. */
2977 if (type == 'X') {
2978 /* Need to convert all lower case letters to upper case.
2979 and need to convert 0x to 0X (and -0x to -0X). */
2980 for (i = 0; i < len; i++)
2981 if (buf[i] >= 'a' && buf[i] <= 'x')
2982 buf[i] -= 'a'-'A';
2983 }
2984 *pbuf = buf;
2985 *plen = len;
2986 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002987}
2988
2989void
2990PyBytes_Fini(void)
2991{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 int i;
2993 for (i = 0; i < UCHAR_MAX + 1; i++) {
2994 Py_XDECREF(characters[i]);
2995 characters[i] = NULL;
2996 }
2997 Py_XDECREF(nullstring);
2998 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999}
3000
Benjamin Peterson4116f362008-05-27 00:36:20 +00003001/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003002
3003typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 PyObject_HEAD
3005 Py_ssize_t it_index;
3006 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003008
3009static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003010striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003011{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003012 _PyObject_GC_UNTRACK(it);
3013 Py_XDECREF(it->it_seq);
3014 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003015}
3016
3017static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003018striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 Py_VISIT(it->it_seq);
3021 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003022}
3023
3024static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003025striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003026{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 PyBytesObject *seq;
3028 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003030 assert(it != NULL);
3031 seq = it->it_seq;
3032 if (seq == NULL)
3033 return NULL;
3034 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3037 item = PyLong_FromLong(
3038 (unsigned char)seq->ob_sval[it->it_index]);
3039 if (item != NULL)
3040 ++it->it_index;
3041 return item;
3042 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003044 Py_DECREF(seq);
3045 it->it_seq = NULL;
3046 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003047}
3048
3049static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003050striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 Py_ssize_t len = 0;
3053 if (it->it_seq)
3054 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3055 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003056}
3057
3058PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003060
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003061static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003062 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3063 length_hint_doc},
3064 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065};
3066
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003067PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003068 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3069 "bytes_iterator", /* tp_name */
3070 sizeof(striterobject), /* tp_basicsize */
3071 0, /* tp_itemsize */
3072 /* methods */
3073 (destructor)striter_dealloc, /* tp_dealloc */
3074 0, /* tp_print */
3075 0, /* tp_getattr */
3076 0, /* tp_setattr */
3077 0, /* tp_reserved */
3078 0, /* tp_repr */
3079 0, /* tp_as_number */
3080 0, /* tp_as_sequence */
3081 0, /* tp_as_mapping */
3082 0, /* tp_hash */
3083 0, /* tp_call */
3084 0, /* tp_str */
3085 PyObject_GenericGetAttr, /* tp_getattro */
3086 0, /* tp_setattro */
3087 0, /* tp_as_buffer */
3088 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3089 0, /* tp_doc */
3090 (traverseproc)striter_traverse, /* tp_traverse */
3091 0, /* tp_clear */
3092 0, /* tp_richcompare */
3093 0, /* tp_weaklistoffset */
3094 PyObject_SelfIter, /* tp_iter */
3095 (iternextfunc)striter_next, /* tp_iternext */
3096 striter_methods, /* tp_methods */
3097 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098};
3099
3100static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003101bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003102{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003103 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003105 if (!PyBytes_Check(seq)) {
3106 PyErr_BadInternalCall();
3107 return NULL;
3108 }
3109 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3110 if (it == NULL)
3111 return NULL;
3112 it->it_index = 0;
3113 Py_INCREF(seq);
3114 it->it_seq = (PyBytesObject *)seq;
3115 _PyObject_GC_TRACK(it);
3116 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003117}