blob: 2d7f16da0d4f737f6a0d8b68c8c363b1fc93e6f8 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
/* PyBytesObject_SIZE is the basic size of a bytes object: the offset of the
   ob_sval member plus one byte for the terminating null.  An allocation for
   a string of length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 string containing exactly `size' bytes.
46
47 For PyBytes_FromStringAndSize(), the parameter `str' is
48 either NULL or else points to a string containing at least `size' bytes.
49 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 not have to be null-terminated. (Therefore it is safe to construct a
51 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 bytes (setting the last byte to the null terminating character) and you can
54 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000055 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 alter the data yourself, since the strings may be shared.
57
58 The PyObject member `op->ob_size', which denotes the number of "extra
59 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020060 allocated for string data, not counting the null terminating character.
61 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062 PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 parameter (for PyBytes_FromString()).
64*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000065PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 register PyBytesObject *op;
69 if (size < 0) {
70 PyErr_SetString(PyExc_SystemError,
71 "Negative size passed to PyBytes_FromStringAndSize");
72 return NULL;
73 }
74 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000077#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81 if (size == 1 && str != NULL &&
82 (op = characters[*str & UCHAR_MAX]) != NULL)
83 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 PyErr_SetString(PyExc_OverflowError,
93 "byte string is too large");
94 return NULL;
95 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 /* Inline PyObject_NewVar */
98 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 if (op == NULL)
100 return PyErr_NoMemory();
101 PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 op->ob_shash = -1;
103 if (str != NULL)
104 Py_MEMCPY(op->ob_sval, str, size);
105 op->ob_sval[size] = '\0';
106 /* share short strings */
107 if (size == 0) {
108 nullstring = op;
109 Py_INCREF(op);
110 } else if (size == 1 && str != NULL) {
111 characters[*str & UCHAR_MAX] = op;
112 Py_INCREF(op);
113 }
114 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000115}
116
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000117PyObject *
118PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 register size_t size;
121 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 assert(str != NULL);
124 size = strlen(str);
125 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 PyErr_SetString(PyExc_OverflowError,
127 "byte string is too long");
128 return NULL;
129 }
130 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000131#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000133#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000138#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
143 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 /* Inline PyObject_NewVar */
146 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 if (op == NULL)
148 return PyErr_NoMemory();
149 PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 op->ob_shash = -1;
151 Py_MEMCPY(op->ob_sval, str, size+1);
152 /* share short strings */
153 if (size == 0) {
154 nullstring = op;
155 Py_INCREF(op);
156 } else if (size == 1) {
157 characters[*str & UCHAR_MAX] = op;
158 Py_INCREF(op);
159 }
160 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000161}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000162
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000163PyObject *
164PyBytes_FromFormatV(const char *format, va_list vargs)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 va_list count;
167 Py_ssize_t n = 0;
168 const char* f;
169 char *s;
170 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000171
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000172 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 /* step 1: figure out how large a buffer we need */
174 for (f = format; *f; f++) {
175 if (*f == '%') {
176 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000177 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 * they don't affect the amount of space we reserve.
182 */
183 if ((*f == 'l' || *f == 'z') &&
184 (f[1] == 'd' || f[1] == 'u'))
185 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 switch (*f) {
188 case 'c':
189 (void)va_arg(count, int);
190 /* fall through... */
191 case '%':
192 n++;
193 break;
194 case 'd': case 'u': case 'i': case 'x':
195 (void) va_arg(count, int);
196 /* 20 bytes is enough to hold a 64-bit
197 integer. Decimal takes the most space.
198 This isn't enough for octal. */
199 n += 20;
200 break;
201 case 's':
202 s = va_arg(count, char*);
203 n += strlen(s);
204 break;
205 case 'p':
206 (void) va_arg(count, int);
207 /* maximum 64-bit pointer representation:
208 * 0xffffffffffffffff
209 * so 19 characters is enough.
210 * XXX I count 18 -- what's the extra for?
211 */
212 n += 19;
213 break;
214 default:
215 /* if we stumble upon an unknown
216 formatting code, copy the rest of
217 the format string to the output
218 string. (we cannot just skip the
219 code, since there's no way to know
220 what's in the argument list) */
221 n += strlen(p);
222 goto expand;
223 }
224 } else
225 n++;
226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000227 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 /* step 2: fill the buffer */
229 /* Since we've analyzed how much space we need for the worst case,
230 use sprintf directly instead of the slower PyOS_snprintf. */
231 string = PyBytes_FromStringAndSize(NULL, n);
232 if (!string)
233 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 for (f = format; *f; f++) {
238 if (*f == '%') {
239 const char* p = f++;
240 Py_ssize_t i;
241 int longflag = 0;
242 int size_tflag = 0;
243 /* parse the width.precision part (we're only
244 interested in the precision value, if any) */
245 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000246 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 n = (n*10) + *f++ - '0';
248 if (*f == '.') {
249 f++;
250 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000251 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = (n*10) + *f++ - '0';
253 }
David Malcolm96960882010-11-05 17:23:41 +0000254 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 f++;
256 /* handle the long flag, but only for %ld and %lu.
257 others can be added when necessary. */
258 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 longflag = 1;
260 ++f;
261 }
262 /* handle the size_t flag. */
263 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 size_tflag = 1;
265 ++f;
266 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000267
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 switch (*f) {
269 case 'c':
270 *s++ = va_arg(vargs, int);
271 break;
272 case 'd':
273 if (longflag)
274 sprintf(s, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'u':
283 if (longflag)
284 sprintf(s, "%lu",
285 va_arg(vargs, unsigned long));
286 else if (size_tflag)
287 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 va_arg(vargs, size_t));
289 else
290 sprintf(s, "%u",
291 va_arg(vargs, unsigned int));
292 s += strlen(s);
293 break;
294 case 'i':
295 sprintf(s, "%i", va_arg(vargs, int));
296 s += strlen(s);
297 break;
298 case 'x':
299 sprintf(s, "%x", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 's':
303 p = va_arg(vargs, char*);
304 i = strlen(p);
305 if (n > 0 && i > n)
306 i = n;
307 Py_MEMCPY(s, p, i);
308 s += i;
309 break;
310 case 'p':
311 sprintf(s, "%p", va_arg(vargs, void*));
312 /* %p is ill-defined: ensure leading 0x. */
313 if (s[1] == 'X')
314 s[1] = 'x';
315 else if (s[1] != 'x') {
316 memmove(s+2, s, strlen(s)+1);
317 s[0] = '0';
318 s[1] = 'x';
319 }
320 s += strlen(s);
321 break;
322 case '%':
323 *s++ = '%';
324 break;
325 default:
326 strcpy(s, p);
327 s += strlen(s);
328 goto end;
329 }
330 } else
331 *s++ = *f;
332 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000333
334 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337}
338
339PyObject *
340PyBytes_FromFormat(const char *format, ...)
341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 PyObject* ret;
343 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344
345#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000347#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000349#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 ret = PyBytes_FromFormatV(format, vargs);
351 va_end(vargs);
352 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000353}
354
355static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000356bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000359}
360
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361/* Unescape a backslash-escaped string. If unicode is non-zero,
362 the string is a u-literal. If recode_encoding is non-zero,
363 the string is UTF-8 encoded and should be re-encoded in the
364 specified encoding. */
365
366PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 Py_ssize_t len,
368 const char *errors,
369 Py_ssize_t unicode,
370 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 int c;
373 char *p, *buf;
374 const char *end;
375 PyObject *v;
376 Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 if (v == NULL)
379 return NULL;
380 p = buf = PyBytes_AsString(v);
381 end = s + len;
382 while (s < end) {
383 if (*s != '\\') {
384 non_esc:
385 if (recode_encoding && (*s & 0x80)) {
386 PyObject *u, *w;
387 char *r;
388 const char* t;
389 Py_ssize_t rn;
390 t = s;
391 /* Decode non-ASCII bytes as UTF-8. */
392 while (t < end && (*t & 0x80)) t++;
393 u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 /* Recode them in target encoding. */
397 w = PyUnicode_AsEncodedString(
398 u, recode_encoding, errors);
399 Py_DECREF(u);
400 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Append bytes to output buffer. */
403 assert(PyBytes_Check(w));
404 r = PyBytes_AS_STRING(w);
405 rn = PyBytes_GET_SIZE(w);
406 Py_MEMCPY(p, r, rn);
407 p += rn;
408 Py_DECREF(w);
409 s = t;
410 } else {
411 *p++ = *s++;
412 }
413 continue;
414 }
415 s++;
416 if (s==end) {
417 PyErr_SetString(PyExc_ValueError,
418 "Trailing \\ in string");
419 goto failed;
420 }
421 switch (*s++) {
422 /* XXX This assumes ASCII! */
423 case '\n': break;
424 case '\\': *p++ = '\\'; break;
425 case '\'': *p++ = '\''; break;
426 case '\"': *p++ = '\"'; break;
427 case 'b': *p++ = '\b'; break;
428 case 'f': *p++ = '\014'; break; /* FF */
429 case 't': *p++ = '\t'; break;
430 case 'n': *p++ = '\n'; break;
431 case 'r': *p++ = '\r'; break;
432 case 'v': *p++ = '\013'; break; /* VT */
433 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 case '0': case '1': case '2': case '3':
435 case '4': case '5': case '6': case '7':
436 c = s[-1] - '0';
437 if (s < end && '0' <= *s && *s <= '7') {
438 c = (c<<3) + *s++ - '0';
439 if (s < end && '0' <= *s && *s <= '7')
440 c = (c<<3) + *s++ - '0';
441 }
442 *p++ = c;
443 break;
444 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000445 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 unsigned int x = 0;
447 c = Py_CHARMASK(*s);
448 s++;
David Malcolm96960882010-11-05 17:23:41 +0000449 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000451 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 x = 10 + c - 'a';
453 else
454 x = 10 + c - 'A';
455 x = x << 4;
456 c = Py_CHARMASK(*s);
457 s++;
David Malcolm96960882010-11-05 17:23:41 +0000458 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000460 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 x += 10 + c - 'a';
462 else
463 x += 10 + c - 'A';
464 *p++ = x;
465 break;
466 }
467 if (!errors || strcmp(errors, "strict") == 0) {
468 PyErr_SetString(PyExc_ValueError,
469 "invalid \\x escape");
470 goto failed;
471 }
472 if (strcmp(errors, "replace") == 0) {
473 *p++ = '?';
474 } else if (strcmp(errors, "ignore") == 0)
475 /* do nothing */;
476 else {
477 PyErr_Format(PyExc_ValueError,
478 "decoding error; unknown "
479 "error handling code: %.400s",
480 errors);
481 goto failed;
482 }
483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
502PyBytes_Size(register PyObject *op)
503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
513PyBytes_AsString(register PyObject *op)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
524PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 register char **s,
526 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
558#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000559#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000560#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000561
Eric Smith0f78bff2009-11-30 01:01:42 +0000562#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000563
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000564PyObject *
565PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 static const char *hexdigits = "0123456789abcdef";
568 register PyBytesObject* op = (PyBytesObject*) obj;
569 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000570 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000572 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 PyErr_SetString(PyExc_OverflowError,
574 "bytes object is too large to make repr");
575 return NULL;
576 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000577 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 v = PyUnicode_FromUnicode(NULL, newsize);
579 if (v == NULL) {
580 return NULL;
581 }
582 else {
583 register Py_ssize_t i;
584 register Py_UNICODE c;
585 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
586 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 /* Figure out which quote to use; single is preferred */
589 quote = '\'';
590 if (smartquotes) {
591 char *test, *start;
592 start = PyBytes_AS_STRING(op);
593 for (test = start; test < start+length; ++test) {
594 if (*test == '"') {
595 quote = '\''; /* back to single */
596 goto decided;
597 }
598 else if (*test == '\'')
599 quote = '"';
600 }
601 decided:
602 ;
603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 *p++ = 'b', *p++ = quote;
606 for (i = 0; i < length; i++) {
607 /* There's at least enough room for a hex escape
608 and a closing quote. */
609 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
610 c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
622 *p++ = hexdigits[(c & 0xf0) >> 4];
623 *p++ = hexdigits[c & 0xf];
624 }
625 else
626 *p++ = c;
627 }
628 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
629 *p++ = quote;
630 *p = '\0';
631 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
632 Py_DECREF(v);
633 return NULL;
634 }
635 return v;
636 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000637}
638
Neal Norwitz6968b052007-02-27 19:02:19 +0000639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000640bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000643}
644
Neal Norwitz6968b052007-02-27 19:02:19 +0000645static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000646bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 if (Py_BytesWarningFlag) {
649 if (PyErr_WarnEx(PyExc_BytesWarning,
650 "str() on a bytes instance", 1))
651 return NULL;
652 }
653 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000654}
655
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000656static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000657bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660}
Neal Norwitz6968b052007-02-27 19:02:19 +0000661
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662/* This is also used by PyBytes_Concat() */
663static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000664bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 Py_ssize_t size;
667 Py_buffer va, vb;
668 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 va.len = -1;
671 vb.len = -1;
672 if (_getbuffer(a, &va) < 0 ||
673 _getbuffer(b, &vb) < 0) {
674 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
675 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
676 goto done;
677 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000679 /* Optimize end cases */
680 if (va.len == 0 && PyBytes_CheckExact(b)) {
681 result = b;
682 Py_INCREF(result);
683 goto done;
684 }
685 if (vb.len == 0 && PyBytes_CheckExact(a)) {
686 result = a;
687 Py_INCREF(result);
688 goto done;
689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 size = va.len + vb.len;
692 if (size < 0) {
693 PyErr_NoMemory();
694 goto done;
695 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 result = PyBytes_FromStringAndSize(NULL, size);
698 if (result != NULL) {
699 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
700 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
701 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702
703 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000704 if (va.len != -1)
705 PyBuffer_Release(&va);
706 if (vb.len != -1)
707 PyBuffer_Release(&vb);
708 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000709}
Neal Norwitz6968b052007-02-27 19:02:19 +0000710
711static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000712bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000713{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 register Py_ssize_t i;
715 register Py_ssize_t j;
716 register Py_ssize_t size;
717 register PyBytesObject *op;
718 size_t nbytes;
719 if (n < 0)
720 n = 0;
721 /* watch out for overflows: the size can overflow int,
722 * and the # of bytes needed can overflow size_t
723 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000724 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 PyErr_SetString(PyExc_OverflowError,
726 "repeated bytes are too long");
727 return NULL;
728 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000729 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
731 Py_INCREF(a);
732 return (PyObject *)a;
733 }
734 nbytes = (size_t)size;
735 if (nbytes + PyBytesObject_SIZE <= nbytes) {
736 PyErr_SetString(PyExc_OverflowError,
737 "repeated bytes are too long");
738 return NULL;
739 }
740 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
741 if (op == NULL)
742 return PyErr_NoMemory();
743 PyObject_INIT_VAR(op, &PyBytes_Type, size);
744 op->ob_shash = -1;
745 op->ob_sval[size] = '\0';
746 if (Py_SIZE(a) == 1 && n > 0) {
747 memset(op->ob_sval, a->ob_sval[0] , n);
748 return (PyObject *) op;
749 }
750 i = 0;
751 if (i < size) {
752 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
753 i = Py_SIZE(a);
754 }
755 while (i < size) {
756 j = (i <= size-i) ? i : size-i;
757 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
758 i += j;
759 }
760 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000761}
762
Guido van Rossum98297ee2007-11-06 21:34:58 +0000763static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000764bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000765{
766 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
767 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000768 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000769 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000770 PyErr_Clear();
771 if (_getbuffer(arg, &varg) < 0)
772 return -1;
773 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
774 varg.buf, varg.len, 0);
775 PyBuffer_Release(&varg);
776 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000777 }
778 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000779 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
780 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782
Antoine Pitrou0010d372010-08-15 17:12:55 +0000783 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000784}
785
Neal Norwitz6968b052007-02-27 19:02:19 +0000786static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000787bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 if (i < 0 || i >= Py_SIZE(a)) {
790 PyErr_SetString(PyExc_IndexError, "index out of range");
791 return NULL;
792 }
793 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000794}
795
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000796static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000797bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000798{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 int c;
800 Py_ssize_t len_a, len_b;
801 Py_ssize_t min_len;
802 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000804 /* Make sure both arguments are strings. */
805 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
806 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
807 (PyObject_IsInstance((PyObject*)a,
808 (PyObject*)&PyUnicode_Type) ||
809 PyObject_IsInstance((PyObject*)b,
810 (PyObject*)&PyUnicode_Type))) {
811 if (PyErr_WarnEx(PyExc_BytesWarning,
812 "Comparison between bytes and string", 1))
813 return NULL;
814 }
815 result = Py_NotImplemented;
816 goto out;
817 }
818 if (a == b) {
819 switch (op) {
820 case Py_EQ:case Py_LE:case Py_GE:
821 result = Py_True;
822 goto out;
823 case Py_NE:case Py_LT:case Py_GT:
824 result = Py_False;
825 goto out;
826 }
827 }
828 if (op == Py_EQ) {
829 /* Supporting Py_NE here as well does not save
830 much time, since Py_NE is rarely used. */
831 if (Py_SIZE(a) == Py_SIZE(b)
832 && (a->ob_sval[0] == b->ob_sval[0]
833 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
834 result = Py_True;
835 } else {
836 result = Py_False;
837 }
838 goto out;
839 }
840 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
841 min_len = (len_a < len_b) ? len_a : len_b;
842 if (min_len > 0) {
843 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
844 if (c==0)
845 c = memcmp(a->ob_sval, b->ob_sval, min_len);
846 } else
847 c = 0;
848 if (c == 0)
849 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
850 switch (op) {
851 case Py_LT: c = c < 0; break;
852 case Py_LE: c = c <= 0; break;
853 case Py_EQ: assert(0); break; /* unreachable */
854 case Py_NE: c = c != 0; break;
855 case Py_GT: c = c > 0; break;
856 case Py_GE: c = c >= 0; break;
857 default:
858 result = Py_NotImplemented;
859 goto out;
860 }
861 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000862 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 Py_INCREF(result);
864 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000865}
866
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000867static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000868bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 register Py_ssize_t len;
871 register unsigned char *p;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000872 register Py_hash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 if (a->ob_shash != -1)
875 return a->ob_shash;
876 len = Py_SIZE(a);
877 p = (unsigned char *) a->ob_sval;
878 x = *p << 7;
879 while (--len >= 0)
880 x = (1000003*x) ^ *p++;
881 x ^= Py_SIZE(a);
882 if (x == -1)
883 x = -2;
884 a->ob_shash = x;
885 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000886}
887
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000888static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000889bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000890{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 if (PyIndex_Check(item)) {
892 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
893 if (i == -1 && PyErr_Occurred())
894 return NULL;
895 if (i < 0)
896 i += PyBytes_GET_SIZE(self);
897 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
898 PyErr_SetString(PyExc_IndexError,
899 "index out of range");
900 return NULL;
901 }
902 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
903 }
904 else if (PySlice_Check(item)) {
905 Py_ssize_t start, stop, step, slicelength, cur, i;
906 char* source_buf;
907 char* result_buf;
908 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000909
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000910 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 PyBytes_GET_SIZE(self),
912 &start, &stop, &step, &slicelength) < 0) {
913 return NULL;
914 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 if (slicelength <= 0) {
917 return PyBytes_FromStringAndSize("", 0);
918 }
919 else if (start == 0 && step == 1 &&
920 slicelength == PyBytes_GET_SIZE(self) &&
921 PyBytes_CheckExact(self)) {
922 Py_INCREF(self);
923 return (PyObject *)self;
924 }
925 else if (step == 1) {
926 return PyBytes_FromStringAndSize(
927 PyBytes_AS_STRING(self) + start,
928 slicelength);
929 }
930 else {
931 source_buf = PyBytes_AS_STRING(self);
932 result = PyBytes_FromStringAndSize(NULL, slicelength);
933 if (result == NULL)
934 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 result_buf = PyBytes_AS_STRING(result);
937 for (cur = start, i = 0; i < slicelength;
938 cur += step, i++) {
939 result_buf[i] = source_buf[cur];
940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 return result;
943 }
944 }
945 else {
946 PyErr_Format(PyExc_TypeError,
947 "byte indices must be integers, not %.200s",
948 Py_TYPE(item)->tp_name);
949 return NULL;
950 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951}
952
953static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
957 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958}
959
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000960static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 (lenfunc)bytes_length, /*sq_length*/
962 (binaryfunc)bytes_concat, /*sq_concat*/
963 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
964 (ssizeargfunc)bytes_item, /*sq_item*/
965 0, /*sq_slice*/
966 0, /*sq_ass_item*/
967 0, /*sq_ass_slice*/
968 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969};
970
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000971static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 (lenfunc)bytes_length,
973 (binaryfunc)bytes_subscript,
974 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975};
976
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000977static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 (getbufferproc)bytes_buffer_getbuffer,
979 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000980};
981
982
/* Strip modes shared by the strip helpers; they double as indexes into
   stripformat[] below. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Argument-parsing format strings, indexed by the strip mode above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: skips the 3-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
991
Neal Norwitz6968b052007-02-27 19:02:19 +0000992PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000994\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000995Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000996If sep is not specified or is None, B is split on ASCII whitespace\n\
997characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +0000998If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +0000999
1000static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001001bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1004 Py_ssize_t maxsplit = -1;
1005 const char *s = PyBytes_AS_STRING(self), *sub;
1006 Py_buffer vsub;
1007 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1010 return NULL;
1011 if (maxsplit < 0)
1012 maxsplit = PY_SSIZE_T_MAX;
1013 if (subobj == Py_None)
1014 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1015 if (_getbuffer(subobj, &vsub) < 0)
1016 return NULL;
1017 sub = vsub.buf;
1018 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1021 PyBuffer_Release(&vsub);
1022 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001023}
1024
Neal Norwitz6968b052007-02-27 19:02:19 +00001025PyDoc_STRVAR(partition__doc__,
1026"B.partition(sep) -> (head, sep, tail)\n\
1027\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001028Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001029the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001033bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001034{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 const char *sep;
1036 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (PyBytes_Check(sep_obj)) {
1039 sep = PyBytes_AS_STRING(sep_obj);
1040 sep_len = PyBytes_GET_SIZE(sep_obj);
1041 }
1042 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1043 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 return stringlib_partition(
1046 (PyObject*) self,
1047 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1048 sep_obj, sep, sep_len
1049 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001050}
1051
1052PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001053"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001054\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001055Search for the separator sep in B, starting at the end of B,\n\
1056and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001057part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001058bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
1060static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 const char *sep;
1064 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 if (PyBytes_Check(sep_obj)) {
1067 sep = PyBytes_AS_STRING(sep_obj);
1068 sep_len = PyBytes_GET_SIZE(sep_obj);
1069 }
1070 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1071 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 return stringlib_rpartition(
1074 (PyObject*) self,
1075 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1076 sep_obj, sep, sep_len
1077 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001078}
1079
Neal Norwitz6968b052007-02-27 19:02:19 +00001080PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001081"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001082\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001083Return a list of the sections in B, using sep as the delimiter,\n\
1084starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001085If sep is not given, B is split on ASCII whitespace characters\n\
1086(space, tab, return, newline, formfeed, vertical tab).\n\
1087If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001089
Neal Norwitz6968b052007-02-27 19:02:19 +00001090static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001091bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001092{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1094 Py_ssize_t maxsplit = -1;
1095 const char *s = PyBytes_AS_STRING(self), *sub;
1096 Py_buffer vsub;
1097 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1100 return NULL;
1101 if (maxsplit < 0)
1102 maxsplit = PY_SSIZE_T_MAX;
1103 if (subobj == Py_None)
1104 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1105 if (_getbuffer(subobj, &vsub) < 0)
1106 return NULL;
1107 sub = vsub.buf;
1108 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1111 PyBuffer_Release(&vsub);
1112 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001113}
1114
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001115
1116PyDoc_STRVAR(join__doc__,
1117"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001118\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001119Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001120Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1121
Neal Norwitz6968b052007-02-27 19:02:19 +00001122static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001123bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 char *sep = PyBytes_AS_STRING(self);
1126 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1127 PyObject *res = NULL;
1128 char *p;
1129 Py_ssize_t seqlen = 0;
1130 size_t sz = 0;
1131 Py_ssize_t i;
1132 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 seq = PySequence_Fast(orig, "");
1135 if (seq == NULL) {
1136 return NULL;
1137 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 seqlen = PySequence_Size(seq);
1140 if (seqlen == 0) {
1141 Py_DECREF(seq);
1142 return PyBytes_FromString("");
1143 }
1144 if (seqlen == 1) {
1145 item = PySequence_Fast_GET_ITEM(seq, 0);
1146 if (PyBytes_CheckExact(item)) {
1147 Py_INCREF(item);
1148 Py_DECREF(seq);
1149 return item;
1150 }
1151 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 /* There are at least two things to join, or else we have a subclass
1154 * of the builtin types in the sequence.
1155 * Do a pre-pass to figure out the total amount of space we'll
1156 * need (sz), and see whether all argument are bytes.
1157 */
1158 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1159 for (i = 0; i < seqlen; i++) {
1160 const size_t old_sz = sz;
1161 item = PySequence_Fast_GET_ITEM(seq, i);
1162 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1163 PyErr_Format(PyExc_TypeError,
1164 "sequence item %zd: expected bytes,"
1165 " %.80s found",
1166 i, Py_TYPE(item)->tp_name);
1167 Py_DECREF(seq);
1168 return NULL;
1169 }
1170 sz += Py_SIZE(item);
1171 if (i != 0)
1172 sz += seplen;
1173 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1174 PyErr_SetString(PyExc_OverflowError,
1175 "join() result is too long for bytes");
1176 Py_DECREF(seq);
1177 return NULL;
1178 }
1179 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 /* Allocate result space. */
1182 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1183 if (res == NULL) {
1184 Py_DECREF(seq);
1185 return NULL;
1186 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 /* Catenate everything. */
1189 /* I'm not worried about a PyByteArray item growing because there's
1190 nowhere in this function where we release the GIL. */
1191 p = PyBytes_AS_STRING(res);
1192 for (i = 0; i < seqlen; ++i) {
1193 size_t n;
1194 char *q;
1195 if (i) {
1196 Py_MEMCPY(p, sep, seplen);
1197 p += seplen;
1198 }
1199 item = PySequence_Fast_GET_ITEM(seq, i);
1200 n = Py_SIZE(item);
1201 if (PyBytes_Check(item))
1202 q = PyBytes_AS_STRING(item);
1203 else
1204 q = PyByteArray_AS_STRING(item);
1205 Py_MEMCPY(p, q, n);
1206 p += n;
1207 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 Py_DECREF(seq);
1210 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001211}
1212
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001213PyObject *
1214_PyBytes_Join(PyObject *sep, PyObject *x)
1215{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 assert(sep != NULL && PyBytes_Check(sep));
1217 assert(x != NULL);
1218 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219}
1220
/* Helper macro to fix up start/end slice values in place.
 *
 * end is clamped to len; a negative start or end is taken relative to len
 * and then clamped to 0.  start and end must be modifiable lvalues.  The
 * arguments are evaluated more than once, so they must be free of side
 * effects.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe inside an unbraced if/else.  Use it with a trailing
 * semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235
1236Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001237bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 PyObject *subobj;
1240 const char *sub;
1241 Py_ssize_t sub_len;
1242 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1243 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1246 &obj_start, &obj_end))
1247 return -2;
1248 /* To support None in "start" and "end" arguments, meaning
1249 the same as if they were not passed.
1250 */
1251 if (obj_start != Py_None)
1252 if (!_PyEval_SliceIndex(obj_start, &start))
1253 return -2;
1254 if (obj_end != Py_None)
1255 if (!_PyEval_SliceIndex(obj_end, &end))
1256 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 if (PyBytes_Check(subobj)) {
1259 sub = PyBytes_AS_STRING(subobj);
1260 sub_len = PyBytes_GET_SIZE(subobj);
1261 }
1262 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1263 /* XXX - the "expected a character buffer object" is pretty
1264 confusing for a non-expert. remap to something else ? */
1265 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 if (dir > 0)
1268 return stringlib_find_slice(
1269 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1270 sub, sub_len, start, end);
1271 else
1272 return stringlib_rfind_slice(
1273 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1274 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001275}
1276
1277
1278PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001279"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001280\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001281Return the lowest index in B where substring sub is found,\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001282such that sub is contained within s[start:end]. Optional\n\
1283arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001284\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001285Return -1 on failure.");
1286
Neal Norwitz6968b052007-02-27 19:02:19 +00001287static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001288bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001289{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 Py_ssize_t result = bytes_find_internal(self, args, +1);
1291 if (result == -2)
1292 return NULL;
1293 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001294}
1295
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001296
1297PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001298"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001299\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300Like B.find() but raise ValueError when the substring is not found.");
1301
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001302static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001303bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001304{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 Py_ssize_t result = bytes_find_internal(self, args, +1);
1306 if (result == -2)
1307 return NULL;
1308 if (result == -1) {
1309 PyErr_SetString(PyExc_ValueError,
1310 "substring not found");
1311 return NULL;
1312 }
1313 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001314}
1315
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316
1317PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001318"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001319\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320Return the highest index in B where substring sub is found,\n\
1321such that sub is contained within s[start:end]. Optional\n\
1322arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001323\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324Return -1 on failure.");
1325
Neal Norwitz6968b052007-02-27 19:02:19 +00001326static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001327bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001328{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 Py_ssize_t result = bytes_find_internal(self, args, -1);
1330 if (result == -2)
1331 return NULL;
1332 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001333}
1334
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001335
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001336PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001337"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001338\n\
1339Like B.rfind() but raise ValueError when the substring is not found.");
1340
1341static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001342bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001343{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 Py_ssize_t result = bytes_find_internal(self, args, -1);
1345 if (result == -2)
1346 return NULL;
1347 if (result == -1) {
1348 PyErr_SetString(PyExc_ValueError,
1349 "substring not found");
1350 return NULL;
1351 }
1352 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001353}
1354
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355
1356Py_LOCAL_INLINE(PyObject *)
1357do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 Py_buffer vsep;
1360 char *s = PyBytes_AS_STRING(self);
1361 Py_ssize_t len = PyBytes_GET_SIZE(self);
1362 char *sep;
1363 Py_ssize_t seplen;
1364 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 if (_getbuffer(sepobj, &vsep) < 0)
1367 return NULL;
1368 sep = vsep.buf;
1369 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 i = 0;
1372 if (striptype != RIGHTSTRIP) {
1373 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1374 i++;
1375 }
1376 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 j = len;
1379 if (striptype != LEFTSTRIP) {
1380 do {
1381 j--;
1382 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1383 j++;
1384 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1389 Py_INCREF(self);
1390 return (PyObject*)self;
1391 }
1392 else
1393 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001394}
1395
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001396
1397Py_LOCAL_INLINE(PyObject *)
1398do_strip(PyBytesObject *self, int striptype)
1399{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 char *s = PyBytes_AS_STRING(self);
1401 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 i = 0;
1404 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001405 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 i++;
1407 }
1408 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 j = len;
1411 if (striptype != LEFTSTRIP) {
1412 do {
1413 j--;
David Malcolm96960882010-11-05 17:23:41 +00001414 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 j++;
1416 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1419 Py_INCREF(self);
1420 return (PyObject*)self;
1421 }
1422 else
1423 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001424}
1425
1426
1427Py_LOCAL_INLINE(PyObject *)
1428do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1429{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1433 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 if (sep != NULL && sep != Py_None) {
1436 return do_xstrip(self, striptype, sep);
1437 }
1438 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439}
1440
1441
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001442PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001444\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001445Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001448bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001449{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 if (PyTuple_GET_SIZE(args) == 0)
1451 return do_strip(self, BOTHSTRIP); /* Common case */
1452 else
1453 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454}
1455
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001457PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001459\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001460Strip leading bytes contained in the argument.\n\
1461If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001463bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001464{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 if (PyTuple_GET_SIZE(args) == 0)
1466 return do_strip(self, LEFTSTRIP); /* Common case */
1467 else
1468 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001469}
1470
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001471
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001472PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001473"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001474\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001475Strip trailing bytes contained in the argument.\n\
1476If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001477static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001478bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001479{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 if (PyTuple_GET_SIZE(args) == 0)
1481 return do_strip(self, RIGHTSTRIP); /* Common case */
1482 else
1483 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001484}
Neal Norwitz6968b052007-02-27 19:02:19 +00001485
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001486
1487PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001488"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001489\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001491string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001492as in slice notation.");
1493
1494static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001495bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 PyObject *sub_obj;
1498 const char *str = PyBytes_AS_STRING(self), *sub;
1499 Py_ssize_t sub_len;
1500 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1503 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1504 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 if (PyBytes_Check(sub_obj)) {
1507 sub = PyBytes_AS_STRING(sub_obj);
1508 sub_len = PyBytes_GET_SIZE(sub_obj);
1509 }
1510 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1511 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 return PyLong_FromSsize_t(
1516 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1517 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518}
1519
1520
1521PyDoc_STRVAR(translate__doc__,
1522"B.translate(table[, deletechars]) -> bytes\n\
1523\n\
1524Return a copy of B, where all characters occurring in the\n\
1525optional argument deletechars are removed, and the remaining\n\
1526characters have been mapped through the given translation\n\
1527table, which must be a bytes object of length 256.");
1528
1529static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001530bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 register char *input, *output;
1533 const char *table;
1534 register Py_ssize_t i, c, changed = 0;
1535 PyObject *input_obj = (PyObject*)self;
1536 const char *output_start, *del_table=NULL;
1537 Py_ssize_t inlen, tablen, dellen = 0;
1538 PyObject *result;
1539 int trans_table[256];
1540 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1543 &tableobj, &delobj))
1544 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 if (PyBytes_Check(tableobj)) {
1547 table = PyBytes_AS_STRING(tableobj);
1548 tablen = PyBytes_GET_SIZE(tableobj);
1549 }
1550 else if (tableobj == Py_None) {
1551 table = NULL;
1552 tablen = 256;
1553 }
1554 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1555 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 if (tablen != 256) {
1558 PyErr_SetString(PyExc_ValueError,
1559 "translation table must be 256 characters long");
1560 return NULL;
1561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 if (delobj != NULL) {
1564 if (PyBytes_Check(delobj)) {
1565 del_table = PyBytes_AS_STRING(delobj);
1566 dellen = PyBytes_GET_SIZE(delobj);
1567 }
1568 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1569 return NULL;
1570 }
1571 else {
1572 del_table = NULL;
1573 dellen = 0;
1574 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 inlen = PyBytes_GET_SIZE(input_obj);
1577 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1578 if (result == NULL)
1579 return NULL;
1580 output_start = output = PyBytes_AsString(result);
1581 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 if (dellen == 0 && table != NULL) {
1584 /* If no deletions are required, use faster code */
1585 for (i = inlen; --i >= 0; ) {
1586 c = Py_CHARMASK(*input++);
1587 if (Py_CHARMASK((*output++ = table[c])) != c)
1588 changed = 1;
1589 }
1590 if (changed || !PyBytes_CheckExact(input_obj))
1591 return result;
1592 Py_DECREF(result);
1593 Py_INCREF(input_obj);
1594 return input_obj;
1595 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 if (table == NULL) {
1598 for (i = 0; i < 256; i++)
1599 trans_table[i] = Py_CHARMASK(i);
1600 } else {
1601 for (i = 0; i < 256; i++)
1602 trans_table[i] = Py_CHARMASK(table[i]);
1603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 for (i = 0; i < dellen; i++)
1606 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 for (i = inlen; --i >= 0; ) {
1609 c = Py_CHARMASK(*input++);
1610 if (trans_table[c] != -1)
1611 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1612 continue;
1613 changed = 1;
1614 }
1615 if (!changed && PyBytes_CheckExact(input_obj)) {
1616 Py_DECREF(result);
1617 Py_INCREF(input_obj);
1618 return input_obj;
1619 }
1620 /* Fix the size of the resulting string */
1621 if (inlen > 0)
1622 _PyBytes_Resize(&result, output - output_start);
1623 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001624}
1625
1626
Georg Brandlabc38772009-04-12 15:51:51 +00001627static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001628bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001629{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001631}
1632
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001633/* find and count characters and substrings */
1634
/* findchar(target, target_len, c)
 *
 * Search the first target_len bytes of target for the byte c using
 * memchr().  Evaluates to a char * pointing at the first match, or NULL
 * when there is none.  Every macro parameter is parenthesized so that
 * arbitrary argument expressions expand as a single operand.
 */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1637
1638/* String ops must return a string. */
1639/* If the object is subclass of string, create a copy */
1640Py_LOCAL(PyBytesObject *)
1641return_self(PyBytesObject *self)
1642{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 if (PyBytes_CheckExact(self)) {
1644 Py_INCREF(self);
1645 return self;
1646 }
1647 return (PyBytesObject *)PyBytes_FromStringAndSize(
1648 PyBytes_AS_STRING(self),
1649 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650}
1651
1652Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001653countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001654{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 Py_ssize_t count=0;
1656 const char *start=target;
1657 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 while ( (start=findchar(start, end-start, c)) != NULL ) {
1660 count++;
1661 if (count >= maxcount)
1662 break;
1663 start += 1;
1664 }
1665 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666}
1667
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668
1669/* Algorithms for different cases of string replacement */
1670
1671/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1672Py_LOCAL(PyBytesObject *)
1673replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 const char *to_s, Py_ssize_t to_len,
1675 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 char *self_s, *result_s;
1678 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001679 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001684 /* 1 at the end plus 1 after every character;
1685 count = min(maxcount, self_len + 1) */
1686 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001688 else
1689 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1690 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 /* Check for overflow */
1693 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001694 assert(count > 0);
1695 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 PyErr_SetString(PyExc_OverflowError,
1697 "replacement bytes are too long");
1698 return NULL;
1699 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001700 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 if (! (result = (PyBytesObject *)
1703 PyBytes_FromStringAndSize(NULL, result_len)) )
1704 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 self_s = PyBytes_AS_STRING(self);
1707 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 /* Lay the first one down (guaranteed this will occur) */
1712 Py_MEMCPY(result_s, to_s, to_len);
1713 result_s += to_len;
1714 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 for (i=0; i<count; i++) {
1717 *result_s++ = *self_s++;
1718 Py_MEMCPY(result_s, to_s, to_len);
1719 result_s += to_len;
1720 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 /* Copy the rest of the original string */
1723 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726}
1727
1728/* Special case for deleting a single character */
1729/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1730Py_LOCAL(PyBytesObject *)
1731replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 char *self_s, *result_s;
1735 char *start, *next, *end;
1736 Py_ssize_t self_len, result_len;
1737 Py_ssize_t count;
1738 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 self_len = PyBytes_GET_SIZE(self);
1741 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 count = countchar(self_s, self_len, from_c, maxcount);
1744 if (count == 0) {
1745 return return_self(self);
1746 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 result_len = self_len - count; /* from_len == 1 */
1749 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 if ( (result = (PyBytesObject *)
1752 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1753 return NULL;
1754 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001756 start = self_s;
1757 end = self_s + self_len;
1758 while (count-- > 0) {
1759 next = findchar(start, end-start, from_c);
1760 if (next == NULL)
1761 break;
1762 Py_MEMCPY(result_s, start, next-start);
1763 result_s += (next-start);
1764 start = next+1;
1765 }
1766 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769}
1770
1771/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1772
1773Py_LOCAL(PyBytesObject *)
1774replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 const char *from_s, Py_ssize_t from_len,
1776 Py_ssize_t maxcount) {
1777 char *self_s, *result_s;
1778 char *start, *next, *end;
1779 Py_ssize_t self_len, result_len;
1780 Py_ssize_t count, offset;
1781 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001782
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 self_len = PyBytes_GET_SIZE(self);
1784 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 count = stringlib_count(self_s, self_len,
1787 from_s, from_len,
1788 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 if (count == 0) {
1791 /* no matches */
1792 return return_self(self);
1793 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 result_len = self_len - (count * from_len);
1796 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 if ( (result = (PyBytesObject *)
1799 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1800 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001804 start = self_s;
1805 end = self_s + self_len;
1806 while (count-- > 0) {
1807 offset = stringlib_find(start, end-start,
1808 from_s, from_len,
1809 0);
1810 if (offset == -1)
1811 break;
1812 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 result_s += (next-start);
1817 start = next+from_len;
1818 }
1819 Py_MEMCPY(result_s, start, end-start);
1820 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001821}
1822
1823/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1824Py_LOCAL(PyBytesObject *)
1825replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 char from_c, char to_c,
1827 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 char *self_s, *result_s, *start, *end, *next;
1830 Py_ssize_t self_len;
1831 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 /* The result string will be the same size */
1834 self_s = PyBytes_AS_STRING(self);
1835 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 if (next == NULL) {
1840 /* No matches; return the original string */
1841 return return_self(self);
1842 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 /* Need to make a new string */
1845 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1846 if (result == NULL)
1847 return NULL;
1848 result_s = PyBytes_AS_STRING(result);
1849 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 /* change everything in-place, starting with this one */
1852 start = result_s + (next-self_s);
1853 *start = to_c;
1854 start++;
1855 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 while (--maxcount > 0) {
1858 next = findchar(start, end-start, from_c);
1859 if (next == NULL)
1860 break;
1861 *next = to_c;
1862 start = next+1;
1863 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001864
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866}
1867
1868/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1869Py_LOCAL(PyBytesObject *)
1870replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 const char *from_s, Py_ssize_t from_len,
1872 const char *to_s, Py_ssize_t to_len,
1873 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001874{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 char *result_s, *start, *end;
1876 char *self_s;
1877 Py_ssize_t self_len, offset;
1878 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 self_s = PyBytes_AS_STRING(self);
1883 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001884
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 offset = stringlib_find(self_s, self_len,
1886 from_s, from_len,
1887 0);
1888 if (offset == -1) {
1889 /* No matches; return the original string */
1890 return return_self(self);
1891 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 /* Need to make a new string */
1894 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1895 if (result == NULL)
1896 return NULL;
1897 result_s = PyBytes_AS_STRING(result);
1898 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 /* change everything in-place, starting with this one */
1901 start = result_s + offset;
1902 Py_MEMCPY(start, to_s, from_len);
1903 start += from_len;
1904 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 while ( --maxcount > 0) {
1907 offset = stringlib_find(start, end-start,
1908 from_s, from_len,
1909 0);
1910 if (offset==-1)
1911 break;
1912 Py_MEMCPY(start+offset, to_s, from_len);
1913 start += offset+from_len;
1914 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917}
1918
1919/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1920Py_LOCAL(PyBytesObject *)
1921replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 char from_c,
1923 const char *to_s, Py_ssize_t to_len,
1924 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 char *self_s, *result_s;
1927 char *start, *next, *end;
1928 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001929 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 self_s = PyBytes_AS_STRING(self);
1933 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 count = countchar(self_s, self_len, from_c, maxcount);
1936 if (count == 0) {
1937 /* no matches, return unchanged */
1938 return return_self(self);
1939 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 /* use the difference between current and new, hence the "-1" */
1942 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001943 assert(count > 0);
1944 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 PyErr_SetString(PyExc_OverflowError,
1946 "replacement bytes are too long");
1947 return NULL;
1948 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001949 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 if ( (result = (PyBytesObject *)
1952 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1953 return NULL;
1954 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 start = self_s;
1957 end = self_s + self_len;
1958 while (count-- > 0) {
1959 next = findchar(start, end-start, from_c);
1960 if (next == NULL)
1961 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 if (next == start) {
1964 /* replace with the 'to' */
1965 Py_MEMCPY(result_s, to_s, to_len);
1966 result_s += to_len;
1967 start += 1;
1968 } else {
1969 /* copy the unchanged old then the 'to' */
1970 Py_MEMCPY(result_s, start, next-start);
1971 result_s += (next-start);
1972 Py_MEMCPY(result_s, to_s, to_len);
1973 result_s += to_len;
1974 start = next+1;
1975 }
1976 }
1977 /* Copy the remainder of the remaining string */
1978 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001980 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001981}
1982
1983/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1984Py_LOCAL(PyBytesObject *)
1985replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 const char *from_s, Py_ssize_t from_len,
1987 const char *to_s, Py_ssize_t to_len,
1988 Py_ssize_t maxcount) {
1989 char *self_s, *result_s;
1990 char *start, *next, *end;
1991 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001992 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 self_s = PyBytes_AS_STRING(self);
1996 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001997
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001998 count = stringlib_count(self_s, self_len,
1999 from_s, from_len,
2000 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 if (count == 0) {
2003 /* no matches, return unchanged */
2004 return return_self(self);
2005 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002007 /* Check for overflow */
2008 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002009 assert(count > 0);
2010 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 PyErr_SetString(PyExc_OverflowError,
2012 "replacement bytes are too long");
2013 return NULL;
2014 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002015 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 if ( (result = (PyBytesObject *)
2018 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2019 return NULL;
2020 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 start = self_s;
2023 end = self_s + self_len;
2024 while (count-- > 0) {
2025 offset = stringlib_find(start, end-start,
2026 from_s, from_len,
2027 0);
2028 if (offset == -1)
2029 break;
2030 next = start+offset;
2031 if (next == start) {
2032 /* replace with the 'to' */
2033 Py_MEMCPY(result_s, to_s, to_len);
2034 result_s += to_len;
2035 start += from_len;
2036 } else {
2037 /* copy the unchanged old then the 'to' */
2038 Py_MEMCPY(result_s, start, next-start);
2039 result_s += (next-start);
2040 Py_MEMCPY(result_s, to_s, to_len);
2041 result_s += to_len;
2042 start = next+from_len;
2043 }
2044 }
2045 /* Copy the remainder of the remaining string */
2046 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002048 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049}
2050
2051
2052Py_LOCAL(PyBytesObject *)
2053replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 const char *from_s, Py_ssize_t from_len,
2055 const char *to_s, Py_ssize_t to_len,
2056 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 if (maxcount < 0) {
2059 maxcount = PY_SSIZE_T_MAX;
2060 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2061 /* nothing to do; return the original string */
2062 return return_self(self);
2063 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 if (maxcount == 0 ||
2066 (from_len == 0 && to_len == 0)) {
2067 /* nothing to do; return the original string */
2068 return return_self(self);
2069 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 if (from_len == 0) {
2074 /* insert the 'to' string everywhere. */
2075 /* >>> "Python".replace("", ".") */
2076 /* '.P.y.t.h.o.n.' */
2077 return replace_interleave(self, to_s, to_len, maxcount);
2078 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2081 /* point for an empty self string to generate a non-empty string */
2082 /* Special case so the remaining code always gets a non-empty string */
2083 if (PyBytes_GET_SIZE(self) == 0) {
2084 return return_self(self);
2085 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 if (to_len == 0) {
2088 /* delete all occurrences of 'from' string */
2089 if (from_len == 1) {
2090 return replace_delete_single_character(
2091 self, from_s[0], maxcount);
2092 } else {
2093 return replace_delete_substring(self, from_s,
2094 from_len, maxcount);
2095 }
2096 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 if (from_len == to_len) {
2101 if (from_len == 1) {
2102 return replace_single_character_in_place(
2103 self,
2104 from_s[0],
2105 to_s[0],
2106 maxcount);
2107 } else {
2108 return replace_substring_in_place(
2109 self, from_s, from_len, to_s, to_len,
2110 maxcount);
2111 }
2112 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002114 /* Otherwise use the more generic algorithms */
2115 if (from_len == 1) {
2116 return replace_single_character(self, from_s[0],
2117 to_s, to_len, maxcount);
2118 } else {
2119 /* len('from')>=2, len('to')>=1 */
2120 return replace_substring(self, from_s, from_len, to_s, to_len,
2121 maxcount);
2122 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123}
2124
2125PyDoc_STRVAR(replace__doc__,
2126"B.replace(old, new[, count]) -> bytes\n\
2127\n\
2128Return a copy of B with all occurrences of subsection\n\
2129old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002130given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
2132static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002133bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002134{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002135 Py_ssize_t count = -1;
2136 PyObject *from, *to;
2137 const char *from_s, *to_s;
2138 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2141 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 if (PyBytes_Check(from)) {
2144 from_s = PyBytes_AS_STRING(from);
2145 from_len = PyBytes_GET_SIZE(from);
2146 }
2147 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2148 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 if (PyBytes_Check(to)) {
2151 to_s = PyBytes_AS_STRING(to);
2152 to_len = PyBytes_GET_SIZE(to);
2153 }
2154 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2155 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 return (PyObject *)replace((PyBytesObject *) self,
2158 from_s, from_len,
2159 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160}
2161
2162/** End DALKE **/
2163
2164/* Matches the end (direction >= 0) or start (direction < 0) of self
2165 * against substr, using the start and end arguments. Returns
2166 * -1 on error, 0 if not found and 1 if found.
2167 */
2168Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002169_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 Py_ssize_t len = PyBytes_GET_SIZE(self);
2173 Py_ssize_t slen;
2174 const char* sub;
2175 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002177 if (PyBytes_Check(substr)) {
2178 sub = PyBytes_AS_STRING(substr);
2179 slen = PyBytes_GET_SIZE(substr);
2180 }
2181 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2182 return -1;
2183 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 if (direction < 0) {
2188 /* startswith */
2189 if (start+slen > len)
2190 return 0;
2191 } else {
2192 /* endswith */
2193 if (end-start < slen || start > len)
2194 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 if (end-slen > start)
2197 start = end - slen;
2198 }
2199 if (end-start >= slen)
2200 return ! memcmp(str+start, sub, slen);
2201 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202}
2203
2204
2205PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002206"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207\n\
2208Return True if B starts with the specified prefix, False otherwise.\n\
2209With optional start, test B beginning at that position.\n\
2210With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002211prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002212
2213static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002214bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002216 Py_ssize_t start = 0;
2217 Py_ssize_t end = PY_SSIZE_T_MAX;
2218 PyObject *subobj;
2219 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2222 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2223 return NULL;
2224 if (PyTuple_Check(subobj)) {
2225 Py_ssize_t i;
2226 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2227 result = _bytes_tailmatch(self,
2228 PyTuple_GET_ITEM(subobj, i),
2229 start, end, -1);
2230 if (result == -1)
2231 return NULL;
2232 else if (result) {
2233 Py_RETURN_TRUE;
2234 }
2235 }
2236 Py_RETURN_FALSE;
2237 }
2238 result = _bytes_tailmatch(self, subobj, start, end, -1);
2239 if (result == -1)
2240 return NULL;
2241 else
2242 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002243}
2244
2245
2246PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002247"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248\n\
2249Return True if B ends with the specified suffix, False otherwise.\n\
2250With optional start, test B beginning at that position.\n\
2251With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002252suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
2254static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002255bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002256{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002257 Py_ssize_t start = 0;
2258 Py_ssize_t end = PY_SSIZE_T_MAX;
2259 PyObject *subobj;
2260 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002262 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2263 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2264 return NULL;
2265 if (PyTuple_Check(subobj)) {
2266 Py_ssize_t i;
2267 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2268 result = _bytes_tailmatch(self,
2269 PyTuple_GET_ITEM(subobj, i),
2270 start, end, +1);
2271 if (result == -1)
2272 return NULL;
2273 else if (result) {
2274 Py_RETURN_TRUE;
2275 }
2276 }
2277 Py_RETURN_FALSE;
2278 }
2279 result = _bytes_tailmatch(self, subobj, start, end, +1);
2280 if (result == -1)
2281 return NULL;
2282 else
2283 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284}
2285
2286
2287PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002288"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002290Decode B using the codec registered for encoding. Default encoding\n\
2291is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002292handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2293a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002294as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002295able to handle UnicodeDecodeErrors.");
2296
2297static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002298bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002300 const char *encoding = NULL;
2301 const char *errors = NULL;
2302 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2305 return NULL;
2306 if (encoding == NULL)
2307 encoding = PyUnicode_GetDefaultEncoding();
2308 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002309}
2310
Guido van Rossum20188312006-05-05 15:15:40 +00002311
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002312PyDoc_STRVAR(splitlines__doc__,
2313"B.splitlines([keepends]) -> list of lines\n\
2314\n\
2315Return a list of the lines in B, breaking at line boundaries.\n\
2316Line breaks are not included in the resulting list unless keepends\n\
2317is given and true.");
2318
2319static PyObject*
2320bytes_splitlines(PyObject *self, PyObject *args)
2321{
2322 int keepends = 0;
2323
2324 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002325 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002326
2327 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002328 (PyObject*) self, PyBytes_AS_STRING(self),
2329 PyBytes_GET_SIZE(self), keepends
2330 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002331}
2332
2333
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002334PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002335"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002336\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002337Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002338Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002339Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002340
2341static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002342hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002343{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 if (c >= 128)
2345 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002346 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002347 return c - '0';
2348 else {
David Malcolm96960882010-11-05 17:23:41 +00002349 if (Py_ISUPPER(c))
2350 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002351 if (c >= 'a' && c <= 'f')
2352 return c - 'a' + 10;
2353 }
2354 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002355}
2356
2357static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002358bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002360 PyObject *newstring, *hexobj;
2361 char *buf;
2362 Py_UNICODE *hex;
2363 Py_ssize_t hexlen, byteslen, i, j;
2364 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2367 return NULL;
2368 assert(PyUnicode_Check(hexobj));
2369 hexlen = PyUnicode_GET_SIZE(hexobj);
2370 hex = PyUnicode_AS_UNICODE(hexobj);
2371 byteslen = hexlen/2; /* This overestimates if there are spaces */
2372 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2373 if (!newstring)
2374 return NULL;
2375 buf = PyBytes_AS_STRING(newstring);
2376 for (i = j = 0; i < hexlen; i += 2) {
2377 /* skip over spaces in the input */
2378 while (hex[i] == ' ')
2379 i++;
2380 if (i >= hexlen)
2381 break;
2382 top = hex_digit_to_int(hex[i]);
2383 bot = hex_digit_to_int(hex[i+1]);
2384 if (top == -1 || bot == -1) {
2385 PyErr_Format(PyExc_ValueError,
2386 "non-hexadecimal number found in "
2387 "fromhex() arg at position %zd", i);
2388 goto error;
2389 }
2390 buf[j++] = (top << 4) + bot;
2391 }
2392 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2393 goto error;
2394 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002395
2396 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002397 Py_XDECREF(newstring);
2398 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002399}
2400
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002401PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002402"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002403
2404static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002405bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002406{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 Py_ssize_t res;
2408 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2409 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002410}
2411
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002412
2413static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002414bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002415{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002417}
2418
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002419
2420static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002421bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2423 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2424 _Py_capitalize__doc__},
2425 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2426 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2427 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2428 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2429 endswith__doc__},
2430 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2431 expandtabs__doc__},
2432 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2433 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2434 fromhex_doc},
2435 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2436 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2437 _Py_isalnum__doc__},
2438 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2439 _Py_isalpha__doc__},
2440 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2441 _Py_isdigit__doc__},
2442 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2443 _Py_islower__doc__},
2444 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2445 _Py_isspace__doc__},
2446 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2447 _Py_istitle__doc__},
2448 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2449 _Py_isupper__doc__},
2450 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2451 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2452 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2453 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2454 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2455 _Py_maketrans__doc__},
2456 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2457 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2458 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2459 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2460 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2461 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2462 rpartition__doc__},
2463 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2464 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2465 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2466 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2467 splitlines__doc__},
2468 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2469 startswith__doc__},
2470 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2471 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2472 _Py_swapcase__doc__},
2473 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2474 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2475 translate__doc__},
2476 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2477 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2478 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2479 sizeof__doc__},
2480 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002481};
2482
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483static PyObject *
2484str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2485
2486static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002487bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002488{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 PyObject *x = NULL;
2490 const char *encoding = NULL;
2491 const char *errors = NULL;
2492 PyObject *new = NULL;
2493 Py_ssize_t size;
2494 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 if (type != &PyBytes_Type)
2497 return str_subtype_new(type, args, kwds);
2498 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2499 &encoding, &errors))
2500 return NULL;
2501 if (x == NULL) {
2502 if (encoding != NULL || errors != NULL) {
2503 PyErr_SetString(PyExc_TypeError,
2504 "encoding or errors without sequence "
2505 "argument");
2506 return NULL;
2507 }
2508 return PyBytes_FromString("");
2509 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002511 if (PyUnicode_Check(x)) {
2512 /* Encode via the codec registry */
2513 if (encoding == NULL) {
2514 PyErr_SetString(PyExc_TypeError,
2515 "string argument without an encoding");
2516 return NULL;
2517 }
2518 new = PyUnicode_AsEncodedString(x, encoding, errors);
2519 if (new == NULL)
2520 return NULL;
2521 assert(PyBytes_Check(new));
2522 return new;
2523 }
2524 /* Is it an integer? */
2525 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2526 if (size == -1 && PyErr_Occurred()) {
2527 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2528 return NULL;
2529 PyErr_Clear();
2530 }
2531 else if (size < 0) {
2532 PyErr_SetString(PyExc_ValueError, "negative count");
2533 return NULL;
2534 }
2535 else {
2536 new = PyBytes_FromStringAndSize(NULL, size);
2537 if (new == NULL) {
2538 return NULL;
2539 }
2540 if (size > 0) {
2541 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2542 }
2543 return new;
2544 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002546 /* If it's not unicode, there can't be encoding or errors */
2547 if (encoding != NULL || errors != NULL) {
2548 PyErr_SetString(PyExc_TypeError,
2549 "encoding or errors without a string argument");
2550 return NULL;
2551 }
2552 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002553}
2554
2555PyObject *
2556PyBytes_FromObject(PyObject *x)
2557{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 PyObject *new, *it;
2559 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 if (x == NULL) {
2562 PyErr_BadInternalCall();
2563 return NULL;
2564 }
2565 /* Use the modern buffer interface */
2566 if (PyObject_CheckBuffer(x)) {
2567 Py_buffer view;
2568 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2569 return NULL;
2570 new = PyBytes_FromStringAndSize(NULL, view.len);
2571 if (!new)
2572 goto fail;
2573 /* XXX(brett.cannon): Better way to get to internal buffer? */
2574 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2575 &view, view.len, 'C') < 0)
2576 goto fail;
2577 PyBuffer_Release(&view);
2578 return new;
2579 fail:
2580 Py_XDECREF(new);
2581 PyBuffer_Release(&view);
2582 return NULL;
2583 }
2584 if (PyUnicode_Check(x)) {
2585 PyErr_SetString(PyExc_TypeError,
2586 "cannot convert unicode object to bytes");
2587 return NULL;
2588 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002590 if (PyList_CheckExact(x)) {
2591 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2592 if (new == NULL)
2593 return NULL;
2594 for (i = 0; i < Py_SIZE(x); i++) {
2595 Py_ssize_t value = PyNumber_AsSsize_t(
2596 PyList_GET_ITEM(x, i), PyExc_ValueError);
2597 if (value == -1 && PyErr_Occurred()) {
2598 Py_DECREF(new);
2599 return NULL;
2600 }
2601 if (value < 0 || value >= 256) {
2602 PyErr_SetString(PyExc_ValueError,
2603 "bytes must be in range(0, 256)");
2604 Py_DECREF(new);
2605 return NULL;
2606 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002607 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002608 }
2609 return new;
2610 }
2611 if (PyTuple_CheckExact(x)) {
2612 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2613 if (new == NULL)
2614 return NULL;
2615 for (i = 0; i < Py_SIZE(x); i++) {
2616 Py_ssize_t value = PyNumber_AsSsize_t(
2617 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2618 if (value == -1 && PyErr_Occurred()) {
2619 Py_DECREF(new);
2620 return NULL;
2621 }
2622 if (value < 0 || value >= 256) {
2623 PyErr_SetString(PyExc_ValueError,
2624 "bytes must be in range(0, 256)");
2625 Py_DECREF(new);
2626 return NULL;
2627 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002628 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 }
2630 return new;
2631 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 /* For iterator version, create a string object and resize as needed */
2634 size = _PyObject_LengthHint(x, 64);
2635 if (size == -1 && PyErr_Occurred())
2636 return NULL;
2637 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2638 returning a shared empty bytes string. This required because we
2639 want to call _PyBytes_Resize() the returned object, which we can
2640 only do on bytes objects with refcount == 1. */
2641 size += 1;
2642 new = PyBytes_FromStringAndSize(NULL, size);
2643 if (new == NULL)
2644 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 /* Get the iterator */
2647 it = PyObject_GetIter(x);
2648 if (it == NULL)
2649 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 /* Run the iterator to exhaustion */
2652 for (i = 0; ; i++) {
2653 PyObject *item;
2654 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 /* Get the next item */
2657 item = PyIter_Next(it);
2658 if (item == NULL) {
2659 if (PyErr_Occurred())
2660 goto error;
2661 break;
2662 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002664 /* Interpret it as an int (__index__) */
2665 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2666 Py_DECREF(item);
2667 if (value == -1 && PyErr_Occurred())
2668 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 /* Range check */
2671 if (value < 0 || value >= 256) {
2672 PyErr_SetString(PyExc_ValueError,
2673 "bytes must be in range(0, 256)");
2674 goto error;
2675 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Append the byte */
2678 if (i >= size) {
2679 size = 2 * size + 1;
2680 if (_PyBytes_Resize(&new, size) < 0)
2681 goto error;
2682 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002683 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 }
2685 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 /* Clean up and return success */
2688 Py_DECREF(it);
2689 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
2691 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 /* Error handling when new != NULL */
2693 Py_XDECREF(it);
2694 Py_DECREF(new);
2695 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002696}
2697
2698static PyObject *
2699str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 PyObject *tmp, *pnew;
2702 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 assert(PyType_IsSubtype(type, &PyBytes_Type));
2705 tmp = bytes_new(&PyBytes_Type, args, kwds);
2706 if (tmp == NULL)
2707 return NULL;
2708 assert(PyBytes_CheckExact(tmp));
2709 n = PyBytes_GET_SIZE(tmp);
2710 pnew = type->tp_alloc(type, n);
2711 if (pnew != NULL) {
2712 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2713 PyBytes_AS_STRING(tmp), n+1);
2714 ((PyBytesObject *)pnew)->ob_shash =
2715 ((PyBytesObject *)tmp)->ob_shash;
2716 }
2717 Py_DECREF(tmp);
2718 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719}
2720
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002721PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002722"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002724bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2725bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002726\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002728 - an iterable yielding integers in range(256)\n\
2729 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730 - a bytes or a buffer object\n\
2731 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002732
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002733static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002734
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002736 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2737 "bytes",
2738 PyBytesObject_SIZE,
2739 sizeof(char),
2740 bytes_dealloc, /* tp_dealloc */
2741 0, /* tp_print */
2742 0, /* tp_getattr */
2743 0, /* tp_setattr */
2744 0, /* tp_reserved */
2745 (reprfunc)bytes_repr, /* tp_repr */
2746 0, /* tp_as_number */
2747 &bytes_as_sequence, /* tp_as_sequence */
2748 &bytes_as_mapping, /* tp_as_mapping */
2749 (hashfunc)bytes_hash, /* tp_hash */
2750 0, /* tp_call */
2751 bytes_str, /* tp_str */
2752 PyObject_GenericGetAttr, /* tp_getattro */
2753 0, /* tp_setattro */
2754 &bytes_as_buffer, /* tp_as_buffer */
2755 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2756 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2757 bytes_doc, /* tp_doc */
2758 0, /* tp_traverse */
2759 0, /* tp_clear */
2760 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2761 0, /* tp_weaklistoffset */
2762 bytes_iter, /* tp_iter */
2763 0, /* tp_iternext */
2764 bytes_methods, /* tp_methods */
2765 0, /* tp_members */
2766 0, /* tp_getset */
2767 &PyBaseObject_Type, /* tp_base */
2768 0, /* tp_dict */
2769 0, /* tp_descr_get */
2770 0, /* tp_descr_set */
2771 0, /* tp_dictoffset */
2772 0, /* tp_init */
2773 0, /* tp_alloc */
2774 bytes_new, /* tp_new */
2775 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002776};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002777
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002778void
2779PyBytes_Concat(register PyObject **pv, register PyObject *w)
2780{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 register PyObject *v;
2782 assert(pv != NULL);
2783 if (*pv == NULL)
2784 return;
2785 if (w == NULL) {
2786 Py_DECREF(*pv);
2787 *pv = NULL;
2788 return;
2789 }
2790 v = bytes_concat(*pv, w);
2791 Py_DECREF(*pv);
2792 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002793}
2794
2795void
2796PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 PyBytes_Concat(pv, w);
2799 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002800}
2801
2802
2803/* The following function breaks the notion that strings are immutable:
2804 it changes the size of a string. We get away with this only if there
2805 is only one module referencing the object. You can also think of it
2806 as creating a new string object and destroying the old one, only
2807 more efficiently. In any case, don't use this if the string may
2808 already be known to some other part of the code...
2809 Note that if there's not enough memory to resize the string, the original
2810 string object at *pv is deallocated, *pv is set to NULL, an "out of
2811 memory" exception is set, and -1 is returned. Else (on success) 0 is
2812 returned, and the value in *pv may or may not be the same as on input.
2813 As always, an extra byte is allocated for a trailing \0 byte (newsize
2814 does *not* include that), and a trailing \0 byte is stored.
2815*/
2816
2817int
2818_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2819{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 register PyObject *v;
2821 register PyBytesObject *sv;
2822 v = *pv;
2823 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2824 *pv = 0;
2825 Py_DECREF(v);
2826 PyErr_BadInternalCall();
2827 return -1;
2828 }
2829 /* XXX UNREF/NEWREF interface should be more symmetrical */
2830 _Py_DEC_REFTOTAL;
2831 _Py_ForgetReference(v);
2832 *pv = (PyObject *)
2833 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2834 if (*pv == NULL) {
2835 PyObject_Del(v);
2836 PyErr_NoMemory();
2837 return -1;
2838 }
2839 _Py_NewReference(*pv);
2840 sv = (PyBytesObject *) *pv;
2841 Py_SIZE(sv) = newsize;
2842 sv->ob_sval[newsize] = '\0';
2843 sv->ob_shash = -1; /* invalidate cached hash value */
2844 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002845}
2846
2847/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2848 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2849 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002850 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851 * . *pbuf is set to point into it,
2852 * *plen set to the # of chars following that.
2853 * Caller must decref it when done using pbuf.
2854 * The string starting at *pbuf is of the form
2855 * "-"? ("0x" | "0X")? digit+
2856 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2857 * set in flags. The case of hex digits will be correct,
2858 * There will be at least prec digits, zero-filled on the left if
2859 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002860 * val object to be converted
2861 * flags bitmask of format flags; only F_ALT is looked at
2862 * prec minimum number of digits; 0-fill on left if needed
2863 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002864 *
2865 * CAUTION: o, x and X conversions on regular ints can never
2866 * produce a '-' sign, but can for Python's unbounded ints.
2867 */
2868PyObject*
2869_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002870 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 PyObject *result = NULL;
2873 char *buf;
2874 Py_ssize_t i;
2875 int sign; /* 1 if '-', else 0 */
2876 int len; /* number of characters */
2877 Py_ssize_t llen;
2878 int numdigits; /* len == numnondigits + numdigits */
2879 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002880
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002881 /* Avoid exceeding SSIZE_T_MAX */
2882 if (prec > INT_MAX-3) {
2883 PyErr_SetString(PyExc_OverflowError,
2884 "precision too large");
2885 return NULL;
2886 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 switch (type) {
2889 case 'd':
2890 case 'u':
2891 /* Special-case boolean: we want 0/1 */
2892 if (PyBool_Check(val))
2893 result = PyNumber_ToBase(val, 10);
2894 else
2895 result = Py_TYPE(val)->tp_str(val);
2896 break;
2897 case 'o':
2898 numnondigits = 2;
2899 result = PyNumber_ToBase(val, 8);
2900 break;
2901 case 'x':
2902 case 'X':
2903 numnondigits = 2;
2904 result = PyNumber_ToBase(val, 16);
2905 break;
2906 default:
2907 assert(!"'type' not in [duoxX]");
2908 }
2909 if (!result)
2910 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002912 buf = _PyUnicode_AsString(result);
2913 if (!buf) {
2914 Py_DECREF(result);
2915 return NULL;
2916 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 /* To modify the string in-place, there can only be one reference. */
2919 if (Py_REFCNT(result) != 1) {
2920 PyErr_BadInternalCall();
2921 return NULL;
2922 }
2923 llen = PyUnicode_GetSize(result);
2924 if (llen > INT_MAX) {
2925 PyErr_SetString(PyExc_ValueError,
2926 "string too large in _PyBytes_FormatLong");
2927 return NULL;
2928 }
2929 len = (int)llen;
2930 if (buf[len-1] == 'L') {
2931 --len;
2932 buf[len] = '\0';
2933 }
2934 sign = buf[0] == '-';
2935 numnondigits += sign;
2936 numdigits = len - numnondigits;
2937 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002939 /* Get rid of base marker unless F_ALT */
2940 if (((flags & F_ALT) == 0 &&
2941 (type == 'o' || type == 'x' || type == 'X'))) {
2942 assert(buf[sign] == '0');
2943 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
2944 buf[sign+1] == 'o');
2945 numnondigits -= 2;
2946 buf += 2;
2947 len -= 2;
2948 if (sign)
2949 buf[0] = '-';
2950 assert(len == numnondigits + numdigits);
2951 assert(numdigits > 0);
2952 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 /* Fill with leading zeroes to meet minimum width. */
2955 if (prec > numdigits) {
2956 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
2957 numnondigits + prec);
2958 char *b1;
2959 if (!r1) {
2960 Py_DECREF(result);
2961 return NULL;
2962 }
2963 b1 = PyBytes_AS_STRING(r1);
2964 for (i = 0; i < numnondigits; ++i)
2965 *b1++ = *buf++;
2966 for (i = 0; i < prec - numdigits; i++)
2967 *b1++ = '0';
2968 for (i = 0; i < numdigits; i++)
2969 *b1++ = *buf++;
2970 *b1 = '\0';
2971 Py_DECREF(result);
2972 result = r1;
2973 buf = PyBytes_AS_STRING(result);
2974 len = numnondigits + prec;
2975 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002977 /* Fix up case for hex conversions. */
2978 if (type == 'X') {
2979 /* Need to convert all lower case letters to upper case.
2980 and need to convert 0x to 0X (and -0x to -0X). */
2981 for (i = 0; i < len; i++)
2982 if (buf[i] >= 'a' && buf[i] <= 'x')
2983 buf[i] -= 'a'-'A';
2984 }
2985 *pbuf = buf;
2986 *plen = len;
2987 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988}
2989
2990void
2991PyBytes_Fini(void)
2992{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002993 int i;
2994 for (i = 0; i < UCHAR_MAX + 1; i++) {
2995 Py_XDECREF(characters[i]);
2996 characters[i] = NULL;
2997 }
2998 Py_XDECREF(nullstring);
2999 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000}
3001
Benjamin Peterson4116f362008-05-27 00:36:20 +00003002/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003003
3004typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 PyObject_HEAD
3006 Py_ssize_t it_index;
3007 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003009
3010static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003011striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 _PyObject_GC_UNTRACK(it);
3014 Py_XDECREF(it->it_seq);
3015 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016}
3017
3018static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003019striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003020{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 Py_VISIT(it->it_seq);
3022 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003023}
3024
3025static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003026striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003027{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 PyBytesObject *seq;
3029 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 assert(it != NULL);
3032 seq = it->it_seq;
3033 if (seq == NULL)
3034 return NULL;
3035 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003037 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3038 item = PyLong_FromLong(
3039 (unsigned char)seq->ob_sval[it->it_index]);
3040 if (item != NULL)
3041 ++it->it_index;
3042 return item;
3043 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 Py_DECREF(seq);
3046 it->it_seq = NULL;
3047 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003048}
3049
3050static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003051striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003052{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003053 Py_ssize_t len = 0;
3054 if (it->it_seq)
3055 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3056 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003057}
3058
3059PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003062static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3064 length_hint_doc},
3065 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003066};
3067
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003068PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003069 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3070 "bytes_iterator", /* tp_name */
3071 sizeof(striterobject), /* tp_basicsize */
3072 0, /* tp_itemsize */
3073 /* methods */
3074 (destructor)striter_dealloc, /* tp_dealloc */
3075 0, /* tp_print */
3076 0, /* tp_getattr */
3077 0, /* tp_setattr */
3078 0, /* tp_reserved */
3079 0, /* tp_repr */
3080 0, /* tp_as_number */
3081 0, /* tp_as_sequence */
3082 0, /* tp_as_mapping */
3083 0, /* tp_hash */
3084 0, /* tp_call */
3085 0, /* tp_str */
3086 PyObject_GenericGetAttr, /* tp_getattro */
3087 0, /* tp_setattro */
3088 0, /* tp_as_buffer */
3089 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3090 0, /* tp_doc */
3091 (traverseproc)striter_traverse, /* tp_traverse */
3092 0, /* tp_clear */
3093 0, /* tp_richcompare */
3094 0, /* tp_weaklistoffset */
3095 PyObject_SelfIter, /* tp_iter */
3096 (iternextfunc)striter_next, /* tp_iternext */
3097 striter_methods, /* tp_methods */
3098 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003099};
3100
3101static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003102bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003104 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003106 if (!PyBytes_Check(seq)) {
3107 PyErr_BadInternalCall();
3108 return NULL;
3109 }
3110 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3111 if (it == NULL)
3112 return NULL;
3113 it->it_index = 0;
3114 Py_INCREF(seq);
3115 it->it_seq = (PyBytesObject *)seq;
3116 _PyObject_GC_TRACK(it);
3117 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003118}