blob: 4aa0748f09c17fe468d422a39b93ac620da4028b [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Christian Heimes90aa7642007-12-19 02:45:37 +000013 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000014
Gregory P. Smith60d241f2007-10-16 06:31:30 +000015 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000016 {
Antoine Pitroud1188562010-06-09 16:38:55 +000017 PyErr_Format(PyExc_TypeError,
18 "Type %.100s doesn't support the buffer API",
19 Py_TYPE(obj)->tp_name);
20 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000021 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000023 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000025 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000026}
27
/* Statistics kept only in COUNT_ALLOCS builds: how many times the cached
   empty object (null_strings) and a cached one-byte object (one_strings)
   were handed out instead of allocating a new object. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000028#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000029Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000031
/* Lazily filled caches of shared objects: characters[c] holds the one-byte
   bytes object whose byte value is c, nullstring holds the empty bytes
   object.  Entries stay NULL until PyBytes_FromStringAndSize() or
   PyBytes_FromString() first creates the corresponding object. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032static PyBytesObject *characters[UCHAR_MAX + 1];
33static PyBytesObject *nullstring;
34
Mark Dickinsonfd24b322008-12-06 15:33:31 +000035/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 for a string of length n should request PyBytesObject_SIZE + n bytes.
37
38 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 3 bytes per string allocation on a typical system.
40*/
41#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043/*
44 For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
45 parameter `size' denotes number of characters to allocate, not counting any
46 null terminating character.
47
48 For PyBytes_FromString(), the parameter `str' points to a null-terminated
49 string containing exactly `size' bytes.
50
51 For PyBytes_FromStringAndSize(), the parameter `str' is
52 either NULL or else points to a string containing at least `size' bytes.
53 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
54 not have to be null-terminated. (Therefore it is safe to construct a
55 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
56 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
57 bytes (setting the last byte to the null terminating character) and you can
58 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000059 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000060 alter the data yourself, since the strings may be shared.
61
62 The PyObject member `op->ob_size', which denotes the number of "extra
63 items" in a variable-size object, will contain the number of bytes
64 allocated for string data, not counting the null terminating character. It
65 is therefore equal to the `size' parameter (for
66 PyBytes_FromStringAndSize()) or the length of the string in the `str'
67 parameter (for PyBytes_FromString()).
68*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000069PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 register PyBytesObject *op;
73 if (size < 0) {
74 PyErr_SetString(PyExc_SystemError,
75 "Negative size passed to PyBytes_FromStringAndSize");
76 return NULL;
77 }
78 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000079#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000081#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 Py_INCREF(op);
83 return (PyObject *)op;
84 }
85 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000090#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_INCREF(op);
92 return (PyObject *)op;
93 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
96 PyErr_SetString(PyExc_OverflowError,
97 "byte string is too large");
98 return NULL;
99 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 /* Inline PyObject_NewVar */
102 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
103 if (op == NULL)
104 return PyErr_NoMemory();
105 PyObject_INIT_VAR(op, &PyBytes_Type, size);
106 op->ob_shash = -1;
107 if (str != NULL)
108 Py_MEMCPY(op->ob_sval, str, size);
109 op->ob_sval[size] = '\0';
110 /* share short strings */
111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
114 } else if (size == 1 && str != NULL) {
115 characters[*str & UCHAR_MAX] = op;
116 Py_INCREF(op);
117 }
118 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000119}
120
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000121PyObject *
122PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 register size_t size;
125 register PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 assert(str != NULL);
128 size = strlen(str);
129 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
130 PyErr_SetString(PyExc_OverflowError,
131 "byte string is too long");
132 return NULL;
133 }
134 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000137#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 /* Inline PyObject_NewVar */
150 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
151 if (op == NULL)
152 return PyErr_NoMemory();
153 PyObject_INIT_VAR(op, &PyBytes_Type, size);
154 op->ob_shash = -1;
155 Py_MEMCPY(op->ob_sval, str, size+1);
156 /* share short strings */
157 if (size == 0) {
158 nullstring = op;
159 Py_INCREF(op);
160 } else if (size == 1) {
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000166
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167PyObject *
168PyBytes_FromFormatV(const char *format, va_list vargs)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000175
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000176 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000181 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
185 * they don't affect the amount of space we reserve.
186 */
187 if ((*f == 'l' || *f == 'z') &&
188 (f[1] == 'd' || f[1] == 'u'))
189 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
198 case 'd': case 'u': case 'i': case 'x':
199 (void) va_arg(count, int);
200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
214 * XXX I count 18 -- what's the extra for?
215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
224 what's in the argument list) */
225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000231 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 /* step 2: fill the buffer */
233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
235 string = PyBytes_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
244 Py_ssize_t i;
245 int longflag = 0;
246 int size_tflag = 0;
247 /* parse the width.precision part (we're only
248 interested in the precision value, if any) */
249 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000250 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 n = (n*10) + *f++ - '0';
252 if (*f == '.') {
253 f++;
254 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000255 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 n = (n*10) + *f++ - '0';
257 }
David Malcolm96960882010-11-05 17:23:41 +0000258 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 f++;
260 /* handle the long flag, but only for %ld and %lu.
261 others can be added when necessary. */
262 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
263 longflag = 1;
264 ++f;
265 }
266 /* handle the size_t flag. */
267 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
268 size_tflag = 1;
269 ++f;
270 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 switch (*f) {
273 case 'c':
274 *s++ = va_arg(vargs, int);
275 break;
276 case 'd':
277 if (longflag)
278 sprintf(s, "%ld", va_arg(vargs, long));
279 else if (size_tflag)
280 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
281 va_arg(vargs, Py_ssize_t));
282 else
283 sprintf(s, "%d", va_arg(vargs, int));
284 s += strlen(s);
285 break;
286 case 'u':
287 if (longflag)
288 sprintf(s, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(s, "%u",
295 va_arg(vargs, unsigned int));
296 s += strlen(s);
297 break;
298 case 'i':
299 sprintf(s, "%i", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 'x':
303 sprintf(s, "%x", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 's':
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (n > 0 && i > n)
310 i = n;
311 Py_MEMCPY(s, p, i);
312 s += i;
313 break;
314 case 'p':
315 sprintf(s, "%p", va_arg(vargs, void*));
316 /* %p is ill-defined: ensure leading 0x. */
317 if (s[1] == 'X')
318 s[1] = 'x';
319 else if (s[1] != 'x') {
320 memmove(s+2, s, strlen(s)+1);
321 s[0] = '0';
322 s[1] = 'x';
323 }
324 s += strlen(s);
325 break;
326 case '%':
327 *s++ = '%';
328 break;
329 default:
330 strcpy(s, p);
331 s += strlen(s);
332 goto end;
333 }
334 } else
335 *s++ = *f;
336 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000337
338 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
340 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000341}
342
343PyObject *
344PyBytes_FromFormat(const char *format, ...)
345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 PyObject* ret;
347 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000348
349#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000353#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 ret = PyBytes_FromFormatV(format, vargs);
355 va_end(vargs);
356 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000357}
358
359static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000360bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000363}
364
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365/* Unescape a backslash-escaped string. If unicode is non-zero,
366 the string is a u-literal. If recode_encoding is non-zero,
367 the string is UTF-8 encoded and should be re-encoded in the
368 specified encoding. */
369
370PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 Py_ssize_t len,
372 const char *errors,
373 Py_ssize_t unicode,
374 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 int c;
377 char *p, *buf;
378 const char *end;
379 PyObject *v;
380 Py_ssize_t newlen = recode_encoding ? 4*len:len;
381 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
382 if (v == NULL)
383 return NULL;
384 p = buf = PyBytes_AsString(v);
385 end = s + len;
386 while (s < end) {
387 if (*s != '\\') {
388 non_esc:
389 if (recode_encoding && (*s & 0x80)) {
390 PyObject *u, *w;
391 char *r;
392 const char* t;
393 Py_ssize_t rn;
394 t = s;
395 /* Decode non-ASCII bytes as UTF-8. */
396 while (t < end && (*t & 0x80)) t++;
397 u = PyUnicode_DecodeUTF8(s, t - s, errors);
398 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 /* Recode them in target encoding. */
401 w = PyUnicode_AsEncodedString(
402 u, recode_encoding, errors);
403 Py_DECREF(u);
404 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 /* Append bytes to output buffer. */
407 assert(PyBytes_Check(w));
408 r = PyBytes_AS_STRING(w);
409 rn = PyBytes_GET_SIZE(w);
410 Py_MEMCPY(p, r, rn);
411 p += rn;
412 Py_DECREF(w);
413 s = t;
414 } else {
415 *p++ = *s++;
416 }
417 continue;
418 }
419 s++;
420 if (s==end) {
421 PyErr_SetString(PyExc_ValueError,
422 "Trailing \\ in string");
423 goto failed;
424 }
425 switch (*s++) {
426 /* XXX This assumes ASCII! */
427 case '\n': break;
428 case '\\': *p++ = '\\'; break;
429 case '\'': *p++ = '\''; break;
430 case '\"': *p++ = '\"'; break;
431 case 'b': *p++ = '\b'; break;
432 case 'f': *p++ = '\014'; break; /* FF */
433 case 't': *p++ = '\t'; break;
434 case 'n': *p++ = '\n'; break;
435 case 'r': *p++ = '\r'; break;
436 case 'v': *p++ = '\013'; break; /* VT */
437 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
438 case '0': case '1': case '2': case '3':
439 case '4': case '5': case '6': case '7':
440 c = s[-1] - '0';
441 if (s < end && '0' <= *s && *s <= '7') {
442 c = (c<<3) + *s++ - '0';
443 if (s < end && '0' <= *s && *s <= '7')
444 c = (c<<3) + *s++ - '0';
445 }
446 *p++ = c;
447 break;
448 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000449 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 unsigned int x = 0;
451 c = Py_CHARMASK(*s);
452 s++;
David Malcolm96960882010-11-05 17:23:41 +0000453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000455 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 x = 10 + c - 'a';
457 else
458 x = 10 + c - 'A';
459 x = x << 4;
460 c = Py_CHARMASK(*s);
461 s++;
David Malcolm96960882010-11-05 17:23:41 +0000462 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000464 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 x += 10 + c - 'a';
466 else
467 x += 10 + c - 'A';
468 *p++ = x;
469 break;
470 }
471 if (!errors || strcmp(errors, "strict") == 0) {
472 PyErr_SetString(PyExc_ValueError,
473 "invalid \\x escape");
474 goto failed;
475 }
476 if (strcmp(errors, "replace") == 0) {
477 *p++ = '?';
478 } else if (strcmp(errors, "ignore") == 0)
479 /* do nothing */;
480 else {
481 PyErr_Format(PyExc_ValueError,
482 "decoding error; unknown "
483 "error handling code: %.400s",
484 errors);
485 goto failed;
486 }
487 default:
488 *p++ = '\\';
489 s--;
490 goto non_esc; /* an arbitry number of unescaped
491 UTF-8 bytes may follow. */
492 }
493 }
494 if (p-buf < newlen)
495 _PyBytes_Resize(&v, p - buf);
496 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000497 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 Py_DECREF(v);
499 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000500}
501
502/* -------------------------------------------------------------------- */
503/* object api */
504
505Py_ssize_t
506PyBytes_Size(register PyObject *op)
507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 if (!PyBytes_Check(op)) {
509 PyErr_Format(PyExc_TypeError,
510 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
511 return -1;
512 }
513 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514}
515
516char *
517PyBytes_AsString(register PyObject *op)
518{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (!PyBytes_Check(op)) {
520 PyErr_Format(PyExc_TypeError,
521 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
522 return NULL;
523 }
524 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000525}
526
527int
528PyBytes_AsStringAndSize(register PyObject *obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 register char **s,
530 register Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (s == NULL) {
533 PyErr_BadInternalCall();
534 return -1;
535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (!PyBytes_Check(obj)) {
538 PyErr_Format(PyExc_TypeError,
539 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
540 return -1;
541 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 *s = PyBytes_AS_STRING(obj);
544 if (len != NULL)
545 *len = PyBytes_GET_SIZE(obj);
546 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
547 PyErr_SetString(PyExc_TypeError,
548 "expected bytes with no null");
549 return -1;
550 }
551 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000552}
Neal Norwitz6968b052007-02-27 19:02:19 +0000553
554/* -------------------------------------------------------------------- */
555/* Methods */
556
Eric Smith0923d1d2009-04-16 20:16:10 +0000557#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000558
559#include "stringlib/fastsearch.h"
560#include "stringlib/count.h"
561#include "stringlib/find.h"
562#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000563#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000564#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000565
Eric Smith0f78bff2009-11-30 01:01:42 +0000566#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000567
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568PyObject *
569PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000570{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 static const char *hexdigits = "0123456789abcdef";
572 register PyBytesObject* op = (PyBytesObject*) obj;
573 Py_ssize_t length = Py_SIZE(op);
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000574 size_t newsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 PyObject *v;
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000576 if (length > (PY_SSIZE_T_MAX - 3) / 4) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 PyErr_SetString(PyExc_OverflowError,
578 "bytes object is too large to make repr");
579 return NULL;
580 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000581 newsize = 3 + 4 * length;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 v = PyUnicode_FromUnicode(NULL, newsize);
583 if (v == NULL) {
584 return NULL;
585 }
586 else {
587 register Py_ssize_t i;
588 register Py_UNICODE c;
589 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
590 int quote;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 /* Figure out which quote to use; single is preferred */
593 quote = '\'';
594 if (smartquotes) {
595 char *test, *start;
596 start = PyBytes_AS_STRING(op);
597 for (test = start; test < start+length; ++test) {
598 if (*test == '"') {
599 quote = '\''; /* back to single */
600 goto decided;
601 }
602 else if (*test == '\'')
603 quote = '"';
604 }
605 decided:
606 ;
607 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 *p++ = 'b', *p++ = quote;
610 for (i = 0; i < length; i++) {
611 /* There's at least enough room for a hex escape
612 and a closing quote. */
613 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
614 c = op->ob_sval[i];
615 if (c == quote || c == '\\')
616 *p++ = '\\', *p++ = c;
617 else if (c == '\t')
618 *p++ = '\\', *p++ = 't';
619 else if (c == '\n')
620 *p++ = '\\', *p++ = 'n';
621 else if (c == '\r')
622 *p++ = '\\', *p++ = 'r';
623 else if (c < ' ' || c >= 0x7f) {
624 *p++ = '\\';
625 *p++ = 'x';
626 *p++ = hexdigits[(c & 0xf0) >> 4];
627 *p++ = hexdigits[c & 0xf];
628 }
629 else
630 *p++ = c;
631 }
632 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
633 *p++ = quote;
634 *p = '\0';
635 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
636 Py_DECREF(v);
637 return NULL;
638 }
639 return v;
640 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000641}
642
Neal Norwitz6968b052007-02-27 19:02:19 +0000643static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000644bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000647}
648
Neal Norwitz6968b052007-02-27 19:02:19 +0000649static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000650bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000651{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 if (Py_BytesWarningFlag) {
653 if (PyErr_WarnEx(PyExc_BytesWarning,
654 "str() on a bytes instance", 1))
655 return NULL;
656 }
657 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000658}
659
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000661bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664}
Neal Norwitz6968b052007-02-27 19:02:19 +0000665
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000666/* This is also used by PyBytes_Concat() */
667static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000668bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000669{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 Py_ssize_t size;
671 Py_buffer va, vb;
672 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 va.len = -1;
675 vb.len = -1;
676 if (_getbuffer(a, &va) < 0 ||
677 _getbuffer(b, &vb) < 0) {
678 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
679 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
680 goto done;
681 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 /* Optimize end cases */
684 if (va.len == 0 && PyBytes_CheckExact(b)) {
685 result = b;
686 Py_INCREF(result);
687 goto done;
688 }
689 if (vb.len == 0 && PyBytes_CheckExact(a)) {
690 result = a;
691 Py_INCREF(result);
692 goto done;
693 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 size = va.len + vb.len;
696 if (size < 0) {
697 PyErr_NoMemory();
698 goto done;
699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 result = PyBytes_FromStringAndSize(NULL, size);
702 if (result != NULL) {
703 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
704 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
705 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000706
707 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 if (va.len != -1)
709 PyBuffer_Release(&va);
710 if (vb.len != -1)
711 PyBuffer_Release(&vb);
712 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000713}
Neal Norwitz6968b052007-02-27 19:02:19 +0000714
715static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000716bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 register Py_ssize_t i;
719 register Py_ssize_t j;
720 register Py_ssize_t size;
721 register PyBytesObject *op;
722 size_t nbytes;
723 if (n < 0)
724 n = 0;
725 /* watch out for overflows: the size can overflow int,
726 * and the # of bytes needed can overflow size_t
727 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000728 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 PyErr_SetString(PyExc_OverflowError,
730 "repeated bytes are too long");
731 return NULL;
732 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000733 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
735 Py_INCREF(a);
736 return (PyObject *)a;
737 }
738 nbytes = (size_t)size;
739 if (nbytes + PyBytesObject_SIZE <= nbytes) {
740 PyErr_SetString(PyExc_OverflowError,
741 "repeated bytes are too long");
742 return NULL;
743 }
744 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
745 if (op == NULL)
746 return PyErr_NoMemory();
747 PyObject_INIT_VAR(op, &PyBytes_Type, size);
748 op->ob_shash = -1;
749 op->ob_sval[size] = '\0';
750 if (Py_SIZE(a) == 1 && n > 0) {
751 memset(op->ob_sval, a->ob_sval[0] , n);
752 return (PyObject *) op;
753 }
754 i = 0;
755 if (i < size) {
756 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
757 i = Py_SIZE(a);
758 }
759 while (i < size) {
760 j = (i <= size-i) ? i : size-i;
761 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
762 i += j;
763 }
764 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000765}
766
Guido van Rossum98297ee2007-11-06 21:34:58 +0000767static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000768bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000769{
770 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
771 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000772 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000773 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000774 PyErr_Clear();
775 if (_getbuffer(arg, &varg) < 0)
776 return -1;
777 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
778 varg.buf, varg.len, 0);
779 PyBuffer_Release(&varg);
780 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000781 }
782 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000783 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
784 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000785 }
786
Antoine Pitrou0010d372010-08-15 17:12:55 +0000787 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000788}
789
Neal Norwitz6968b052007-02-27 19:02:19 +0000790static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000791bytes_item(PyBytesObject *a, register Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 if (i < 0 || i >= Py_SIZE(a)) {
794 PyErr_SetString(PyExc_IndexError, "index out of range");
795 return NULL;
796 }
797 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000798}
799
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000800static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000801bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000802{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 int c;
804 Py_ssize_t len_a, len_b;
805 Py_ssize_t min_len;
806 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 /* Make sure both arguments are strings. */
809 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
810 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
811 (PyObject_IsInstance((PyObject*)a,
812 (PyObject*)&PyUnicode_Type) ||
813 PyObject_IsInstance((PyObject*)b,
814 (PyObject*)&PyUnicode_Type))) {
815 if (PyErr_WarnEx(PyExc_BytesWarning,
816 "Comparison between bytes and string", 1))
817 return NULL;
818 }
819 result = Py_NotImplemented;
820 goto out;
821 }
822 if (a == b) {
823 switch (op) {
824 case Py_EQ:case Py_LE:case Py_GE:
825 result = Py_True;
826 goto out;
827 case Py_NE:case Py_LT:case Py_GT:
828 result = Py_False;
829 goto out;
830 }
831 }
832 if (op == Py_EQ) {
833 /* Supporting Py_NE here as well does not save
834 much time, since Py_NE is rarely used. */
835 if (Py_SIZE(a) == Py_SIZE(b)
836 && (a->ob_sval[0] == b->ob_sval[0]
837 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
838 result = Py_True;
839 } else {
840 result = Py_False;
841 }
842 goto out;
843 }
844 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
845 min_len = (len_a < len_b) ? len_a : len_b;
846 if (min_len > 0) {
847 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
848 if (c==0)
849 c = memcmp(a->ob_sval, b->ob_sval, min_len);
850 } else
851 c = 0;
852 if (c == 0)
853 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
854 switch (op) {
855 case Py_LT: c = c < 0; break;
856 case Py_LE: c = c <= 0; break;
857 case Py_EQ: assert(0); break; /* unreachable */
858 case Py_NE: c = c != 0; break;
859 case Py_GT: c = c > 0; break;
860 case Py_GE: c = c >= 0; break;
861 default:
862 result = Py_NotImplemented;
863 goto out;
864 }
865 result = c ? Py_True : Py_False;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000866 out:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 Py_INCREF(result);
868 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000869}
870
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000871static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000872bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000873{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 register Py_ssize_t len;
875 register unsigned char *p;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000876 register Py_hash_t x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 if (a->ob_shash != -1)
879 return a->ob_shash;
880 len = Py_SIZE(a);
881 p = (unsigned char *) a->ob_sval;
882 x = *p << 7;
883 while (--len >= 0)
884 x = (1000003*x) ^ *p++;
885 x ^= Py_SIZE(a);
886 if (x == -1)
887 x = -2;
888 a->ob_shash = x;
889 return x;
Neal Norwitz6968b052007-02-27 19:02:19 +0000890}
891
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000892static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000893bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000894{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 if (PyIndex_Check(item)) {
896 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
897 if (i == -1 && PyErr_Occurred())
898 return NULL;
899 if (i < 0)
900 i += PyBytes_GET_SIZE(self);
901 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
902 PyErr_SetString(PyExc_IndexError,
903 "index out of range");
904 return NULL;
905 }
906 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
907 }
908 else if (PySlice_Check(item)) {
909 Py_ssize_t start, stop, step, slicelength, cur, i;
910 char* source_buf;
911 char* result_buf;
912 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000913
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000914 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 PyBytes_GET_SIZE(self),
916 &start, &stop, &step, &slicelength) < 0) {
917 return NULL;
918 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 if (slicelength <= 0) {
921 return PyBytes_FromStringAndSize("", 0);
922 }
923 else if (start == 0 && step == 1 &&
924 slicelength == PyBytes_GET_SIZE(self) &&
925 PyBytes_CheckExact(self)) {
926 Py_INCREF(self);
927 return (PyObject *)self;
928 }
929 else if (step == 1) {
930 return PyBytes_FromStringAndSize(
931 PyBytes_AS_STRING(self) + start,
932 slicelength);
933 }
934 else {
935 source_buf = PyBytes_AS_STRING(self);
936 result = PyBytes_FromStringAndSize(NULL, slicelength);
937 if (result == NULL)
938 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 result_buf = PyBytes_AS_STRING(result);
941 for (cur = start, i = 0; i < slicelength;
942 cur += step, i++) {
943 result_buf[i] = source_buf[cur];
944 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 return result;
947 }
948 }
949 else {
950 PyErr_Format(PyExc_TypeError,
951 "byte indices must be integers, not %.200s",
952 Py_TYPE(item)->tp_name);
953 return NULL;
954 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000955}
956
957static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000958bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
961 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962}
963
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000964static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 (lenfunc)bytes_length, /*sq_length*/
966 (binaryfunc)bytes_concat, /*sq_concat*/
967 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
968 (ssizeargfunc)bytes_item, /*sq_item*/
969 0, /*sq_slice*/
970 0, /*sq_ass_item*/
971 0, /*sq_ass_slice*/
972 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000973};
974
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000975static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 (lenfunc)bytes_length,
977 (binaryfunc)bytes_subscript,
978 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000979};
980
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000981static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 (getbufferproc)bytes_buffer_getbuffer,
983 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000984};
985
986
/* Strip modes.  Each value doubles as an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by strip mode. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for a strip mode: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
995
Neal Norwitz6968b052007-02-27 19:02:19 +0000996PyDoc_STRVAR(split__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000997"B.split([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +0000998\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +0000999Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001000If sep is not specified or is None, B is split on ASCII whitespace\n\
1001characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001002If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001003
1004static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001005bytes_split(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001006{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1008 Py_ssize_t maxsplit = -1;
1009 const char *s = PyBytes_AS_STRING(self), *sub;
1010 Py_buffer vsub;
1011 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1014 return NULL;
1015 if (maxsplit < 0)
1016 maxsplit = PY_SSIZE_T_MAX;
1017 if (subobj == Py_None)
1018 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1019 if (_getbuffer(subobj, &vsub) < 0)
1020 return NULL;
1021 sub = vsub.buf;
1022 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1025 PyBuffer_Release(&vsub);
1026 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001027}
1028
Neal Norwitz6968b052007-02-27 19:02:19 +00001029PyDoc_STRVAR(partition__doc__,
1030"B.partition(sep) -> (head, sep, tail)\n\
1031\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001032Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001033the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001034found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001035
1036static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001037bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001038{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 const char *sep;
1040 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 if (PyBytes_Check(sep_obj)) {
1043 sep = PyBytes_AS_STRING(sep_obj);
1044 sep_len = PyBytes_GET_SIZE(sep_obj);
1045 }
1046 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1047 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 return stringlib_partition(
1050 (PyObject*) self,
1051 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1052 sep_obj, sep, sep_len
1053 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001054}
1055
1056PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001057"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001058\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001059Search for the separator sep in B, starting at the end of B,\n\
1060and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001061part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001063
1064static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001065bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 const char *sep;
1068 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 if (PyBytes_Check(sep_obj)) {
1071 sep = PyBytes_AS_STRING(sep_obj);
1072 sep_len = PyBytes_GET_SIZE(sep_obj);
1073 }
1074 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1075 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return stringlib_rpartition(
1078 (PyObject*) self,
1079 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1080 sep_obj, sep, sep_len
1081 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001082}
1083
Neal Norwitz6968b052007-02-27 19:02:19 +00001084PyDoc_STRVAR(rsplit__doc__,
Benjamin Peterson4116f362008-05-27 00:36:20 +00001085"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001086\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001087Return a list of the sections in B, using sep as the delimiter,\n\
1088starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001089If sep is not given, B is split on ASCII whitespace characters\n\
1090(space, tab, return, newline, formfeed, vertical tab).\n\
1091If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001092
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001093
Neal Norwitz6968b052007-02-27 19:02:19 +00001094static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001095bytes_rsplit(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001096{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1098 Py_ssize_t maxsplit = -1;
1099 const char *s = PyBytes_AS_STRING(self), *sub;
1100 Py_buffer vsub;
1101 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1104 return NULL;
1105 if (maxsplit < 0)
1106 maxsplit = PY_SSIZE_T_MAX;
1107 if (subobj == Py_None)
1108 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1109 if (_getbuffer(subobj, &vsub) < 0)
1110 return NULL;
1111 sub = vsub.buf;
1112 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1115 PyBuffer_Release(&vsub);
1116 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001117}
1118
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001119
1120PyDoc_STRVAR(join__doc__,
1121"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001122\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001123Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1125
Neal Norwitz6968b052007-02-27 19:02:19 +00001126static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001127bytes_join(PyObject *self, PyObject *orig)
Neal Norwitz6968b052007-02-27 19:02:19 +00001128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 char *sep = PyBytes_AS_STRING(self);
1130 const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1131 PyObject *res = NULL;
1132 char *p;
1133 Py_ssize_t seqlen = 0;
1134 size_t sz = 0;
1135 Py_ssize_t i;
1136 PyObject *seq, *item;
Neal Norwitz6968b052007-02-27 19:02:19 +00001137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 seq = PySequence_Fast(orig, "");
1139 if (seq == NULL) {
1140 return NULL;
1141 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 seqlen = PySequence_Size(seq);
1144 if (seqlen == 0) {
1145 Py_DECREF(seq);
1146 return PyBytes_FromString("");
1147 }
1148 if (seqlen == 1) {
1149 item = PySequence_Fast_GET_ITEM(seq, 0);
1150 if (PyBytes_CheckExact(item)) {
1151 Py_INCREF(item);
1152 Py_DECREF(seq);
1153 return item;
1154 }
1155 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 /* There are at least two things to join, or else we have a subclass
1158 * of the builtin types in the sequence.
1159 * Do a pre-pass to figure out the total amount of space we'll
1160 * need (sz), and see whether all argument are bytes.
1161 */
1162 /* XXX Shouldn't we use _getbuffer() on these items instead? */
1163 for (i = 0; i < seqlen; i++) {
1164 const size_t old_sz = sz;
1165 item = PySequence_Fast_GET_ITEM(seq, i);
1166 if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1167 PyErr_Format(PyExc_TypeError,
1168 "sequence item %zd: expected bytes,"
1169 " %.80s found",
1170 i, Py_TYPE(item)->tp_name);
1171 Py_DECREF(seq);
1172 return NULL;
1173 }
1174 sz += Py_SIZE(item);
1175 if (i != 0)
1176 sz += seplen;
1177 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1178 PyErr_SetString(PyExc_OverflowError,
1179 "join() result is too long for bytes");
1180 Py_DECREF(seq);
1181 return NULL;
1182 }
1183 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 /* Allocate result space. */
1186 res = PyBytes_FromStringAndSize((char*)NULL, sz);
1187 if (res == NULL) {
1188 Py_DECREF(seq);
1189 return NULL;
1190 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 /* Catenate everything. */
1193 /* I'm not worried about a PyByteArray item growing because there's
1194 nowhere in this function where we release the GIL. */
1195 p = PyBytes_AS_STRING(res);
1196 for (i = 0; i < seqlen; ++i) {
1197 size_t n;
1198 char *q;
1199 if (i) {
1200 Py_MEMCPY(p, sep, seplen);
1201 p += seplen;
1202 }
1203 item = PySequence_Fast_GET_ITEM(seq, i);
1204 n = Py_SIZE(item);
1205 if (PyBytes_Check(item))
1206 q = PyBytes_AS_STRING(item);
1207 else
1208 q = PyByteArray_AS_STRING(item);
1209 Py_MEMCPY(p, q, n);
1210 p += n;
1211 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 Py_DECREF(seq);
1214 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001215}
1216
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001217PyObject *
1218_PyBytes_Join(PyObject *sep, PyObject *x)
1219{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 assert(sep != NULL && PyBytes_Check(sep));
1221 assert(x != NULL);
1222 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223}
1224
/* Helper macro to fix up start/end slice values in place.
 *
 * `end` is clamped to `len`; negative `start`/`end` are made relative to
 * `len` and then clamped to 0.  `start` and `end` must be modifiable
 * lvalues.  `len` may be evaluated more than once, so it must be free of
 * side effects.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct inside an unbraced if/else; invoke it with a trailing
 * semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if (end > (len))                        \
            end = (len);                        \
        else if (end < 0) {                     \
            end += (len);                       \
            if (end < 0)                        \
                end = 0;                        \
        }                                       \
        if (start < 0) {                        \
            start += (len);                     \
            if (start < 0)                      \
                start = 0;                      \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239
1240Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001241bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 PyObject *subobj;
1244 const char *sub;
1245 Py_ssize_t sub_len;
1246 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1247 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1250 &obj_start, &obj_end))
1251 return -2;
1252 /* To support None in "start" and "end" arguments, meaning
1253 the same as if they were not passed.
1254 */
1255 if (obj_start != Py_None)
1256 if (!_PyEval_SliceIndex(obj_start, &start))
1257 return -2;
1258 if (obj_end != Py_None)
1259 if (!_PyEval_SliceIndex(obj_end, &end))
1260 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 if (PyBytes_Check(subobj)) {
1263 sub = PyBytes_AS_STRING(subobj);
1264 sub_len = PyBytes_GET_SIZE(subobj);
1265 }
1266 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1267 /* XXX - the "expected a character buffer object" is pretty
1268 confusing for a non-expert. remap to something else ? */
1269 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 if (dir > 0)
1272 return stringlib_find_slice(
1273 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1274 sub, sub_len, start, end);
1275 else
1276 return stringlib_rfind_slice(
1277 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1278 sub, sub_len, start, end);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279}
1280
1281
1282PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001283"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001284\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001285Return the lowest index in B where substring sub is found,\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286such that sub is contained within s[start:end]. Optional\n\
1287arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001288\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001289Return -1 on failure.");
1290
Neal Norwitz6968b052007-02-27 19:02:19 +00001291static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001292bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001293{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 Py_ssize_t result = bytes_find_internal(self, args, +1);
1295 if (result == -2)
1296 return NULL;
1297 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001298}
1299
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001300
1301PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001302"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001303\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304Like B.find() but raise ValueError when the substring is not found.");
1305
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001306static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001307bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001308{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 Py_ssize_t result = bytes_find_internal(self, args, +1);
1310 if (result == -2)
1311 return NULL;
1312 if (result == -1) {
1313 PyErr_SetString(PyExc_ValueError,
1314 "substring not found");
1315 return NULL;
1316 }
1317 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001318}
1319
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001320
1321PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001322"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001323\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324Return the highest index in B where substring sub is found,\n\
1325such that sub is contained within s[start:end]. Optional\n\
1326arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001327\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001328Return -1 on failure.");
1329
Neal Norwitz6968b052007-02-27 19:02:19 +00001330static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001331bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001332{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 Py_ssize_t result = bytes_find_internal(self, args, -1);
1334 if (result == -2)
1335 return NULL;
1336 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001337}
1338
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001339
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001340PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001341"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001342\n\
1343Like B.rfind() but raise ValueError when the substring is not found.");
1344
1345static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001346bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 Py_ssize_t result = bytes_find_internal(self, args, -1);
1349 if (result == -2)
1350 return NULL;
1351 if (result == -1) {
1352 PyErr_SetString(PyExc_ValueError,
1353 "substring not found");
1354 return NULL;
1355 }
1356 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001357}
1358
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
1360Py_LOCAL_INLINE(PyObject *)
1361do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 Py_buffer vsep;
1364 char *s = PyBytes_AS_STRING(self);
1365 Py_ssize_t len = PyBytes_GET_SIZE(self);
1366 char *sep;
1367 Py_ssize_t seplen;
1368 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 if (_getbuffer(sepobj, &vsep) < 0)
1371 return NULL;
1372 sep = vsep.buf;
1373 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 i = 0;
1376 if (striptype != RIGHTSTRIP) {
1377 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1378 i++;
1379 }
1380 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 j = len;
1383 if (striptype != LEFTSTRIP) {
1384 do {
1385 j--;
1386 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1387 j++;
1388 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1393 Py_INCREF(self);
1394 return (PyObject*)self;
1395 }
1396 else
1397 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001398}
1399
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400
1401Py_LOCAL_INLINE(PyObject *)
1402do_strip(PyBytesObject *self, int striptype)
1403{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 char *s = PyBytes_AS_STRING(self);
1405 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 i = 0;
1408 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001409 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 i++;
1411 }
1412 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 j = len;
1415 if (striptype != LEFTSTRIP) {
1416 do {
1417 j--;
David Malcolm96960882010-11-05 17:23:41 +00001418 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 j++;
1420 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1423 Py_INCREF(self);
1424 return (PyObject*)self;
1425 }
1426 else
1427 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428}
1429
1430
1431Py_LOCAL_INLINE(PyObject *)
1432do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1433{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1437 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 if (sep != NULL && sep != Py_None) {
1440 return do_xstrip(self, striptype, sep);
1441 }
1442 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443}
1444
1445
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001446PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001448\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001449Strip leading and trailing bytes contained in the argument.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001451static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001452bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001453{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (PyTuple_GET_SIZE(args) == 0)
1455 return do_strip(self, BOTHSTRIP); /* Common case */
1456 else
1457 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458}
1459
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001461PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001462"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001463\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001464Strip leading bytes contained in the argument.\n\
1465If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001467bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (PyTuple_GET_SIZE(args) == 0)
1470 return do_strip(self, LEFTSTRIP); /* Common case */
1471 else
1472 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001473}
1474
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001475
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001476PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001478\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001479Strip trailing bytes contained in the argument.\n\
1480If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001481static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001482bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 if (PyTuple_GET_SIZE(args) == 0)
1485 return do_strip(self, RIGHTSTRIP); /* Common case */
1486 else
1487 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001488}
Neal Norwitz6968b052007-02-27 19:02:19 +00001489
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490
1491PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001492"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001493\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001494Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001495string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001496as in slice notation.");
1497
1498static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001499bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001500{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 PyObject *sub_obj;
1502 const char *str = PyBytes_AS_STRING(self), *sub;
1503 Py_ssize_t sub_len;
1504 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1507 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1508 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 if (PyBytes_Check(sub_obj)) {
1511 sub = PyBytes_AS_STRING(sub_obj);
1512 sub_len = PyBytes_GET_SIZE(sub_obj);
1513 }
1514 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1515 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001517 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 return PyLong_FromSsize_t(
1520 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1521 );
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001522}
1523
1524
1525PyDoc_STRVAR(translate__doc__,
1526"B.translate(table[, deletechars]) -> bytes\n\
1527\n\
1528Return a copy of B, where all characters occurring in the\n\
1529optional argument deletechars are removed, and the remaining\n\
1530characters have been mapped through the given translation\n\
1531table, which must be a bytes object of length 256.");
1532
1533static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001534bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001535{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001536 register char *input, *output;
1537 const char *table;
1538 register Py_ssize_t i, c, changed = 0;
1539 PyObject *input_obj = (PyObject*)self;
1540 const char *output_start, *del_table=NULL;
1541 Py_ssize_t inlen, tablen, dellen = 0;
1542 PyObject *result;
1543 int trans_table[256];
1544 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1547 &tableobj, &delobj))
1548 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001549
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 if (PyBytes_Check(tableobj)) {
1551 table = PyBytes_AS_STRING(tableobj);
1552 tablen = PyBytes_GET_SIZE(tableobj);
1553 }
1554 else if (tableobj == Py_None) {
1555 table = NULL;
1556 tablen = 256;
1557 }
1558 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1559 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 if (tablen != 256) {
1562 PyErr_SetString(PyExc_ValueError,
1563 "translation table must be 256 characters long");
1564 return NULL;
1565 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 if (delobj != NULL) {
1568 if (PyBytes_Check(delobj)) {
1569 del_table = PyBytes_AS_STRING(delobj);
1570 dellen = PyBytes_GET_SIZE(delobj);
1571 }
1572 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1573 return NULL;
1574 }
1575 else {
1576 del_table = NULL;
1577 dellen = 0;
1578 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 inlen = PyBytes_GET_SIZE(input_obj);
1581 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1582 if (result == NULL)
1583 return NULL;
1584 output_start = output = PyBytes_AsString(result);
1585 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 if (dellen == 0 && table != NULL) {
1588 /* If no deletions are required, use faster code */
1589 for (i = inlen; --i >= 0; ) {
1590 c = Py_CHARMASK(*input++);
1591 if (Py_CHARMASK((*output++ = table[c])) != c)
1592 changed = 1;
1593 }
1594 if (changed || !PyBytes_CheckExact(input_obj))
1595 return result;
1596 Py_DECREF(result);
1597 Py_INCREF(input_obj);
1598 return input_obj;
1599 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 if (table == NULL) {
1602 for (i = 0; i < 256; i++)
1603 trans_table[i] = Py_CHARMASK(i);
1604 } else {
1605 for (i = 0; i < 256; i++)
1606 trans_table[i] = Py_CHARMASK(table[i]);
1607 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 for (i = 0; i < dellen; i++)
1610 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 for (i = inlen; --i >= 0; ) {
1613 c = Py_CHARMASK(*input++);
1614 if (trans_table[c] != -1)
1615 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1616 continue;
1617 changed = 1;
1618 }
1619 if (!changed && PyBytes_CheckExact(input_obj)) {
1620 Py_DECREF(result);
1621 Py_INCREF(input_obj);
1622 return input_obj;
1623 }
1624 /* Fix the size of the resulting string */
1625 if (inlen > 0)
1626 _PyBytes_Resize(&result, output - output_start);
1627 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001628}
1629
1630
Georg Brandlabc38772009-04-12 15:51:51 +00001631static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001632bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001633{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001635}
1636
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001637/* find and count characters and substrings */
1638
/* Locate the first occurrence of byte c within the target_len bytes that
   start at target.  Evaluates to a char * to the match, or NULL if the
   byte does not occur.  Every macro argument is parenthesized so that
   arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1641
1642/* String ops must return a string. */
1643/* If the object is subclass of string, create a copy */
1644Py_LOCAL(PyBytesObject *)
1645return_self(PyBytesObject *self)
1646{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 if (PyBytes_CheckExact(self)) {
1648 Py_INCREF(self);
1649 return self;
1650 }
1651 return (PyBytesObject *)PyBytes_FromStringAndSize(
1652 PyBytes_AS_STRING(self),
1653 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001654}
1655
1656Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001657countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 Py_ssize_t count=0;
1660 const char *start=target;
1661 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 while ( (start=findchar(start, end-start, c)) != NULL ) {
1664 count++;
1665 if (count >= maxcount)
1666 break;
1667 start += 1;
1668 }
1669 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001670}
1671
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672
1673/* Algorithms for different cases of string replacement */
1674
1675/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1676Py_LOCAL(PyBytesObject *)
1677replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 const char *to_s, Py_ssize_t to_len,
1679 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001680{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 char *self_s, *result_s;
1682 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001683 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001687
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001688 /* 1 at the end plus 1 after every character;
1689 count = min(maxcount, self_len + 1) */
1690 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001692 else
1693 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1694 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 /* Check for overflow */
1697 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001698 assert(count > 0);
1699 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 PyErr_SetString(PyExc_OverflowError,
1701 "replacement bytes are too long");
1702 return NULL;
1703 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001704 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 if (! (result = (PyBytesObject *)
1707 PyBytes_FromStringAndSize(NULL, result_len)) )
1708 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 self_s = PyBytes_AS_STRING(self);
1711 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 /* Lay the first one down (guaranteed this will occur) */
1716 Py_MEMCPY(result_s, to_s, to_len);
1717 result_s += to_len;
1718 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 for (i=0; i<count; i++) {
1721 *result_s++ = *self_s++;
1722 Py_MEMCPY(result_s, to_s, to_len);
1723 result_s += to_len;
1724 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 /* Copy the rest of the original string */
1727 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730}
1731
1732/* Special case for deleting a single character */
1733/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1734Py_LOCAL(PyBytesObject *)
1735replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001737{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001738 char *self_s, *result_s;
1739 char *start, *next, *end;
1740 Py_ssize_t self_len, result_len;
1741 Py_ssize_t count;
1742 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 self_len = PyBytes_GET_SIZE(self);
1745 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 count = countchar(self_s, self_len, from_c, maxcount);
1748 if (count == 0) {
1749 return return_self(self);
1750 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 result_len = self_len - count; /* from_len == 1 */
1753 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 if ( (result = (PyBytesObject *)
1756 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1757 return NULL;
1758 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001760 start = self_s;
1761 end = self_s + self_len;
1762 while (count-- > 0) {
1763 next = findchar(start, end-start, from_c);
1764 if (next == NULL)
1765 break;
1766 Py_MEMCPY(result_s, start, next-start);
1767 result_s += (next-start);
1768 start = next+1;
1769 }
1770 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773}
1774
1775/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1776
1777Py_LOCAL(PyBytesObject *)
1778replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 const char *from_s, Py_ssize_t from_len,
1780 Py_ssize_t maxcount) {
1781 char *self_s, *result_s;
1782 char *start, *next, *end;
1783 Py_ssize_t self_len, result_len;
1784 Py_ssize_t count, offset;
1785 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 self_len = PyBytes_GET_SIZE(self);
1788 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 count = stringlib_count(self_s, self_len,
1791 from_s, from_len,
1792 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001794 if (count == 0) {
1795 /* no matches */
1796 return return_self(self);
1797 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 result_len = self_len - (count * from_len);
1800 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 if ( (result = (PyBytesObject *)
1803 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1804 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 start = self_s;
1809 end = self_s + self_len;
1810 while (count-- > 0) {
1811 offset = stringlib_find(start, end-start,
1812 from_s, from_len,
1813 0);
1814 if (offset == -1)
1815 break;
1816 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001818 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 result_s += (next-start);
1821 start = next+from_len;
1822 }
1823 Py_MEMCPY(result_s, start, end-start);
1824 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001825}
1826
1827/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1828Py_LOCAL(PyBytesObject *)
1829replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 char from_c, char to_c,
1831 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 char *self_s, *result_s, *start, *end, *next;
1834 Py_ssize_t self_len;
1835 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 /* The result string will be the same size */
1838 self_s = PyBytes_AS_STRING(self);
1839 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 if (next == NULL) {
1844 /* No matches; return the original string */
1845 return return_self(self);
1846 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001848 /* Need to make a new string */
1849 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1850 if (result == NULL)
1851 return NULL;
1852 result_s = PyBytes_AS_STRING(result);
1853 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001855 /* change everything in-place, starting with this one */
1856 start = result_s + (next-self_s);
1857 *start = to_c;
1858 start++;
1859 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 while (--maxcount > 0) {
1862 next = findchar(start, end-start, from_c);
1863 if (next == NULL)
1864 break;
1865 *next = to_c;
1866 start = next+1;
1867 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870}
1871
1872/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1873Py_LOCAL(PyBytesObject *)
1874replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 const char *from_s, Py_ssize_t from_len,
1876 const char *to_s, Py_ssize_t to_len,
1877 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001879 char *result_s, *start, *end;
1880 char *self_s;
1881 Py_ssize_t self_len, offset;
1882 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 self_s = PyBytes_AS_STRING(self);
1887 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 offset = stringlib_find(self_s, self_len,
1890 from_s, from_len,
1891 0);
1892 if (offset == -1) {
1893 /* No matches; return the original string */
1894 return return_self(self);
1895 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 /* Need to make a new string */
1898 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1899 if (result == NULL)
1900 return NULL;
1901 result_s = PyBytes_AS_STRING(result);
1902 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001904 /* change everything in-place, starting with this one */
1905 start = result_s + offset;
1906 Py_MEMCPY(start, to_s, from_len);
1907 start += from_len;
1908 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001910 while ( --maxcount > 0) {
1911 offset = stringlib_find(start, end-start,
1912 from_s, from_len,
1913 0);
1914 if (offset==-1)
1915 break;
1916 Py_MEMCPY(start+offset, to_s, from_len);
1917 start += offset+from_len;
1918 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921}
1922
1923/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1924Py_LOCAL(PyBytesObject *)
1925replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 char from_c,
1927 const char *to_s, Py_ssize_t to_len,
1928 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 char *self_s, *result_s;
1931 char *start, *next, *end;
1932 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001933 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 self_s = PyBytes_AS_STRING(self);
1937 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 count = countchar(self_s, self_len, from_c, maxcount);
1940 if (count == 0) {
1941 /* no matches, return unchanged */
1942 return return_self(self);
1943 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 /* use the difference between current and new, hence the "-1" */
1946 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001947 assert(count > 0);
1948 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 PyErr_SetString(PyExc_OverflowError,
1950 "replacement bytes are too long");
1951 return NULL;
1952 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001953 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955 if ( (result = (PyBytesObject *)
1956 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1957 return NULL;
1958 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 start = self_s;
1961 end = self_s + self_len;
1962 while (count-- > 0) {
1963 next = findchar(start, end-start, from_c);
1964 if (next == NULL)
1965 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 if (next == start) {
1968 /* replace with the 'to' */
1969 Py_MEMCPY(result_s, to_s, to_len);
1970 result_s += to_len;
1971 start += 1;
1972 } else {
1973 /* copy the unchanged old then the 'to' */
1974 Py_MEMCPY(result_s, start, next-start);
1975 result_s += (next-start);
1976 Py_MEMCPY(result_s, to_s, to_len);
1977 result_s += to_len;
1978 start = next+1;
1979 }
1980 }
1981 /* Copy the remainder of the remaining string */
1982 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985}
1986
1987/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1988Py_LOCAL(PyBytesObject *)
1989replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 const char *from_s, Py_ssize_t from_len,
1991 const char *to_s, Py_ssize_t to_len,
1992 Py_ssize_t maxcount) {
1993 char *self_s, *result_s;
1994 char *start, *next, *end;
1995 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001996 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 self_s = PyBytes_AS_STRING(self);
2000 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 count = stringlib_count(self_s, self_len,
2003 from_s, from_len,
2004 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 if (count == 0) {
2007 /* no matches, return unchanged */
2008 return return_self(self);
2009 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 /* Check for overflow */
2012 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002013 assert(count > 0);
2014 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002015 PyErr_SetString(PyExc_OverflowError,
2016 "replacement bytes are too long");
2017 return NULL;
2018 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002019 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 if ( (result = (PyBytesObject *)
2022 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2023 return NULL;
2024 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 start = self_s;
2027 end = self_s + self_len;
2028 while (count-- > 0) {
2029 offset = stringlib_find(start, end-start,
2030 from_s, from_len,
2031 0);
2032 if (offset == -1)
2033 break;
2034 next = start+offset;
2035 if (next == start) {
2036 /* replace with the 'to' */
2037 Py_MEMCPY(result_s, to_s, to_len);
2038 result_s += to_len;
2039 start += from_len;
2040 } else {
2041 /* copy the unchanged old then the 'to' */
2042 Py_MEMCPY(result_s, start, next-start);
2043 result_s += (next-start);
2044 Py_MEMCPY(result_s, to_s, to_len);
2045 result_s += to_len;
2046 start = next+from_len;
2047 }
2048 }
2049 /* Copy the remainder of the remaining string */
2050 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053}
2054
2055
2056Py_LOCAL(PyBytesObject *)
2057replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 const char *from_s, Py_ssize_t from_len,
2059 const char *to_s, Py_ssize_t to_len,
2060 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (maxcount < 0) {
2063 maxcount = PY_SSIZE_T_MAX;
2064 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2065 /* nothing to do; return the original string */
2066 return return_self(self);
2067 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 if (maxcount == 0 ||
2070 (from_len == 0 && to_len == 0)) {
2071 /* nothing to do; return the original string */
2072 return return_self(self);
2073 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 if (from_len == 0) {
2078 /* insert the 'to' string everywhere. */
2079 /* >>> "Python".replace("", ".") */
2080 /* '.P.y.t.h.o.n.' */
2081 return replace_interleave(self, to_s, to_len, maxcount);
2082 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2085 /* point for an empty self string to generate a non-empty string */
2086 /* Special case so the remaining code always gets a non-empty string */
2087 if (PyBytes_GET_SIZE(self) == 0) {
2088 return return_self(self);
2089 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 if (to_len == 0) {
2092 /* delete all occurrences of 'from' string */
2093 if (from_len == 1) {
2094 return replace_delete_single_character(
2095 self, from_s[0], maxcount);
2096 } else {
2097 return replace_delete_substring(self, from_s,
2098 from_len, maxcount);
2099 }
2100 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002104 if (from_len == to_len) {
2105 if (from_len == 1) {
2106 return replace_single_character_in_place(
2107 self,
2108 from_s[0],
2109 to_s[0],
2110 maxcount);
2111 } else {
2112 return replace_substring_in_place(
2113 self, from_s, from_len, to_s, to_len,
2114 maxcount);
2115 }
2116 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 /* Otherwise use the more generic algorithms */
2119 if (from_len == 1) {
2120 return replace_single_character(self, from_s[0],
2121 to_s, to_len, maxcount);
2122 } else {
2123 /* len('from')>=2, len('to')>=1 */
2124 return replace_substring(self, from_s, from_len, to_s, to_len,
2125 maxcount);
2126 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127}
2128
2129PyDoc_STRVAR(replace__doc__,
2130"B.replace(old, new[, count]) -> bytes\n\
2131\n\
2132Return a copy of B with all occurrences of subsection\n\
2133old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002134given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135
2136static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002137bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 Py_ssize_t count = -1;
2140 PyObject *from, *to;
2141 const char *from_s, *to_s;
2142 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002144 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2145 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002147 if (PyBytes_Check(from)) {
2148 from_s = PyBytes_AS_STRING(from);
2149 from_len = PyBytes_GET_SIZE(from);
2150 }
2151 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2152 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 if (PyBytes_Check(to)) {
2155 to_s = PyBytes_AS_STRING(to);
2156 to_len = PyBytes_GET_SIZE(to);
2157 }
2158 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2159 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 return (PyObject *)replace((PyBytesObject *) self,
2162 from_s, from_len,
2163 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002164}
2165
2166/** End DALKE **/
2167
2168/* Matches the end (direction >= 0) or start (direction < 0) of self
2169 * against substr, using the start and end arguments. Returns
2170 * -1 on error, 0 if not found and 1 if found.
2171 */
2172Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002173_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 Py_ssize_t len = PyBytes_GET_SIZE(self);
2177 Py_ssize_t slen;
2178 const char* sub;
2179 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 if (PyBytes_Check(substr)) {
2182 sub = PyBytes_AS_STRING(substr);
2183 slen = PyBytes_GET_SIZE(substr);
2184 }
2185 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2186 return -1;
2187 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002191 if (direction < 0) {
2192 /* startswith */
2193 if (start+slen > len)
2194 return 0;
2195 } else {
2196 /* endswith */
2197 if (end-start < slen || start > len)
2198 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 if (end-slen > start)
2201 start = end - slen;
2202 }
2203 if (end-start >= slen)
2204 return ! memcmp(str+start, sub, slen);
2205 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002206}
2207
2208
2209PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002210"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002211\n\
2212Return True if B starts with the specified prefix, False otherwise.\n\
2213With optional start, test B beginning at that position.\n\
2214With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002215prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002216
2217static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002218bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002219{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002220 Py_ssize_t start = 0;
2221 Py_ssize_t end = PY_SSIZE_T_MAX;
2222 PyObject *subobj;
2223 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002224
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002225 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2226 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2227 return NULL;
2228 if (PyTuple_Check(subobj)) {
2229 Py_ssize_t i;
2230 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2231 result = _bytes_tailmatch(self,
2232 PyTuple_GET_ITEM(subobj, i),
2233 start, end, -1);
2234 if (result == -1)
2235 return NULL;
2236 else if (result) {
2237 Py_RETURN_TRUE;
2238 }
2239 }
2240 Py_RETURN_FALSE;
2241 }
2242 result = _bytes_tailmatch(self, subobj, start, end, -1);
2243 if (result == -1)
2244 return NULL;
2245 else
2246 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002247}
2248
2249
2250PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002251"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002252\n\
2253Return True if B ends with the specified suffix, False otherwise.\n\
2254With optional start, test B beginning at that position.\n\
2255With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002256suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257
2258static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002259bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 Py_ssize_t start = 0;
2262 Py_ssize_t end = PY_SSIZE_T_MAX;
2263 PyObject *subobj;
2264 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002266 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2267 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2268 return NULL;
2269 if (PyTuple_Check(subobj)) {
2270 Py_ssize_t i;
2271 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2272 result = _bytes_tailmatch(self,
2273 PyTuple_GET_ITEM(subobj, i),
2274 start, end, +1);
2275 if (result == -1)
2276 return NULL;
2277 else if (result) {
2278 Py_RETURN_TRUE;
2279 }
2280 }
2281 Py_RETURN_FALSE;
2282 }
2283 result = _bytes_tailmatch(self, subobj, start, end, +1);
2284 if (result == -1)
2285 return NULL;
2286 else
2287 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288}
2289
2290
2291PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002292"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002293\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002294Decode B using the codec registered for encoding. Default encoding\n\
2295is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002296handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2297a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002298as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002299able to handle UnicodeDecodeErrors.");
2300
2301static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002302bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002303{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 const char *encoding = NULL;
2305 const char *errors = NULL;
2306 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2309 return NULL;
2310 if (encoding == NULL)
2311 encoding = PyUnicode_GetDefaultEncoding();
2312 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002313}
2314
Georg Brandl02524622010-12-02 18:06:51 +00002315PyDoc_STRVAR(transform__doc__,
2316"B.transform(encoding, errors='strict') -> bytes\n\
2317\n\
2318Transform B using the codec registered for encoding. errors may be given\n\
2319to set a different error handling scheme.");
2320
2321static PyObject *
2322bytes_transform(PyObject *self, PyObject *args, PyObject *kwargs)
2323{
2324 const char *encoding = NULL;
2325 const char *errors = NULL;
2326 static char *kwlist[] = {"encoding", "errors", 0};
2327 PyObject *v;
2328
2329 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
2330 kwlist, &encoding, &errors))
2331 return NULL;
2332
2333 v = PyCodec_Encode(self, encoding, errors);
2334 if (v == NULL)
2335 return NULL;
2336 if (!PyBytes_Check(v)) {
2337 PyErr_Format(PyExc_TypeError,
2338 "encoder did not return a bytes object (type=%.400s)",
2339 Py_TYPE(v)->tp_name);
2340 Py_DECREF(v);
2341 return NULL;
2342 }
2343 return v;
2344}
2345
2346
2347PyDoc_STRVAR(untransform__doc__,
2348"B.untransform(encoding, errors='strict') -> bytes\n\
2349\n\
2350Reverse-transform B using the codec registered for encoding. errors may\n\
2351be given to set a different error handling scheme.");
2352
2353static PyObject *
2354bytes_untransform(PyObject *self, PyObject *args, PyObject *kwargs)
2355{
2356 const char *encoding = NULL;
2357 const char *errors = NULL;
2358 static char *kwlist[] = {"encoding", "errors", 0};
2359 PyObject *v;
2360
2361 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
2362 kwlist, &encoding, &errors))
2363 return NULL;
2364
2365 v = PyCodec_Decode(self, encoding, errors);
2366 if (v == NULL)
2367 return NULL;
2368 if (!PyBytes_Check(v)) {
2369 PyErr_Format(PyExc_TypeError,
2370 "decoder did not return a bytes object (type=%.400s)",
2371 Py_TYPE(v)->tp_name);
2372 Py_DECREF(v);
2373 return NULL;
2374 }
2375 return v;
2376}
Guido van Rossum20188312006-05-05 15:15:40 +00002377
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002378PyDoc_STRVAR(splitlines__doc__,
2379"B.splitlines([keepends]) -> list of lines\n\
2380\n\
2381Return a list of the lines in B, breaking at line boundaries.\n\
2382Line breaks are not included in the resulting list unless keepends\n\
2383is given and true.");
2384
2385static PyObject*
2386bytes_splitlines(PyObject *self, PyObject *args)
2387{
2388 int keepends = 0;
2389
2390 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002391 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002392
2393 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002394 (PyObject*) self, PyBytes_AS_STRING(self),
2395 PyBytes_GET_SIZE(self), keepends
2396 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002397}
2398
2399
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002400PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002401"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002402\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002403Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002404Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002405Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002406
2407static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002408hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 if (c >= 128)
2411 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002412 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 return c - '0';
2414 else {
David Malcolm96960882010-11-05 17:23:41 +00002415 if (Py_ISUPPER(c))
2416 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 if (c >= 'a' && c <= 'f')
2418 return c - 'a' + 10;
2419 }
2420 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002421}
2422
2423static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002424bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 PyObject *newstring, *hexobj;
2427 char *buf;
2428 Py_UNICODE *hex;
2429 Py_ssize_t hexlen, byteslen, i, j;
2430 int top, bot;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2433 return NULL;
2434 assert(PyUnicode_Check(hexobj));
2435 hexlen = PyUnicode_GET_SIZE(hexobj);
2436 hex = PyUnicode_AS_UNICODE(hexobj);
2437 byteslen = hexlen/2; /* This overestimates if there are spaces */
2438 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2439 if (!newstring)
2440 return NULL;
2441 buf = PyBytes_AS_STRING(newstring);
2442 for (i = j = 0; i < hexlen; i += 2) {
2443 /* skip over spaces in the input */
2444 while (hex[i] == ' ')
2445 i++;
2446 if (i >= hexlen)
2447 break;
2448 top = hex_digit_to_int(hex[i]);
2449 bot = hex_digit_to_int(hex[i+1]);
2450 if (top == -1 || bot == -1) {
2451 PyErr_Format(PyExc_ValueError,
2452 "non-hexadecimal number found in "
2453 "fromhex() arg at position %zd", i);
2454 goto error;
2455 }
2456 buf[j++] = (top << 4) + bot;
2457 }
2458 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2459 goto error;
2460 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002461
2462 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 Py_XDECREF(newstring);
2464 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002465}
2466
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002467PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002468"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002469
2470static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002471bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002472{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 Py_ssize_t res;
2474 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2475 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002476}
2477
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002478
2479static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002480bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002481{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002482 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002483}
2484
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002485
2486static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002487bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2489 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2490 _Py_capitalize__doc__},
2491 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2492 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2493 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2494 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2495 endswith__doc__},
2496 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2497 expandtabs__doc__},
2498 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2499 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2500 fromhex_doc},
2501 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2502 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2503 _Py_isalnum__doc__},
2504 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2505 _Py_isalpha__doc__},
2506 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2507 _Py_isdigit__doc__},
2508 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2509 _Py_islower__doc__},
2510 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2511 _Py_isspace__doc__},
2512 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2513 _Py_istitle__doc__},
2514 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2515 _Py_isupper__doc__},
2516 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2517 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2518 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2519 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2520 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2521 _Py_maketrans__doc__},
2522 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2523 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2524 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2525 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2526 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2527 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2528 rpartition__doc__},
2529 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2530 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
2531 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2532 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
2533 splitlines__doc__},
2534 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2535 startswith__doc__},
2536 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2537 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2538 _Py_swapcase__doc__},
2539 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Georg Brandl02524622010-12-02 18:06:51 +00002540 {"transform", (PyCFunction)bytes_transform, METH_VARARGS | METH_KEYWORDS, transform__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2542 translate__doc__},
Georg Brandl02524622010-12-02 18:06:51 +00002543 {"untransform", (PyCFunction)bytes_untransform, METH_VARARGS | METH_KEYWORDS, untransform__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2545 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2546 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2547 sizeof__doc__},
2548 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002549};
2550
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002551static PyObject *
2552str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2553
2554static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002555bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 PyObject *x = NULL;
2558 const char *encoding = NULL;
2559 const char *errors = NULL;
2560 PyObject *new = NULL;
2561 Py_ssize_t size;
2562 static char *kwlist[] = {"source", "encoding", "errors", 0};
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 if (type != &PyBytes_Type)
2565 return str_subtype_new(type, args, kwds);
2566 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2567 &encoding, &errors))
2568 return NULL;
2569 if (x == NULL) {
2570 if (encoding != NULL || errors != NULL) {
2571 PyErr_SetString(PyExc_TypeError,
2572 "encoding or errors without sequence "
2573 "argument");
2574 return NULL;
2575 }
2576 return PyBytes_FromString("");
2577 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002579 if (PyUnicode_Check(x)) {
2580 /* Encode via the codec registry */
2581 if (encoding == NULL) {
2582 PyErr_SetString(PyExc_TypeError,
2583 "string argument without an encoding");
2584 return NULL;
2585 }
2586 new = PyUnicode_AsEncodedString(x, encoding, errors);
2587 if (new == NULL)
2588 return NULL;
2589 assert(PyBytes_Check(new));
2590 return new;
2591 }
2592 /* Is it an integer? */
2593 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2594 if (size == -1 && PyErr_Occurred()) {
2595 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2596 return NULL;
2597 PyErr_Clear();
2598 }
2599 else if (size < 0) {
2600 PyErr_SetString(PyExc_ValueError, "negative count");
2601 return NULL;
2602 }
2603 else {
2604 new = PyBytes_FromStringAndSize(NULL, size);
2605 if (new == NULL) {
2606 return NULL;
2607 }
2608 if (size > 0) {
2609 memset(((PyBytesObject*)new)->ob_sval, 0, size);
2610 }
2611 return new;
2612 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 /* If it's not unicode, there can't be encoding or errors */
2615 if (encoding != NULL || errors != NULL) {
2616 PyErr_SetString(PyExc_TypeError,
2617 "encoding or errors without a string argument");
2618 return NULL;
2619 }
2620 return PyObject_Bytes(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002621}
2622
2623PyObject *
2624PyBytes_FromObject(PyObject *x)
2625{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 PyObject *new, *it;
2627 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 if (x == NULL) {
2630 PyErr_BadInternalCall();
2631 return NULL;
2632 }
2633 /* Use the modern buffer interface */
2634 if (PyObject_CheckBuffer(x)) {
2635 Py_buffer view;
2636 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2637 return NULL;
2638 new = PyBytes_FromStringAndSize(NULL, view.len);
2639 if (!new)
2640 goto fail;
2641 /* XXX(brett.cannon): Better way to get to internal buffer? */
2642 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2643 &view, view.len, 'C') < 0)
2644 goto fail;
2645 PyBuffer_Release(&view);
2646 return new;
2647 fail:
2648 Py_XDECREF(new);
2649 PyBuffer_Release(&view);
2650 return NULL;
2651 }
2652 if (PyUnicode_Check(x)) {
2653 PyErr_SetString(PyExc_TypeError,
2654 "cannot convert unicode object to bytes");
2655 return NULL;
2656 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002658 if (PyList_CheckExact(x)) {
2659 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2660 if (new == NULL)
2661 return NULL;
2662 for (i = 0; i < Py_SIZE(x); i++) {
2663 Py_ssize_t value = PyNumber_AsSsize_t(
2664 PyList_GET_ITEM(x, i), PyExc_ValueError);
2665 if (value == -1 && PyErr_Occurred()) {
2666 Py_DECREF(new);
2667 return NULL;
2668 }
2669 if (value < 0 || value >= 256) {
2670 PyErr_SetString(PyExc_ValueError,
2671 "bytes must be in range(0, 256)");
2672 Py_DECREF(new);
2673 return NULL;
2674 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002675 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 }
2677 return new;
2678 }
2679 if (PyTuple_CheckExact(x)) {
2680 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2681 if (new == NULL)
2682 return NULL;
2683 for (i = 0; i < Py_SIZE(x); i++) {
2684 Py_ssize_t value = PyNumber_AsSsize_t(
2685 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2686 if (value == -1 && PyErr_Occurred()) {
2687 Py_DECREF(new);
2688 return NULL;
2689 }
2690 if (value < 0 || value >= 256) {
2691 PyErr_SetString(PyExc_ValueError,
2692 "bytes must be in range(0, 256)");
2693 Py_DECREF(new);
2694 return NULL;
2695 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002696 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 }
2698 return new;
2699 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 /* For iterator version, create a string object and resize as needed */
2702 size = _PyObject_LengthHint(x, 64);
2703 if (size == -1 && PyErr_Occurred())
2704 return NULL;
2705 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2706 returning a shared empty bytes string. This required because we
2707 want to call _PyBytes_Resize() the returned object, which we can
2708 only do on bytes objects with refcount == 1. */
2709 size += 1;
2710 new = PyBytes_FromStringAndSize(NULL, size);
2711 if (new == NULL)
2712 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 /* Get the iterator */
2715 it = PyObject_GetIter(x);
2716 if (it == NULL)
2717 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002719 /* Run the iterator to exhaustion */
2720 for (i = 0; ; i++) {
2721 PyObject *item;
2722 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 /* Get the next item */
2725 item = PyIter_Next(it);
2726 if (item == NULL) {
2727 if (PyErr_Occurred())
2728 goto error;
2729 break;
2730 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002732 /* Interpret it as an int (__index__) */
2733 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2734 Py_DECREF(item);
2735 if (value == -1 && PyErr_Occurred())
2736 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002738 /* Range check */
2739 if (value < 0 || value >= 256) {
2740 PyErr_SetString(PyExc_ValueError,
2741 "bytes must be in range(0, 256)");
2742 goto error;
2743 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002745 /* Append the byte */
2746 if (i >= size) {
2747 size = 2 * size + 1;
2748 if (_PyBytes_Resize(&new, size) < 0)
2749 goto error;
2750 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002751 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 }
2753 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002755 /* Clean up and return success */
2756 Py_DECREF(it);
2757 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002758
2759 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002760 /* Error handling when new != NULL */
2761 Py_XDECREF(it);
2762 Py_DECREF(new);
2763 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002764}
2765
2766static PyObject *
2767str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2768{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 PyObject *tmp, *pnew;
2770 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002772 assert(PyType_IsSubtype(type, &PyBytes_Type));
2773 tmp = bytes_new(&PyBytes_Type, args, kwds);
2774 if (tmp == NULL)
2775 return NULL;
2776 assert(PyBytes_CheckExact(tmp));
2777 n = PyBytes_GET_SIZE(tmp);
2778 pnew = type->tp_alloc(type, n);
2779 if (pnew != NULL) {
2780 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2781 PyBytes_AS_STRING(tmp), n+1);
2782 ((PyBytesObject *)pnew)->ob_shash =
2783 ((PyBytesObject *)tmp)->ob_shash;
2784 }
2785 Py_DECREF(tmp);
2786 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787}
2788
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002789PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002790"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002792bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2793bytes(memory_view) -> bytes\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002794\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002796 - an iterable yielding integers in range(256)\n\
2797 - a text string encoded using the specified encoding\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002798 - a bytes or a buffer object\n\
2799 - any object implementing the buffer API.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002800
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002801static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002802
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2805 "bytes",
2806 PyBytesObject_SIZE,
2807 sizeof(char),
2808 bytes_dealloc, /* tp_dealloc */
2809 0, /* tp_print */
2810 0, /* tp_getattr */
2811 0, /* tp_setattr */
2812 0, /* tp_reserved */
2813 (reprfunc)bytes_repr, /* tp_repr */
2814 0, /* tp_as_number */
2815 &bytes_as_sequence, /* tp_as_sequence */
2816 &bytes_as_mapping, /* tp_as_mapping */
2817 (hashfunc)bytes_hash, /* tp_hash */
2818 0, /* tp_call */
2819 bytes_str, /* tp_str */
2820 PyObject_GenericGetAttr, /* tp_getattro */
2821 0, /* tp_setattro */
2822 &bytes_as_buffer, /* tp_as_buffer */
2823 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2824 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2825 bytes_doc, /* tp_doc */
2826 0, /* tp_traverse */
2827 0, /* tp_clear */
2828 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2829 0, /* tp_weaklistoffset */
2830 bytes_iter, /* tp_iter */
2831 0, /* tp_iternext */
2832 bytes_methods, /* tp_methods */
2833 0, /* tp_members */
2834 0, /* tp_getset */
2835 &PyBaseObject_Type, /* tp_base */
2836 0, /* tp_dict */
2837 0, /* tp_descr_get */
2838 0, /* tp_descr_set */
2839 0, /* tp_dictoffset */
2840 0, /* tp_init */
2841 0, /* tp_alloc */
2842 bytes_new, /* tp_new */
2843 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002844};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002845
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002846void
2847PyBytes_Concat(register PyObject **pv, register PyObject *w)
2848{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002849 register PyObject *v;
2850 assert(pv != NULL);
2851 if (*pv == NULL)
2852 return;
2853 if (w == NULL) {
2854 Py_DECREF(*pv);
2855 *pv = NULL;
2856 return;
2857 }
2858 v = bytes_concat(*pv, w);
2859 Py_DECREF(*pv);
2860 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002861}
2862
2863void
2864PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2865{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 PyBytes_Concat(pv, w);
2867 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868}
2869
2870
2871/* The following function breaks the notion that strings are immutable:
2872 it changes the size of a string. We get away with this only if there
2873 is only one module referencing the object. You can also think of it
2874 as creating a new string object and destroying the old one, only
2875 more efficiently. In any case, don't use this if the string may
2876 already be known to some other part of the code...
2877 Note that if there's not enough memory to resize the string, the original
2878 string object at *pv is deallocated, *pv is set to NULL, an "out of
2879 memory" exception is set, and -1 is returned. Else (on success) 0 is
2880 returned, and the value in *pv may or may not be the same as on input.
2881 As always, an extra byte is allocated for a trailing \0 byte (newsize
2882 does *not* include that), and a trailing \0 byte is stored.
2883*/
2884
2885int
2886_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2887{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 register PyObject *v;
2889 register PyBytesObject *sv;
2890 v = *pv;
2891 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2892 *pv = 0;
2893 Py_DECREF(v);
2894 PyErr_BadInternalCall();
2895 return -1;
2896 }
2897 /* XXX UNREF/NEWREF interface should be more symmetrical */
2898 _Py_DEC_REFTOTAL;
2899 _Py_ForgetReference(v);
2900 *pv = (PyObject *)
2901 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2902 if (*pv == NULL) {
2903 PyObject_Del(v);
2904 PyErr_NoMemory();
2905 return -1;
2906 }
2907 _Py_NewReference(*pv);
2908 sv = (PyBytesObject *) *pv;
2909 Py_SIZE(sv) = newsize;
2910 sv->ob_sval[newsize] = '\0';
2911 sv->ob_shash = -1; /* invalidate cached hash value */
2912 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913}
2914
2915/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
2916 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2917 * Python's regular ints.
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002918 * Return value: a new PyBytes*, or NULL if error.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919 * . *pbuf is set to point into it,
2920 * *plen set to the # of chars following that.
2921 * Caller must decref it when done using pbuf.
2922 * The string starting at *pbuf is of the form
2923 * "-"? ("0x" | "0X")? digit+
2924 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2925 * set in flags. The case of hex digits will be correct,
2926 * There will be at least prec digits, zero-filled on the left if
2927 * necessary to get that many.
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 * val object to be converted
2929 * flags bitmask of format flags; only F_ALT is looked at
2930 * prec minimum number of digits; 0-fill on left if needed
2931 * type a character in [duoxX]; u acts the same as d
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002932 *
2933 * CAUTION: o, x and X conversions on regular ints can never
2934 * produce a '-' sign, but can for Python's unbounded ints.
2935 */
2936PyObject*
2937_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002938 char **pbuf, int *plen)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002939{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002940 PyObject *result = NULL;
2941 char *buf;
2942 Py_ssize_t i;
2943 int sign; /* 1 if '-', else 0 */
2944 int len; /* number of characters */
2945 Py_ssize_t llen;
2946 int numdigits; /* len == numnondigits + numdigits */
2947 int numnondigits = 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 /* Avoid exceeding SSIZE_T_MAX */
2950 if (prec > INT_MAX-3) {
2951 PyErr_SetString(PyExc_OverflowError,
2952 "precision too large");
2953 return NULL;
2954 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002956 switch (type) {
2957 case 'd':
2958 case 'u':
2959 /* Special-case boolean: we want 0/1 */
2960 if (PyBool_Check(val))
2961 result = PyNumber_ToBase(val, 10);
2962 else
2963 result = Py_TYPE(val)->tp_str(val);
2964 break;
2965 case 'o':
2966 numnondigits = 2;
2967 result = PyNumber_ToBase(val, 8);
2968 break;
2969 case 'x':
2970 case 'X':
2971 numnondigits = 2;
2972 result = PyNumber_ToBase(val, 16);
2973 break;
2974 default:
2975 assert(!"'type' not in [duoxX]");
2976 }
2977 if (!result)
2978 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002980 buf = _PyUnicode_AsString(result);
2981 if (!buf) {
2982 Py_DECREF(result);
2983 return NULL;
2984 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002986 /* To modify the string in-place, there can only be one reference. */
2987 if (Py_REFCNT(result) != 1) {
2988 PyErr_BadInternalCall();
2989 return NULL;
2990 }
2991 llen = PyUnicode_GetSize(result);
2992 if (llen > INT_MAX) {
2993 PyErr_SetString(PyExc_ValueError,
2994 "string too large in _PyBytes_FormatLong");
2995 return NULL;
2996 }
2997 len = (int)llen;
2998 if (buf[len-1] == 'L') {
2999 --len;
3000 buf[len] = '\0';
3001 }
3002 sign = buf[0] == '-';
3003 numnondigits += sign;
3004 numdigits = len - numnondigits;
3005 assert(numdigits > 0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003007 /* Get rid of base marker unless F_ALT */
3008 if (((flags & F_ALT) == 0 &&
3009 (type == 'o' || type == 'x' || type == 'X'))) {
3010 assert(buf[sign] == '0');
3011 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
3012 buf[sign+1] == 'o');
3013 numnondigits -= 2;
3014 buf += 2;
3015 len -= 2;
3016 if (sign)
3017 buf[0] = '-';
3018 assert(len == numnondigits + numdigits);
3019 assert(numdigits > 0);
3020 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 /* Fill with leading zeroes to meet minimum width. */
3023 if (prec > numdigits) {
3024 PyObject *r1 = PyBytes_FromStringAndSize(NULL,
3025 numnondigits + prec);
3026 char *b1;
3027 if (!r1) {
3028 Py_DECREF(result);
3029 return NULL;
3030 }
3031 b1 = PyBytes_AS_STRING(r1);
3032 for (i = 0; i < numnondigits; ++i)
3033 *b1++ = *buf++;
3034 for (i = 0; i < prec - numdigits; i++)
3035 *b1++ = '0';
3036 for (i = 0; i < numdigits; i++)
3037 *b1++ = *buf++;
3038 *b1 = '\0';
3039 Py_DECREF(result);
3040 result = r1;
3041 buf = PyBytes_AS_STRING(result);
3042 len = numnondigits + prec;
3043 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 /* Fix up case for hex conversions. */
3046 if (type == 'X') {
3047 /* Need to convert all lower case letters to upper case.
3048 and need to convert 0x to 0X (and -0x to -0X). */
3049 for (i = 0; i < len; i++)
3050 if (buf[i] >= 'a' && buf[i] <= 'x')
3051 buf[i] -= 'a'-'A';
3052 }
3053 *pbuf = buf;
3054 *plen = len;
3055 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003056}
3057
3058void
3059PyBytes_Fini(void)
3060{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003061 int i;
3062 for (i = 0; i < UCHAR_MAX + 1; i++) {
3063 Py_XDECREF(characters[i]);
3064 characters[i] = NULL;
3065 }
3066 Py_XDECREF(nullstring);
3067 nullstring = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003068}
3069
Benjamin Peterson4116f362008-05-27 00:36:20 +00003070/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003071
3072typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 PyObject_HEAD
3074 Py_ssize_t it_index;
3075 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003076} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003077
3078static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003079striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003080{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 _PyObject_GC_UNTRACK(it);
3082 Py_XDECREF(it->it_seq);
3083 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003084}
3085
3086static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003087striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003088{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 Py_VISIT(it->it_seq);
3090 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003091}
3092
3093static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003094striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003095{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003096 PyBytesObject *seq;
3097 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003099 assert(it != NULL);
3100 seq = it->it_seq;
3101 if (seq == NULL)
3102 return NULL;
3103 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003105 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3106 item = PyLong_FromLong(
3107 (unsigned char)seq->ob_sval[it->it_index]);
3108 if (item != NULL)
3109 ++it->it_index;
3110 return item;
3111 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003113 Py_DECREF(seq);
3114 it->it_seq = NULL;
3115 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003116}
3117
3118static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003119striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003120{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003121 Py_ssize_t len = 0;
3122 if (it->it_seq)
3123 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3124 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003125}
3126
3127PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003129
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003130static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003131 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3132 length_hint_doc},
3133 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003134};
3135
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003136PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003137 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3138 "bytes_iterator", /* tp_name */
3139 sizeof(striterobject), /* tp_basicsize */
3140 0, /* tp_itemsize */
3141 /* methods */
3142 (destructor)striter_dealloc, /* tp_dealloc */
3143 0, /* tp_print */
3144 0, /* tp_getattr */
3145 0, /* tp_setattr */
3146 0, /* tp_reserved */
3147 0, /* tp_repr */
3148 0, /* tp_as_number */
3149 0, /* tp_as_sequence */
3150 0, /* tp_as_mapping */
3151 0, /* tp_hash */
3152 0, /* tp_call */
3153 0, /* tp_str */
3154 PyObject_GenericGetAttr, /* tp_getattro */
3155 0, /* tp_setattro */
3156 0, /* tp_as_buffer */
3157 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3158 0, /* tp_doc */
3159 (traverseproc)striter_traverse, /* tp_traverse */
3160 0, /* tp_clear */
3161 0, /* tp_richcompare */
3162 0, /* tp_weaklistoffset */
3163 PyObject_SelfIter, /* tp_iter */
3164 (iternextfunc)striter_next, /* tp_iternext */
3165 striter_methods, /* tp_methods */
3166 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003167};
3168
3169static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003170bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003171{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003172 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003174 if (!PyBytes_Check(seq)) {
3175 PyErr_BadInternalCall();
3176 return NULL;
3177 }
3178 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3179 if (it == NULL)
3180 return NULL;
3181 it->it_index = 0;
3182 Py_INCREF(seq);
3183 it->it_seq = (PyBytesObject *)seq;
3184 _PyObject_GC_TRACK(it);
3185 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003186}