blob: 27f406947208632acf37b30d58609ea9c24cf87f [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Christian Heimes2c9c7a52008-05-26 13:42:13 +000010#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000011Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000012#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000013
Christian Heimes2c9c7a52008-05-26 13:42:13 +000014static PyBytesObject *characters[UCHAR_MAX + 1];
15static PyBytesObject *nullstring;
16
/* Base allocation size of a bytes object: the offset of the ob_sval
   member plus one byte for the terminating null.  A bytes object that
   holds n bytes of data must be allocated with PyBytesObject_SIZE + n
   bytes.

   Using PyBytesObject_SIZE rather than sizeof(PyBytesObject) saves
   3 bytes per allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
24
Christian Heimes2c9c7a52008-05-26 13:42:13 +000025/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000026 For PyBytes_FromString(), the parameter `str' points to a null-terminated
27 string containing exactly `size' bytes.
28
29 For PyBytes_FromStringAndSize(), the parameter `str' is
30 either NULL or else points to a string containing at least `size' bytes.
31 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
32 not have to be null-terminated. (Therefore it is safe to construct a
33 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
34 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
35 bytes (setting the last byte to the null terminating character) and you can
36 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000037 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038 alter the data yourself, since the strings may be shared.
39
40 The PyObject member `op->ob_size', which denotes the number of "extra
41 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020042 allocated for string data, not counting the null terminating character.
43 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044 PyBytes_FromStringAndSize()) or the length of the string in the `str'
45 parameter (for PyBytes_FromString()).
46*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000047PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000048PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000049{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020050 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000051 if (size < 0) {
52 PyErr_SetString(PyExc_SystemError,
53 "Negative size passed to PyBytes_FromStringAndSize");
54 return NULL;
55 }
56 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000057#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 Py_INCREF(op);
61 return (PyObject *)op;
62 }
63 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
74 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
80 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
81 if (op == NULL)
82 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010083 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 op->ob_shash = -1;
85 if (str != NULL)
86 Py_MEMCPY(op->ob_sval, str, size);
87 op->ob_sval[size] = '\0';
88 /* share short strings */
89 if (size == 0) {
90 nullstring = op;
91 Py_INCREF(op);
92 } else if (size == 1 && str != NULL) {
93 characters[*str & UCHAR_MAX] = op;
94 Py_INCREF(op);
95 }
96 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +000097}
98
Christian Heimes2c9c7a52008-05-26 13:42:13 +000099PyObject *
100PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000101{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200102 size_t size;
103 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 assert(str != NULL);
106 size = strlen(str);
107 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
108 PyErr_SetString(PyExc_OverflowError,
109 "byte string is too long");
110 return NULL;
111 }
112 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000113#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000115#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000120#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000122#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
125 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 /* Inline PyObject_NewVar */
128 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
129 if (op == NULL)
130 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100131 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 op->ob_shash = -1;
133 Py_MEMCPY(op->ob_sval, str, size+1);
134 /* share short strings */
135 if (size == 0) {
136 nullstring = op;
137 Py_INCREF(op);
138 } else if (size == 1) {
139 characters[*str & UCHAR_MAX] = op;
140 Py_INCREF(op);
141 }
142 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000143}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000144
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000145PyObject *
146PyBytes_FromFormatV(const char *format, va_list vargs)
147{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 va_list count;
149 Py_ssize_t n = 0;
150 const char* f;
151 char *s;
152 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000153
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000154 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 /* step 1: figure out how large a buffer we need */
156 for (f = format; *f; f++) {
157 if (*f == '%') {
158 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000159 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
163 * they don't affect the amount of space we reserve.
164 */
165 if ((*f == 'l' || *f == 'z') &&
166 (f[1] == 'd' || f[1] == 'u'))
167 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 switch (*f) {
170 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100171 {
172 int c = va_arg(count, int);
173 if (c < 0 || c > 255) {
174 PyErr_SetString(PyExc_OverflowError,
175 "PyBytes_FromFormatV(): %c format "
176 "expects an integer in range [0; 255]");
177 return NULL;
178 }
179 n++;
180 break;
181 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 case '%':
183 n++;
184 break;
185 case 'd': case 'u': case 'i': case 'x':
186 (void) va_arg(count, int);
187 /* 20 bytes is enough to hold a 64-bit
188 integer. Decimal takes the most space.
189 This isn't enough for octal. */
190 n += 20;
191 break;
192 case 's':
193 s = va_arg(count, char*);
194 n += strlen(s);
195 break;
196 case 'p':
197 (void) va_arg(count, int);
198 /* maximum 64-bit pointer representation:
199 * 0xffffffffffffffff
200 * so 19 characters is enough.
201 * XXX I count 18 -- what's the extra for?
202 */
203 n += 19;
204 break;
205 default:
206 /* if we stumble upon an unknown
207 formatting code, copy the rest of
208 the format string to the output
209 string. (we cannot just skip the
210 code, since there's no way to know
211 what's in the argument list) */
212 n += strlen(p);
213 goto expand;
214 }
215 } else
216 n++;
217 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000218 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 /* step 2: fill the buffer */
220 /* Since we've analyzed how much space we need for the worst case,
221 use sprintf directly instead of the slower PyOS_snprintf. */
222 string = PyBytes_FromStringAndSize(NULL, n);
223 if (!string)
224 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 for (f = format; *f; f++) {
229 if (*f == '%') {
230 const char* p = f++;
231 Py_ssize_t i;
232 int longflag = 0;
233 int size_tflag = 0;
234 /* parse the width.precision part (we're only
235 interested in the precision value, if any) */
236 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000237 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 n = (n*10) + *f++ - '0';
239 if (*f == '.') {
240 f++;
241 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000242 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000243 n = (n*10) + *f++ - '0';
244 }
David Malcolm96960882010-11-05 17:23:41 +0000245 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 f++;
247 /* handle the long flag, but only for %ld and %lu.
248 others can be added when necessary. */
249 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
250 longflag = 1;
251 ++f;
252 }
253 /* handle the size_t flag. */
254 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
255 size_tflag = 1;
256 ++f;
257 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 switch (*f) {
260 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100261 {
262 int c = va_arg(vargs, int);
263 /* c has been checked for overflow in the first step */
264 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100266 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 case 'd':
268 if (longflag)
269 sprintf(s, "%ld", va_arg(vargs, long));
270 else if (size_tflag)
271 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
272 va_arg(vargs, Py_ssize_t));
273 else
274 sprintf(s, "%d", va_arg(vargs, int));
275 s += strlen(s);
276 break;
277 case 'u':
278 if (longflag)
279 sprintf(s, "%lu",
280 va_arg(vargs, unsigned long));
281 else if (size_tflag)
282 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
283 va_arg(vargs, size_t));
284 else
285 sprintf(s, "%u",
286 va_arg(vargs, unsigned int));
287 s += strlen(s);
288 break;
289 case 'i':
290 sprintf(s, "%i", va_arg(vargs, int));
291 s += strlen(s);
292 break;
293 case 'x':
294 sprintf(s, "%x", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 's':
298 p = va_arg(vargs, char*);
299 i = strlen(p);
300 if (n > 0 && i > n)
301 i = n;
302 Py_MEMCPY(s, p, i);
303 s += i;
304 break;
305 case 'p':
306 sprintf(s, "%p", va_arg(vargs, void*));
307 /* %p is ill-defined: ensure leading 0x. */
308 if (s[1] == 'X')
309 s[1] = 'x';
310 else if (s[1] != 'x') {
311 memmove(s+2, s, strlen(s)+1);
312 s[0] = '0';
313 s[1] = 'x';
314 }
315 s += strlen(s);
316 break;
317 case '%':
318 *s++ = '%';
319 break;
320 default:
321 strcpy(s, p);
322 s += strlen(s);
323 goto end;
324 }
325 } else
326 *s++ = *f;
327 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000328
329 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
331 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000332}
333
334PyObject *
335PyBytes_FromFormat(const char *format, ...)
336{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 PyObject* ret;
338 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000339
340#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000342#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000343 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000344#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 ret = PyBytes_FromFormatV(format, vargs);
346 va_end(vargs);
347 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000348}
349
350static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000351bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000352{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000354}
355
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356/* Unescape a backslash-escaped string. If unicode is non-zero,
357 the string is a u-literal. If recode_encoding is non-zero,
358 the string is UTF-8 encoded and should be re-encoded in the
359 specified encoding. */
360
361PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 Py_ssize_t len,
363 const char *errors,
364 Py_ssize_t unicode,
365 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 int c;
368 char *p, *buf;
369 const char *end;
370 PyObject *v;
371 Py_ssize_t newlen = recode_encoding ? 4*len:len;
372 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
373 if (v == NULL)
374 return NULL;
375 p = buf = PyBytes_AsString(v);
376 end = s + len;
377 while (s < end) {
378 if (*s != '\\') {
379 non_esc:
380 if (recode_encoding && (*s & 0x80)) {
381 PyObject *u, *w;
382 char *r;
383 const char* t;
384 Py_ssize_t rn;
385 t = s;
386 /* Decode non-ASCII bytes as UTF-8. */
387 while (t < end && (*t & 0x80)) t++;
388 u = PyUnicode_DecodeUTF8(s, t - s, errors);
389 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 /* Recode them in target encoding. */
392 w = PyUnicode_AsEncodedString(
393 u, recode_encoding, errors);
394 Py_DECREF(u);
395 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 /* Append bytes to output buffer. */
398 assert(PyBytes_Check(w));
399 r = PyBytes_AS_STRING(w);
400 rn = PyBytes_GET_SIZE(w);
401 Py_MEMCPY(p, r, rn);
402 p += rn;
403 Py_DECREF(w);
404 s = t;
405 } else {
406 *p++ = *s++;
407 }
408 continue;
409 }
410 s++;
411 if (s==end) {
412 PyErr_SetString(PyExc_ValueError,
413 "Trailing \\ in string");
414 goto failed;
415 }
416 switch (*s++) {
417 /* XXX This assumes ASCII! */
418 case '\n': break;
419 case '\\': *p++ = '\\'; break;
420 case '\'': *p++ = '\''; break;
421 case '\"': *p++ = '\"'; break;
422 case 'b': *p++ = '\b'; break;
423 case 'f': *p++ = '\014'; break; /* FF */
424 case 't': *p++ = '\t'; break;
425 case 'n': *p++ = '\n'; break;
426 case 'r': *p++ = '\r'; break;
427 case 'v': *p++ = '\013'; break; /* VT */
428 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
429 case '0': case '1': case '2': case '3':
430 case '4': case '5': case '6': case '7':
431 c = s[-1] - '0';
432 if (s < end && '0' <= *s && *s <= '7') {
433 c = (c<<3) + *s++ - '0';
434 if (s < end && '0' <= *s && *s <= '7')
435 c = (c<<3) + *s++ - '0';
436 }
437 *p++ = c;
438 break;
439 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000440 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 unsigned int x = 0;
442 c = Py_CHARMASK(*s);
443 s++;
David Malcolm96960882010-11-05 17:23:41 +0000444 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000446 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 x = 10 + c - 'a';
448 else
449 x = 10 + c - 'A';
450 x = x << 4;
451 c = Py_CHARMASK(*s);
452 s++;
David Malcolm96960882010-11-05 17:23:41 +0000453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000455 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 x += 10 + c - 'a';
457 else
458 x += 10 + c - 'A';
459 *p++ = x;
460 break;
461 }
462 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200463 PyErr_Format(PyExc_ValueError,
464 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200465 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 goto failed;
467 }
468 if (strcmp(errors, "replace") == 0) {
469 *p++ = '?';
470 } else if (strcmp(errors, "ignore") == 0)
471 /* do nothing */;
472 else {
473 PyErr_Format(PyExc_ValueError,
474 "decoding error; unknown "
475 "error handling code: %.400s",
476 errors);
477 goto failed;
478 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200479 /* skip \x */
480 if (s < end && Py_ISXDIGIT(s[0]))
481 s++; /* and a hexdigit */
482 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 default:
484 *p++ = '\\';
485 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200486 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 UTF-8 bytes may follow. */
488 }
489 }
490 if (p-buf < newlen)
491 _PyBytes_Resize(&v, p - buf);
492 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000493 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 Py_DECREF(v);
495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000496}
497
498/* -------------------------------------------------------------------- */
499/* object api */
500
501Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200502PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 if (!PyBytes_Check(op)) {
505 PyErr_Format(PyExc_TypeError,
506 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 return -1;
508 }
509 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000510}
511
512char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200513PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 if (!PyBytes_Check(op)) {
516 PyErr_Format(PyExc_TypeError,
517 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 return NULL;
519 }
520 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000521}
522
523int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200524PyBytes_AsStringAndSize(PyObject *obj,
525 char **s,
526 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 if (s == NULL) {
529 PyErr_BadInternalCall();
530 return -1;
531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (!PyBytes_Check(obj)) {
534 PyErr_Format(PyExc_TypeError,
535 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 return -1;
537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 *s = PyBytes_AS_STRING(obj);
540 if (len != NULL)
541 *len = PyBytes_GET_SIZE(obj);
542 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected bytes with no null");
545 return -1;
546 }
547 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
Neal Norwitz6968b052007-02-27 19:02:19 +0000549
550/* -------------------------------------------------------------------- */
551/* Methods */
552
Eric Smith0923d1d2009-04-16 20:16:10 +0000553#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000554
555#include "stringlib/fastsearch.h"
556#include "stringlib/count.h"
557#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200558#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000559#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000560#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000561#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000562
Eric Smith0f78bff2009-11-30 01:01:42 +0000563#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000564
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000565PyObject *
566PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000567{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200568 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200569 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -0400570 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200572 unsigned char quote, *s, *p;
573
574 /* Compute size of output string */
575 squotes = dquotes = 0;
576 newsize = 3; /* b'' */
577 s = (unsigned char*)op->ob_sval;
578 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400579 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200580 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400581 case '\'': squotes++; break;
582 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200583 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400584 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200585 default:
586 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400587 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200588 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400589 if (newsize > PY_SSIZE_T_MAX - incr)
590 goto overflow;
591 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200592 }
593 quote = '\'';
594 if (smartquotes && squotes && !dquotes)
595 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400596 if (squotes && quote == '\'') {
597 if (newsize > PY_SSIZE_T_MAX - squotes)
598 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200599 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000600 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601
602 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 if (v == NULL) {
604 return NULL;
605 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200606 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000607
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200608 *p++ = 'b', *p++ = quote;
609 for (i = 0; i < length; i++) {
610 unsigned char c = op->ob_sval[i];
611 if (c == quote || c == '\\')
612 *p++ = '\\', *p++ = c;
613 else if (c == '\t')
614 *p++ = '\\', *p++ = 't';
615 else if (c == '\n')
616 *p++ = '\\', *p++ = 'n';
617 else if (c == '\r')
618 *p++ = '\\', *p++ = 'r';
619 else if (c < ' ' || c >= 0x7f) {
620 *p++ = '\\';
621 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200622 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
623 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 else
626 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200629 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200630 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400631
632 overflow:
633 PyErr_SetString(PyExc_OverflowError,
634 "bytes object is too large to make repr");
635 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000636}
637
Neal Norwitz6968b052007-02-27 19:02:19 +0000638static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000639bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000642}
643
Neal Norwitz6968b052007-02-27 19:02:19 +0000644static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000645bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000646{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 if (Py_BytesWarningFlag) {
648 if (PyErr_WarnEx(PyExc_BytesWarning,
649 "str() on a bytes instance", 1))
650 return NULL;
651 }
652 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000653}
654
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000655static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000656bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000657{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000659}
Neal Norwitz6968b052007-02-27 19:02:19 +0000660
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000661/* This is also used by PyBytes_Concat() */
662static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000663bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000664{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 Py_ssize_t size;
666 Py_buffer va, vb;
667 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000669 va.len = -1;
670 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200671 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
672 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
674 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
675 goto done;
676 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 /* Optimize end cases */
679 if (va.len == 0 && PyBytes_CheckExact(b)) {
680 result = b;
681 Py_INCREF(result);
682 goto done;
683 }
684 if (vb.len == 0 && PyBytes_CheckExact(a)) {
685 result = a;
686 Py_INCREF(result);
687 goto done;
688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 size = va.len + vb.len;
691 if (size < 0) {
692 PyErr_NoMemory();
693 goto done;
694 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 result = PyBytes_FromStringAndSize(NULL, size);
697 if (result != NULL) {
698 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
699 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
700 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000701
702 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 if (va.len != -1)
704 PyBuffer_Release(&va);
705 if (vb.len != -1)
706 PyBuffer_Release(&vb);
707 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000708}
Neal Norwitz6968b052007-02-27 19:02:19 +0000709
710static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200711bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000712{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200713 Py_ssize_t i;
714 Py_ssize_t j;
715 Py_ssize_t size;
716 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 size_t nbytes;
718 if (n < 0)
719 n = 0;
720 /* watch out for overflows: the size can overflow int,
721 * and the # of bytes needed can overflow size_t
722 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000723 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 PyErr_SetString(PyExc_OverflowError,
725 "repeated bytes are too long");
726 return NULL;
727 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000728 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
730 Py_INCREF(a);
731 return (PyObject *)a;
732 }
733 nbytes = (size_t)size;
734 if (nbytes + PyBytesObject_SIZE <= nbytes) {
735 PyErr_SetString(PyExc_OverflowError,
736 "repeated bytes are too long");
737 return NULL;
738 }
739 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
740 if (op == NULL)
741 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100742 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 op->ob_shash = -1;
744 op->ob_sval[size] = '\0';
745 if (Py_SIZE(a) == 1 && n > 0) {
746 memset(op->ob_sval, a->ob_sval[0] , n);
747 return (PyObject *) op;
748 }
749 i = 0;
750 if (i < size) {
751 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
752 i = Py_SIZE(a);
753 }
754 while (i < size) {
755 j = (i <= size-i) ? i : size-i;
756 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
757 i += j;
758 }
759 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000760}
761
Guido van Rossum98297ee2007-11-06 21:34:58 +0000762static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000763bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000764{
765 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
766 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000767 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000768 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000769 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +0200770 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +0000771 return -1;
772 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
773 varg.buf, varg.len, 0);
774 PyBuffer_Release(&varg);
775 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000776 }
777 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000778 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
779 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000780 }
781
Antoine Pitrou0010d372010-08-15 17:12:55 +0000782 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000783}
784
Neal Norwitz6968b052007-02-27 19:02:19 +0000785static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200786bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000787{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 if (i < 0 || i >= Py_SIZE(a)) {
789 PyErr_SetString(PyExc_IndexError, "index out of range");
790 return NULL;
791 }
792 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000793}
794
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100795Py_LOCAL(int)
796bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
797{
798 int cmp;
799 Py_ssize_t len;
800
801 len = Py_SIZE(a);
802 if (Py_SIZE(b) != len)
803 return 0;
804
805 if (a->ob_sval[0] != b->ob_sval[0])
806 return 0;
807
808 cmp = memcmp(a->ob_sval, b->ob_sval, len);
809 return (cmp == 0);
810}
811
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000812static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000813bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000814{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 int c;
816 Py_ssize_t len_a, len_b;
817 Py_ssize_t min_len;
818 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300819 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +0000820
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 /* Make sure both arguments are strings. */
822 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300823 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
824 rc = PyObject_IsInstance((PyObject*)a,
825 (PyObject*)&PyUnicode_Type);
826 if (!rc)
827 rc = PyObject_IsInstance((PyObject*)b,
828 (PyObject*)&PyUnicode_Type);
829 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300831 if (rc) {
832 if (PyErr_WarnEx(PyExc_BytesWarning,
833 "Comparison between bytes and string", 1))
834 return NULL;
835 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 }
837 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100839 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100841 case Py_EQ:
842 case Py_LE:
843 case Py_GE:
844 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100846 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100847 case Py_NE:
848 case Py_LT:
849 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100851 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100852 default:
853 PyErr_BadArgument();
854 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 }
856 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100857 else if (op == Py_EQ || op == Py_NE) {
858 int eq = bytes_compare_eq(a, b);
859 eq ^= (op == Py_NE);
860 result = eq ? Py_True : Py_False;
861 }
862 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100863 len_a = Py_SIZE(a);
864 len_b = Py_SIZE(b);
865 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100866 if (min_len > 0) {
867 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100868 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100869 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100871 else
872 c = 0;
873 if (c == 0)
874 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
875 switch (op) {
876 case Py_LT: c = c < 0; break;
877 case Py_LE: c = c <= 0; break;
878 case Py_GT: c = c > 0; break;
879 case Py_GE: c = c >= 0; break;
880 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100881 PyErr_BadArgument();
882 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100883 }
884 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 Py_INCREF(result);
888 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000889}
890
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000891static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000892bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000893{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100894 if (a->ob_shash == -1) {
895 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100896 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100897 }
898 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000899}
900
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000901static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000902bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000903{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 if (PyIndex_Check(item)) {
905 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
906 if (i == -1 && PyErr_Occurred())
907 return NULL;
908 if (i < 0)
909 i += PyBytes_GET_SIZE(self);
910 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
911 PyErr_SetString(PyExc_IndexError,
912 "index out of range");
913 return NULL;
914 }
915 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
916 }
917 else if (PySlice_Check(item)) {
918 Py_ssize_t start, stop, step, slicelength, cur, i;
919 char* source_buf;
920 char* result_buf;
921 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000922
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000923 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 PyBytes_GET_SIZE(self),
925 &start, &stop, &step, &slicelength) < 0) {
926 return NULL;
927 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 if (slicelength <= 0) {
930 return PyBytes_FromStringAndSize("", 0);
931 }
932 else if (start == 0 && step == 1 &&
933 slicelength == PyBytes_GET_SIZE(self) &&
934 PyBytes_CheckExact(self)) {
935 Py_INCREF(self);
936 return (PyObject *)self;
937 }
938 else if (step == 1) {
939 return PyBytes_FromStringAndSize(
940 PyBytes_AS_STRING(self) + start,
941 slicelength);
942 }
943 else {
944 source_buf = PyBytes_AS_STRING(self);
945 result = PyBytes_FromStringAndSize(NULL, slicelength);
946 if (result == NULL)
947 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 result_buf = PyBytes_AS_STRING(result);
950 for (cur = start, i = 0; i < slicelength;
951 cur += step, i++) {
952 result_buf[i] = source_buf[cur];
953 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 return result;
956 }
957 }
958 else {
959 PyErr_Format(PyExc_TypeError,
960 "byte indices must be integers, not %.200s",
961 Py_TYPE(item)->tp_name);
962 return NULL;
963 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000964}
965
966static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000967bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
970 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000971}
972
/* Sequence-protocol slot table for bytes.  Length, concatenation,
   repetition, item access and containment are implemented; the slice
   and the two assignment slots are left as 0. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000973static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000974 (lenfunc)bytes_length, /*sq_length*/
975 (binaryfunc)bytes_concat, /*sq_concat*/
976 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
977 (ssizeargfunc)bytes_item, /*sq_item*/
978 0, /*sq_slice*/
979 0, /*sq_ass_item*/
980 0, /*sq_ass_slice*/
981 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000982};
983
/* Mapping-protocol slot table for bytes: a length function and
   bytes_subscript, with the third slot left as 0.
   NOTE(review): the third slot is presumably the subscript-assignment
   slot -- confirm against the PyMappingMethods declaration. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000984static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 (lenfunc)bytes_length,
986 (binaryfunc)bytes_subscript,
987 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000988};
989
/* Buffer-protocol slot table for bytes: only the get-buffer procedure
   is provided; the second slot is NULL.
   NOTE(review): the NULL slot is presumably the release-buffer
   procedure -- confirm against the PyBufferProcs declaration. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000990static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 (getbufferproc)bytes_buffer_getbuffer,
992 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993};
994
995
/* Strip modes.  They are compared against in do_strip()/do_xstrip() and
   double as indices into stripformat[] below. */
996#define LEFTSTRIP 0
997#define RIGHTSTRIP 1
998#define BOTHSTRIP 2
999
1000/* Arrays indexed by above */
/* PyArg_ParseTuple format strings, one per strip mode: an optional
   object argument followed by the method name after the colon. */
1001static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1002
/* Skips the three-character "|O:" prefix of the format string, leaving
   just the method name. */
1003#define STRIPNAME(i) (stripformat[i]+3)
1004
Neal Norwitz6968b052007-02-27 19:02:19 +00001005PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001006"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001007\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001008Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001009If sep is not specified or is None, B is split on ASCII whitespace\n\
1010characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001011If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001012
1013static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001014bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001015{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001016 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1018 Py_ssize_t maxsplit = -1;
1019 const char *s = PyBytes_AS_STRING(self), *sub;
1020 Py_buffer vsub;
1021 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001022
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001023 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1024 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 return NULL;
1026 if (maxsplit < 0)
1027 maxsplit = PY_SSIZE_T_MAX;
1028 if (subobj == Py_None)
1029 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001030 if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 return NULL;
1032 sub = vsub.buf;
1033 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1036 PyBuffer_Release(&vsub);
1037 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001038}
1039
Neal Norwitz6968b052007-02-27 19:02:19 +00001040PyDoc_STRVAR(partition__doc__,
1041"B.partition(sep) -> (head, sep, tail)\n\
1042\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001043Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001044the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001045found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001046
1047static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001048bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001049{
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001050 Py_buffer sep = {NULL, NULL};
1051 PyObject *res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001052
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001053 if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001055
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001056 res = stringlib_partition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 (PyObject*) self,
1058 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001059 sep_obj, sep.buf, sep.len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 );
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001061 PyBuffer_Release(&sep);
1062 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001063}
1064
1065PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001066"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001067\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001068Search for the separator sep in B, starting at the end of B,\n\
1069and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001070part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001071bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
1073static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001074bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001075{
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001076 Py_buffer sep = {NULL, NULL};
1077 PyObject *res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001078
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001079 if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001081
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001082 res = stringlib_rpartition(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 (PyObject*) self,
1084 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001085 sep_obj, sep.buf, sep.len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 );
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001087 PyBuffer_Release(&sep);
1088 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089}
1090
Neal Norwitz6968b052007-02-27 19:02:19 +00001091PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001092"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001093\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001094Return a list of the sections in B, using sep as the delimiter,\n\
1095starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001096If sep is not given, B is split on ASCII whitespace characters\n\
1097(space, tab, return, newline, formfeed, vertical tab).\n\
1098If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001099
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001100
Neal Norwitz6968b052007-02-27 19:02:19 +00001101static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001102bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001103{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001104 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1106 Py_ssize_t maxsplit = -1;
1107 const char *s = PyBytes_AS_STRING(self), *sub;
1108 Py_buffer vsub;
1109 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001110
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001111 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1112 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 return NULL;
1114 if (maxsplit < 0)
1115 maxsplit = PY_SSIZE_T_MAX;
1116 if (subobj == Py_None)
1117 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001118 if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 return NULL;
1120 sub = vsub.buf;
1121 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1124 PyBuffer_Release(&vsub);
1125 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001126}
1127
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001128
1129PyDoc_STRVAR(join__doc__,
1130"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001131\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001132Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001133Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1134
Neal Norwitz6968b052007-02-27 19:02:19 +00001135static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001136bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001137{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001138 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001139}
1140
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141PyObject *
1142_PyBytes_Join(PyObject *sep, PyObject *x)
1143{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 assert(sep != NULL && PyBytes_Check(sep));
1145 assert(x != NULL);
1146 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147}
1148
/* ADJUST_INDICES(start, end, len) normalizes slice bounds in place:
   `end` is capped at `len`; a negative `end` or `start` has `len` added
   and is then floored at 0.  `start` is NOT capped from above, so it may
   still exceed `len` (and `end`) afterwards.
   NOTE(review): the expansion is a bare if / else-if chain followed by a
   second if (no do { } while (0) wrapper), and each argument is
   evaluated more than once.  Use it only as a standalone statement with
   side-effect-free lvalue arguments. */
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001149/* helper macro to fixup start/end slice values */
1150#define ADJUST_INDICES(start, end, len) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 if (end > len) \
1152 end = len; \
1153 else if (end < 0) { \
1154 end += len; \
1155 if (end < 0) \
1156 end = 0; \
1157 } \
1158 if (start < 0) { \
1159 start += len; \
1160 if (start < 0) \
1161 start = 0; \
1162 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001163
1164Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001165bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001168 char byte;
1169 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 const char *sub;
1171 Py_ssize_t sub_len;
1172 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001173 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174
Antoine Pitrouac65d962011-10-20 23:54:17 +02001175 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1176 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001178
Antoine Pitrouac65d962011-10-20 23:54:17 +02001179 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001180 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001181 return -2;
1182
1183 sub = subbuf.buf;
1184 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001186 else {
1187 sub = &byte;
1188 sub_len = 1;
1189 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001192 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1194 sub, sub_len, start, end);
1195 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001196 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1198 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001199
1200 if (subobj)
1201 PyBuffer_Release(&subbuf);
1202
1203 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001204}
1205
1206
1207PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001208"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001209\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001210Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001211such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001212arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001213\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001214Return -1 on failure.");
1215
Neal Norwitz6968b052007-02-27 19:02:19 +00001216static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001217bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001218{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 Py_ssize_t result = bytes_find_internal(self, args, +1);
1220 if (result == -2)
1221 return NULL;
1222 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001223}
1224
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225
1226PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001227"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001228\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229Like B.find() but raise ValueError when the substring is not found.");
1230
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001231static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001232bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 Py_ssize_t result = bytes_find_internal(self, args, +1);
1235 if (result == -2)
1236 return NULL;
1237 if (result == -1) {
1238 PyErr_SetString(PyExc_ValueError,
1239 "substring not found");
1240 return NULL;
1241 }
1242 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001243}
1244
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001245
1246PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001247"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001248\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001250such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001251arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001252\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253Return -1 on failure.");
1254
Neal Norwitz6968b052007-02-27 19:02:19 +00001255static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001256bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001257{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 Py_ssize_t result = bytes_find_internal(self, args, -1);
1259 if (result == -2)
1260 return NULL;
1261 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001262}
1263
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001264
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001266"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267\n\
1268Like B.rfind() but raise ValueError when the substring is not found.");
1269
1270static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001271bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 Py_ssize_t result = bytes_find_internal(self, args, -1);
1274 if (result == -2)
1275 return NULL;
1276 if (result == -1) {
1277 PyErr_SetString(PyExc_ValueError,
1278 "substring not found");
1279 return NULL;
1280 }
1281 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001282}
1283
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001284
1285Py_LOCAL_INLINE(PyObject *)
1286do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001287{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 Py_buffer vsep;
1289 char *s = PyBytes_AS_STRING(self);
1290 Py_ssize_t len = PyBytes_GET_SIZE(self);
1291 char *sep;
1292 Py_ssize_t seplen;
1293 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001295 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 return NULL;
1297 sep = vsep.buf;
1298 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001299
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 i = 0;
1301 if (striptype != RIGHTSTRIP) {
1302 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1303 i++;
1304 }
1305 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001307 j = len;
1308 if (striptype != LEFTSTRIP) {
1309 do {
1310 j--;
1311 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1312 j++;
1313 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001314
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1318 Py_INCREF(self);
1319 return (PyObject*)self;
1320 }
1321 else
1322 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001323}
1324
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325
1326Py_LOCAL_INLINE(PyObject *)
1327do_strip(PyBytesObject *self, int striptype)
1328{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 char *s = PyBytes_AS_STRING(self);
1330 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 i = 0;
1333 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001334 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 i++;
1336 }
1337 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 j = len;
1340 if (striptype != LEFTSTRIP) {
1341 do {
1342 j--;
David Malcolm96960882010-11-05 17:23:41 +00001343 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 j++;
1345 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1348 Py_INCREF(self);
1349 return (PyObject*)self;
1350 }
1351 else
1352 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001353}
1354
1355
1356Py_LOCAL_INLINE(PyObject *)
1357do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360
Serhiy Storchakac6792272013-10-19 21:03:34 +03001361 if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 if (sep != NULL && sep != Py_None) {
1365 return do_xstrip(self, striptype, sep);
1366 }
1367 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001368}
1369
1370
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001371PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001373\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001374Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001375If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001376static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001377bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001378{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 if (PyTuple_GET_SIZE(args) == 0)
1380 return do_strip(self, BOTHSTRIP); /* Common case */
1381 else
1382 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001383}
1384
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001386PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001387"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001388\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001389Strip leading bytes contained in the argument.\n\
1390If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001391static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001392bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 if (PyTuple_GET_SIZE(args) == 0)
1395 return do_strip(self, LEFTSTRIP); /* Common case */
1396 else
1397 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001398}
1399
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001401PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001403\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001404Strip trailing bytes contained in the argument.\n\
1405If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001406static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001407bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001408{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 if (PyTuple_GET_SIZE(args) == 0)
1410 return do_strip(self, RIGHTSTRIP); /* Common case */
1411 else
1412 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001413}
Neal Norwitz6968b052007-02-27 19:02:19 +00001414
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415
1416PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001417"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001418\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001420string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001421as in slice notation.");
1422
1423static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001424bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 PyObject *sub_obj;
1427 const char *str = PyBytes_AS_STRING(self), *sub;
1428 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001429 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001431
Antoine Pitrouac65d962011-10-20 23:54:17 +02001432 Py_buffer vsub;
1433 PyObject *count_obj;
1434
1435 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1436 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438
Antoine Pitrouac65d962011-10-20 23:54:17 +02001439 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001440 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001441 return NULL;
1442
1443 sub = vsub.buf;
1444 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001446 else {
1447 sub = &byte;
1448 sub_len = 1;
1449 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001452
Antoine Pitrouac65d962011-10-20 23:54:17 +02001453 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1455 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001456
1457 if (sub_obj)
1458 PyBuffer_Release(&vsub);
1459
1460 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461}
1462
1463
1464PyDoc_STRVAR(translate__doc__,
1465"B.translate(table[, deletechars]) -> bytes\n\
1466\n\
1467Return a copy of B, where all characters occurring in the\n\
1468optional argument deletechars are removed, and the remaining\n\
1469characters have been mapped through the given translation\n\
1470table, which must be a bytes object of length 256.");
1471
1472static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001473bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001474{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001475 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001476 Py_buffer table_view = {NULL, NULL};
1477 Py_buffer del_table_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 const char *table;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001479 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 PyObject *input_obj = (PyObject*)self;
1481 const char *output_start, *del_table=NULL;
1482 Py_ssize_t inlen, tablen, dellen = 0;
1483 PyObject *result;
1484 int trans_table[256];
1485 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1488 &tableobj, &delobj))
1489 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 if (PyBytes_Check(tableobj)) {
1492 table = PyBytes_AS_STRING(tableobj);
1493 tablen = PyBytes_GET_SIZE(tableobj);
1494 }
1495 else if (tableobj == Py_None) {
1496 table = NULL;
1497 tablen = 256;
1498 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001499 else {
1500 if (PyObject_GetBuffer(tableobj, &table_view, PyBUF_SIMPLE) != 0)
1501 return NULL;
1502 table = table_view.buf;
1503 tablen = table_view.len;
1504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 if (tablen != 256) {
1507 PyErr_SetString(PyExc_ValueError,
1508 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001509 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 return NULL;
1511 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 if (delobj != NULL) {
1514 if (PyBytes_Check(delobj)) {
1515 del_table = PyBytes_AS_STRING(delobj);
1516 dellen = PyBytes_GET_SIZE(delobj);
1517 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001518 else {
1519 if (PyObject_GetBuffer(delobj, &del_table_view, PyBUF_SIMPLE) != 0) {
1520 PyBuffer_Release(&table_view);
1521 return NULL;
1522 }
1523 del_table = del_table_view.buf;
1524 dellen = del_table_view.len;
1525 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 }
1527 else {
1528 del_table = NULL;
1529 dellen = 0;
1530 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 inlen = PyBytes_GET_SIZE(input_obj);
1533 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001534 if (result == NULL) {
1535 PyBuffer_Release(&del_table_view);
1536 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001538 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 output_start = output = PyBytes_AsString(result);
1540 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 if (dellen == 0 && table != NULL) {
1543 /* If no deletions are required, use faster code */
1544 for (i = inlen; --i >= 0; ) {
1545 c = Py_CHARMASK(*input++);
1546 if (Py_CHARMASK((*output++ = table[c])) != c)
1547 changed = 1;
1548 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001549 if (!changed && PyBytes_CheckExact(input_obj)) {
1550 Py_INCREF(input_obj);
1551 Py_DECREF(result);
1552 result = input_obj;
1553 }
1554 PyBuffer_Release(&del_table_view);
1555 PyBuffer_Release(&table_view);
1556 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 if (table == NULL) {
1560 for (i = 0; i < 256; i++)
1561 trans_table[i] = Py_CHARMASK(i);
1562 } else {
1563 for (i = 0; i < 256; i++)
1564 trans_table[i] = Py_CHARMASK(table[i]);
1565 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001566 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 for (i = 0; i < dellen; i++)
1569 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001570 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 for (i = inlen; --i >= 0; ) {
1573 c = Py_CHARMASK(*input++);
1574 if (trans_table[c] != -1)
1575 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1576 continue;
1577 changed = 1;
1578 }
1579 if (!changed && PyBytes_CheckExact(input_obj)) {
1580 Py_DECREF(result);
1581 Py_INCREF(input_obj);
1582 return input_obj;
1583 }
1584 /* Fix the size of the resulting string */
1585 if (inlen > 0)
1586 _PyBytes_Resize(&result, output - output_start);
1587 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588}
1589
1590
Georg Brandlabc38772009-04-12 15:51:51 +00001591static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001592bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001593{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001595}
1596
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597/* find and count characters and substrings */
1598
/* Return a char pointer to the first occurrence of byte `c` within the
   first `target_len` bytes of `target`, or NULL when it does not occur.
   Every macro argument is parenthesized in the expansion so that any
   expression can be passed safely. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1601
1602/* String ops must return a string. */
1603/* If the object is subclass of string, create a copy */
1604Py_LOCAL(PyBytesObject *)
1605return_self(PyBytesObject *self)
1606{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 if (PyBytes_CheckExact(self)) {
1608 Py_INCREF(self);
1609 return self;
1610 }
1611 return (PyBytesObject *)PyBytes_FromStringAndSize(
1612 PyBytes_AS_STRING(self),
1613 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001614}
1615
1616Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001617countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001618{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 Py_ssize_t count=0;
1620 const char *start=target;
1621 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 while ( (start=findchar(start, end-start, c)) != NULL ) {
1624 count++;
1625 if (count >= maxcount)
1626 break;
1627 start += 1;
1628 }
1629 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001630}
1631
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001632
1633/* Algorithms for different cases of string replacement */
1634
1635/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1636Py_LOCAL(PyBytesObject *)
1637replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 const char *to_s, Py_ssize_t to_len,
1639 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 char *self_s, *result_s;
1642 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001643 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001647
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001648 /* 1 at the end plus 1 after every character;
1649 count = min(maxcount, self_len + 1) */
1650 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001652 else
1653 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1654 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 /* Check for overflow */
1657 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001658 assert(count > 0);
1659 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 PyErr_SetString(PyExc_OverflowError,
1661 "replacement bytes are too long");
1662 return NULL;
1663 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001664 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 if (! (result = (PyBytesObject *)
1667 PyBytes_FromStringAndSize(NULL, result_len)) )
1668 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 self_s = PyBytes_AS_STRING(self);
1671 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 /* Lay the first one down (guaranteed this will occur) */
1676 Py_MEMCPY(result_s, to_s, to_len);
1677 result_s += to_len;
1678 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 for (i=0; i<count; i++) {
1681 *result_s++ = *self_s++;
1682 Py_MEMCPY(result_s, to_s, to_len);
1683 result_s += to_len;
1684 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 /* Copy the rest of the original string */
1687 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001690}
1691
1692/* Special case for deleting a single character */
1693/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1694Py_LOCAL(PyBytesObject *)
1695replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001697{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001698 char *self_s, *result_s;
1699 char *start, *next, *end;
1700 Py_ssize_t self_len, result_len;
1701 Py_ssize_t count;
1702 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 self_len = PyBytes_GET_SIZE(self);
1705 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 count = countchar(self_s, self_len, from_c, maxcount);
1708 if (count == 0) {
1709 return return_self(self);
1710 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 result_len = self_len - count; /* from_len == 1 */
1713 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 if ( (result = (PyBytesObject *)
1716 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1717 return NULL;
1718 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 start = self_s;
1721 end = self_s + self_len;
1722 while (count-- > 0) {
1723 next = findchar(start, end-start, from_c);
1724 if (next == NULL)
1725 break;
1726 Py_MEMCPY(result_s, start, next-start);
1727 result_s += (next-start);
1728 start = next+1;
1729 }
1730 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733}
1734
1735/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1736
1737Py_LOCAL(PyBytesObject *)
1738replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 const char *from_s, Py_ssize_t from_len,
1740 Py_ssize_t maxcount) {
1741 char *self_s, *result_s;
1742 char *start, *next, *end;
1743 Py_ssize_t self_len, result_len;
1744 Py_ssize_t count, offset;
1745 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 self_len = PyBytes_GET_SIZE(self);
1748 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 count = stringlib_count(self_s, self_len,
1751 from_s, from_len,
1752 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 if (count == 0) {
1755 /* no matches */
1756 return return_self(self);
1757 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 result_len = self_len - (count * from_len);
1760 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 if ( (result = (PyBytesObject *)
1763 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1764 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 start = self_s;
1769 end = self_s + self_len;
1770 while (count-- > 0) {
1771 offset = stringlib_find(start, end-start,
1772 from_s, from_len,
1773 0);
1774 if (offset == -1)
1775 break;
1776 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001780 result_s += (next-start);
1781 start = next+from_len;
1782 }
1783 Py_MEMCPY(result_s, start, end-start);
1784 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785}
1786
1787/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1788Py_LOCAL(PyBytesObject *)
1789replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 char from_c, char to_c,
1791 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 char *self_s, *result_s, *start, *end, *next;
1794 Py_ssize_t self_len;
1795 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 /* The result string will be the same size */
1798 self_s = PyBytes_AS_STRING(self);
1799 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 if (next == NULL) {
1804 /* No matches; return the original string */
1805 return return_self(self);
1806 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 /* Need to make a new string */
1809 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1810 if (result == NULL)
1811 return NULL;
1812 result_s = PyBytes_AS_STRING(result);
1813 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 /* change everything in-place, starting with this one */
1816 start = result_s + (next-self_s);
1817 *start = to_c;
1818 start++;
1819 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001820
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821 while (--maxcount > 0) {
1822 next = findchar(start, end-start, from_c);
1823 if (next == NULL)
1824 break;
1825 *next = to_c;
1826 start = next+1;
1827 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001830}
1831
1832/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1833Py_LOCAL(PyBytesObject *)
1834replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 const char *from_s, Py_ssize_t from_len,
1836 const char *to_s, Py_ssize_t to_len,
1837 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 char *result_s, *start, *end;
1840 char *self_s;
1841 Py_ssize_t self_len, offset;
1842 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 self_s = PyBytes_AS_STRING(self);
1847 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 offset = stringlib_find(self_s, self_len,
1850 from_s, from_len,
1851 0);
1852 if (offset == -1) {
1853 /* No matches; return the original string */
1854 return return_self(self);
1855 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 /* Need to make a new string */
1858 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1859 if (result == NULL)
1860 return NULL;
1861 result_s = PyBytes_AS_STRING(result);
1862 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 /* change everything in-place, starting with this one */
1865 start = result_s + offset;
1866 Py_MEMCPY(start, to_s, from_len);
1867 start += from_len;
1868 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 while ( --maxcount > 0) {
1871 offset = stringlib_find(start, end-start,
1872 from_s, from_len,
1873 0);
1874 if (offset==-1)
1875 break;
1876 Py_MEMCPY(start+offset, to_s, from_len);
1877 start += offset+from_len;
1878 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881}
1882
1883/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1884Py_LOCAL(PyBytesObject *)
1885replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 char from_c,
1887 const char *to_s, Py_ssize_t to_len,
1888 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 char *self_s, *result_s;
1891 char *start, *next, *end;
1892 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001893 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 self_s = PyBytes_AS_STRING(self);
1897 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 count = countchar(self_s, self_len, from_c, maxcount);
1900 if (count == 0) {
1901 /* no matches, return unchanged */
1902 return return_self(self);
1903 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 /* use the difference between current and new, hence the "-1" */
1906 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001907 assert(count > 0);
1908 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 PyErr_SetString(PyExc_OverflowError,
1910 "replacement bytes are too long");
1911 return NULL;
1912 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001913 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 if ( (result = (PyBytesObject *)
1916 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1917 return NULL;
1918 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 start = self_s;
1921 end = self_s + self_len;
1922 while (count-- > 0) {
1923 next = findchar(start, end-start, from_c);
1924 if (next == NULL)
1925 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 if (next == start) {
1928 /* replace with the 'to' */
1929 Py_MEMCPY(result_s, to_s, to_len);
1930 result_s += to_len;
1931 start += 1;
1932 } else {
1933 /* copy the unchanged old then the 'to' */
1934 Py_MEMCPY(result_s, start, next-start);
1935 result_s += (next-start);
1936 Py_MEMCPY(result_s, to_s, to_len);
1937 result_s += to_len;
1938 start = next+1;
1939 }
1940 }
1941 /* Copy the remainder of the remaining string */
1942 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945}
1946
1947/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1948Py_LOCAL(PyBytesObject *)
1949replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001950 const char *from_s, Py_ssize_t from_len,
1951 const char *to_s, Py_ssize_t to_len,
1952 Py_ssize_t maxcount) {
1953 char *self_s, *result_s;
1954 char *start, *next, *end;
1955 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001956 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 self_s = PyBytes_AS_STRING(self);
1960 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 count = stringlib_count(self_s, self_len,
1963 from_s, from_len,
1964 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 if (count == 0) {
1967 /* no matches, return unchanged */
1968 return return_self(self);
1969 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 /* Check for overflow */
1972 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001973 assert(count > 0);
1974 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 PyErr_SetString(PyExc_OverflowError,
1976 "replacement bytes are too long");
1977 return NULL;
1978 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001979 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 if ( (result = (PyBytesObject *)
1982 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1983 return NULL;
1984 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 start = self_s;
1987 end = self_s + self_len;
1988 while (count-- > 0) {
1989 offset = stringlib_find(start, end-start,
1990 from_s, from_len,
1991 0);
1992 if (offset == -1)
1993 break;
1994 next = start+offset;
1995 if (next == start) {
1996 /* replace with the 'to' */
1997 Py_MEMCPY(result_s, to_s, to_len);
1998 result_s += to_len;
1999 start += from_len;
2000 } else {
2001 /* copy the unchanged old then the 'to' */
2002 Py_MEMCPY(result_s, start, next-start);
2003 result_s += (next-start);
2004 Py_MEMCPY(result_s, to_s, to_len);
2005 result_s += to_len;
2006 start = next+from_len;
2007 }
2008 }
2009 /* Copy the remainder of the remaining string */
2010 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002012 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013}
2014
2015
2016Py_LOCAL(PyBytesObject *)
2017replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 const char *from_s, Py_ssize_t from_len,
2019 const char *to_s, Py_ssize_t to_len,
2020 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002021{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 if (maxcount < 0) {
2023 maxcount = PY_SSIZE_T_MAX;
2024 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2025 /* nothing to do; return the original string */
2026 return return_self(self);
2027 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 if (maxcount == 0 ||
2030 (from_len == 0 && to_len == 0)) {
2031 /* nothing to do; return the original string */
2032 return return_self(self);
2033 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 if (from_len == 0) {
2038 /* insert the 'to' string everywhere. */
2039 /* >>> "Python".replace("", ".") */
2040 /* '.P.y.t.h.o.n.' */
2041 return replace_interleave(self, to_s, to_len, maxcount);
2042 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002044 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2045 /* point for an empty self string to generate a non-empty string */
2046 /* Special case so the remaining code always gets a non-empty string */
2047 if (PyBytes_GET_SIZE(self) == 0) {
2048 return return_self(self);
2049 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 if (to_len == 0) {
2052 /* delete all occurrences of 'from' string */
2053 if (from_len == 1) {
2054 return replace_delete_single_character(
2055 self, from_s[0], maxcount);
2056 } else {
2057 return replace_delete_substring(self, from_s,
2058 from_len, maxcount);
2059 }
2060 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 if (from_len == to_len) {
2065 if (from_len == 1) {
2066 return replace_single_character_in_place(
2067 self,
2068 from_s[0],
2069 to_s[0],
2070 maxcount);
2071 } else {
2072 return replace_substring_in_place(
2073 self, from_s, from_len, to_s, to_len,
2074 maxcount);
2075 }
2076 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 /* Otherwise use the more generic algorithms */
2079 if (from_len == 1) {
2080 return replace_single_character(self, from_s[0],
2081 to_s, to_len, maxcount);
2082 } else {
2083 /* len('from')>=2, len('to')>=1 */
2084 return replace_substring(self, from_s, from_len, to_s, to_len,
2085 maxcount);
2086 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087}
2088
2089PyDoc_STRVAR(replace__doc__,
2090"B.replace(old, new[, count]) -> bytes\n\
2091\n\
2092Return a copy of B with all occurrences of subsection\n\
2093old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002094given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
2096static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002097bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098{
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002099 PyObject *res;
2100 Py_buffer old = {NULL, NULL};
2101 Py_buffer new = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002104 if (!PyArg_ParseTuple(args, "y*y*|n:replace", &old, &new, &count))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002107 res = (PyObject *)replace((PyBytesObject *) self,
2108 (const char *)old.buf, old.len,
2109 (const char *)new.buf, new.len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002111 PyBuffer_Release(&old);
2112 PyBuffer_Release(&new);
2113 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002114}
2115
2116/** End DALKE **/
2117
2118/* Matches the end (direction >= 0) or start (direction < 0) of self
2119 * against substr, using the start and end arguments. Returns
2120 * -1 on error, 0 if not found and 1 if found.
2121 */
2122Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002123_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 Py_ssize_t len = PyBytes_GET_SIZE(self);
2127 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002128 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002129 const char* sub;
2130 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 if (PyBytes_Check(substr)) {
2133 sub = PyBytes_AS_STRING(substr);
2134 slen = PyBytes_GET_SIZE(substr);
2135 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002136 else {
2137 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2138 return -1;
2139 sub = sub_view.buf;
2140 slen = sub_view.len;
2141 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002144 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 if (direction < 0) {
2147 /* startswith */
2148 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 } else {
2151 /* endswith */
2152 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002153 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 if (end-slen > start)
2156 start = end - slen;
2157 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002158 if (end-start < slen)
2159 goto notfound;
2160 if (memcmp(str+start, sub, slen) != 0)
2161 goto notfound;
2162
2163 PyBuffer_Release(&sub_view);
2164 return 1;
2165
2166notfound:
2167 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002169}
2170
2171
2172PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002173"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002174\n\
2175Return True if B starts with the specified prefix, False otherwise.\n\
2176With optional start, test B beginning at that position.\n\
2177With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002178prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179
2180static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002181bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 Py_ssize_t start = 0;
2184 Py_ssize_t end = PY_SSIZE_T_MAX;
2185 PyObject *subobj;
2186 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002187
Jesus Ceaac451502011-04-20 17:09:23 +02002188 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 return NULL;
2190 if (PyTuple_Check(subobj)) {
2191 Py_ssize_t i;
2192 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2193 result = _bytes_tailmatch(self,
2194 PyTuple_GET_ITEM(subobj, i),
2195 start, end, -1);
2196 if (result == -1)
2197 return NULL;
2198 else if (result) {
2199 Py_RETURN_TRUE;
2200 }
2201 }
2202 Py_RETURN_FALSE;
2203 }
2204 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002205 if (result == -1) {
2206 if (PyErr_ExceptionMatches(PyExc_TypeError))
2207 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2208 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 else
2212 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213}
2214
2215
2216PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002217"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218\n\
2219Return True if B ends with the specified suffix, False otherwise.\n\
2220With optional start, test B beginning at that position.\n\
2221With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002222suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
2224static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002225bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002227 Py_ssize_t start = 0;
2228 Py_ssize_t end = PY_SSIZE_T_MAX;
2229 PyObject *subobj;
2230 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231
Jesus Ceaac451502011-04-20 17:09:23 +02002232 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002233 return NULL;
2234 if (PyTuple_Check(subobj)) {
2235 Py_ssize_t i;
2236 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2237 result = _bytes_tailmatch(self,
2238 PyTuple_GET_ITEM(subobj, i),
2239 start, end, +1);
2240 if (result == -1)
2241 return NULL;
2242 else if (result) {
2243 Py_RETURN_TRUE;
2244 }
2245 }
2246 Py_RETURN_FALSE;
2247 }
2248 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002249 if (result == -1) {
2250 if (PyErr_ExceptionMatches(PyExc_TypeError))
2251 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2252 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002253 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002254 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 else
2256 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257}
2258
2259
2260PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002261"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002262\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002263Decode B using the codec registered for encoding. Default encoding\n\
2264is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002265handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2266a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002267as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002268able to handle UnicodeDecodeErrors.");
2269
2270static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002271bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002273 const char *encoding = NULL;
2274 const char *errors = NULL;
2275 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2278 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002279 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002280}
2281
Guido van Rossum20188312006-05-05 15:15:40 +00002282
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002283PyDoc_STRVAR(splitlines__doc__,
2284"B.splitlines([keepends]) -> list of lines\n\
2285\n\
2286Return a list of the lines in B, breaking at line boundaries.\n\
2287Line breaks are not included in the resulting list unless keepends\n\
2288is given and true.");
2289
2290static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002291bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002292{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002293 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002294 int keepends = 0;
2295
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002296 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2297 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002298 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002299
2300 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002301 (PyObject*) self, PyBytes_AS_STRING(self),
2302 PyBytes_GET_SIZE(self), keepends
2303 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002304}
2305
2306
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002307PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002308"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002309\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002310Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002311Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002312Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002313
2314static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002315hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 if (c >= 128)
2318 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002319 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 return c - '0';
2321 else {
David Malcolm96960882010-11-05 17:23:41 +00002322 if (Py_ISUPPER(c))
2323 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 if (c >= 'a' && c <= 'f')
2325 return c - 'a' + 10;
2326 }
2327 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002328}
2329
2330static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002331bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002332{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 PyObject *newstring, *hexobj;
2334 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002335 Py_ssize_t hexlen, byteslen, i, j;
2336 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002337 void *data;
2338 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002340 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2341 return NULL;
2342 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002343 if (PyUnicode_READY(hexobj))
2344 return NULL;
2345 kind = PyUnicode_KIND(hexobj);
2346 data = PyUnicode_DATA(hexobj);
2347 hexlen = PyUnicode_GET_LENGTH(hexobj);
2348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 byteslen = hexlen/2; /* This overestimates if there are spaces */
2350 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2351 if (!newstring)
2352 return NULL;
2353 buf = PyBytes_AS_STRING(newstring);
2354 for (i = j = 0; i < hexlen; i += 2) {
2355 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002356 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 i++;
2358 if (i >= hexlen)
2359 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002360 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2361 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002362 if (top == -1 || bot == -1) {
2363 PyErr_Format(PyExc_ValueError,
2364 "non-hexadecimal number found in "
2365 "fromhex() arg at position %zd", i);
2366 goto error;
2367 }
2368 buf[j++] = (top << 4) + bot;
2369 }
2370 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2371 goto error;
2372 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002373
2374 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 Py_XDECREF(newstring);
2376 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002377}
2378
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002379static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002380bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002381{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002383}
2384
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002385
2386static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002387bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2389 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2390 _Py_capitalize__doc__},
2391 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2392 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2393 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2394 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2395 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002396 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002397 expandtabs__doc__},
2398 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2399 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2400 fromhex_doc},
2401 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2402 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2403 _Py_isalnum__doc__},
2404 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2405 _Py_isalpha__doc__},
2406 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2407 _Py_isdigit__doc__},
2408 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2409 _Py_islower__doc__},
2410 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2411 _Py_isspace__doc__},
2412 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2413 _Py_istitle__doc__},
2414 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2415 _Py_isupper__doc__},
2416 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2417 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2418 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2419 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2420 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2421 _Py_maketrans__doc__},
2422 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2423 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2424 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2425 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2426 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2427 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2428 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002429 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002430 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002431 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002432 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002433 splitlines__doc__},
2434 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2435 startswith__doc__},
2436 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2437 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2438 _Py_swapcase__doc__},
2439 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2440 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2441 translate__doc__},
2442 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2443 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002444 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002445};
2446
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002447static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002448bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449
2450static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002451bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002452{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 PyObject *x = NULL;
2454 const char *encoding = NULL;
2455 const char *errors = NULL;
2456 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002457 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 Py_ssize_t size;
2459 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002460 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002462 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02002463 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2465 &encoding, &errors))
2466 return NULL;
2467 if (x == NULL) {
2468 if (encoding != NULL || errors != NULL) {
2469 PyErr_SetString(PyExc_TypeError,
2470 "encoding or errors without sequence "
2471 "argument");
2472 return NULL;
2473 }
2474 return PyBytes_FromString("");
2475 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 if (PyUnicode_Check(x)) {
2478 /* Encode via the codec registry */
2479 if (encoding == NULL) {
2480 PyErr_SetString(PyExc_TypeError,
2481 "string argument without an encoding");
2482 return NULL;
2483 }
2484 new = PyUnicode_AsEncodedString(x, encoding, errors);
2485 if (new == NULL)
2486 return NULL;
2487 assert(PyBytes_Check(new));
2488 return new;
2489 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002490
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002491 /* If it's not unicode, there can't be encoding or errors */
2492 if (encoding != NULL || errors != NULL) {
2493 PyErr_SetString(PyExc_TypeError,
2494 "encoding or errors without a string argument");
2495 return NULL;
2496 }
2497
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002498 /* We'd like to call PyObject_Bytes here, but we need to check for an
2499 integer argument before deferring to PyBytes_FromObject, something
2500 PyObject_Bytes doesn't do. */
2501 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2502 if (func != NULL) {
2503 new = PyObject_CallFunctionObjArgs(func, NULL);
2504 Py_DECREF(func);
2505 if (new == NULL)
2506 return NULL;
2507 if (!PyBytes_Check(new)) {
2508 PyErr_Format(PyExc_TypeError,
2509 "__bytes__ returned non-bytes (type %.200s)",
2510 Py_TYPE(new)->tp_name);
2511 Py_DECREF(new);
2512 return NULL;
2513 }
2514 return new;
2515 }
2516 else if (PyErr_Occurred())
2517 return NULL;
2518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 /* Is it an integer? */
2520 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2521 if (size == -1 && PyErr_Occurred()) {
2522 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2523 return NULL;
2524 PyErr_Clear();
2525 }
2526 else if (size < 0) {
2527 PyErr_SetString(PyExc_ValueError, "negative count");
2528 return NULL;
2529 }
2530 else {
2531 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002532 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002534 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002536 return new;
2537 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002538
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002539 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002540}
2541
2542PyObject *
2543PyBytes_FromObject(PyObject *x)
2544{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 PyObject *new, *it;
2546 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 if (x == NULL) {
2549 PyErr_BadInternalCall();
2550 return NULL;
2551 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002552
2553 if (PyBytes_CheckExact(x)) {
2554 Py_INCREF(x);
2555 return x;
2556 }
2557
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 /* Use the modern buffer interface */
2559 if (PyObject_CheckBuffer(x)) {
2560 Py_buffer view;
2561 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2562 return NULL;
2563 new = PyBytes_FromStringAndSize(NULL, view.len);
2564 if (!new)
2565 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2567 &view, view.len, 'C') < 0)
2568 goto fail;
2569 PyBuffer_Release(&view);
2570 return new;
2571 fail:
2572 Py_XDECREF(new);
2573 PyBuffer_Release(&view);
2574 return NULL;
2575 }
2576 if (PyUnicode_Check(x)) {
2577 PyErr_SetString(PyExc_TypeError,
2578 "cannot convert unicode object to bytes");
2579 return NULL;
2580 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 if (PyList_CheckExact(x)) {
2583 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2584 if (new == NULL)
2585 return NULL;
2586 for (i = 0; i < Py_SIZE(x); i++) {
2587 Py_ssize_t value = PyNumber_AsSsize_t(
2588 PyList_GET_ITEM(x, i), PyExc_ValueError);
2589 if (value == -1 && PyErr_Occurred()) {
2590 Py_DECREF(new);
2591 return NULL;
2592 }
2593 if (value < 0 || value >= 256) {
2594 PyErr_SetString(PyExc_ValueError,
2595 "bytes must be in range(0, 256)");
2596 Py_DECREF(new);
2597 return NULL;
2598 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002599 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 }
2601 return new;
2602 }
2603 if (PyTuple_CheckExact(x)) {
2604 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2605 if (new == NULL)
2606 return NULL;
2607 for (i = 0; i < Py_SIZE(x); i++) {
2608 Py_ssize_t value = PyNumber_AsSsize_t(
2609 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2610 if (value == -1 && PyErr_Occurred()) {
2611 Py_DECREF(new);
2612 return NULL;
2613 }
2614 if (value < 0 || value >= 256) {
2615 PyErr_SetString(PyExc_ValueError,
2616 "bytes must be in range(0, 256)");
2617 Py_DECREF(new);
2618 return NULL;
2619 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002620 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002621 }
2622 return new;
2623 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002626 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002627 if (size == -1 && PyErr_Occurred())
2628 return NULL;
2629 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2630 returning a shared empty bytes string. This required because we
2631 want to call _PyBytes_Resize() the returned object, which we can
2632 only do on bytes objects with refcount == 1. */
2633 size += 1;
2634 new = PyBytes_FromStringAndSize(NULL, size);
2635 if (new == NULL)
2636 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 /* Get the iterator */
2639 it = PyObject_GetIter(x);
2640 if (it == NULL)
2641 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 /* Run the iterator to exhaustion */
2644 for (i = 0; ; i++) {
2645 PyObject *item;
2646 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 /* Get the next item */
2649 item = PyIter_Next(it);
2650 if (item == NULL) {
2651 if (PyErr_Occurred())
2652 goto error;
2653 break;
2654 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 /* Interpret it as an int (__index__) */
2657 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2658 Py_DECREF(item);
2659 if (value == -1 && PyErr_Occurred())
2660 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 /* Range check */
2663 if (value < 0 || value >= 256) {
2664 PyErr_SetString(PyExc_ValueError,
2665 "bytes must be in range(0, 256)");
2666 goto error;
2667 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 /* Append the byte */
2670 if (i >= size) {
2671 size = 2 * size + 1;
2672 if (_PyBytes_Resize(&new, size) < 0)
2673 goto error;
2674 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002675 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 }
2677 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 /* Clean up and return success */
2680 Py_DECREF(it);
2681 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002682
2683 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01002685 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002687}
2688
2689static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02002690bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 PyObject *tmp, *pnew;
2693 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 assert(PyType_IsSubtype(type, &PyBytes_Type));
2696 tmp = bytes_new(&PyBytes_Type, args, kwds);
2697 if (tmp == NULL)
2698 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02002699 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 n = PyBytes_GET_SIZE(tmp);
2701 pnew = type->tp_alloc(type, n);
2702 if (pnew != NULL) {
2703 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2704 PyBytes_AS_STRING(tmp), n+1);
2705 ((PyBytesObject *)pnew)->ob_shash =
2706 ((PyBytesObject *)tmp)->ob_shash;
2707 }
2708 Py_DECREF(tmp);
2709 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710}
2711
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002712PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002713"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002715bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002716bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2717bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002718\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002720 - an iterable yielding integers in range(256)\n\
2721 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002722 - any object implementing the buffer API.\n\
2723 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002724
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002725static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002726
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002728 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2729 "bytes",
2730 PyBytesObject_SIZE,
2731 sizeof(char),
2732 bytes_dealloc, /* tp_dealloc */
2733 0, /* tp_print */
2734 0, /* tp_getattr */
2735 0, /* tp_setattr */
2736 0, /* tp_reserved */
2737 (reprfunc)bytes_repr, /* tp_repr */
2738 0, /* tp_as_number */
2739 &bytes_as_sequence, /* tp_as_sequence */
2740 &bytes_as_mapping, /* tp_as_mapping */
2741 (hashfunc)bytes_hash, /* tp_hash */
2742 0, /* tp_call */
2743 bytes_str, /* tp_str */
2744 PyObject_GenericGetAttr, /* tp_getattro */
2745 0, /* tp_setattro */
2746 &bytes_as_buffer, /* tp_as_buffer */
2747 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2748 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2749 bytes_doc, /* tp_doc */
2750 0, /* tp_traverse */
2751 0, /* tp_clear */
2752 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2753 0, /* tp_weaklistoffset */
2754 bytes_iter, /* tp_iter */
2755 0, /* tp_iternext */
2756 bytes_methods, /* tp_methods */
2757 0, /* tp_members */
2758 0, /* tp_getset */
2759 &PyBaseObject_Type, /* tp_base */
2760 0, /* tp_dict */
2761 0, /* tp_descr_get */
2762 0, /* tp_descr_set */
2763 0, /* tp_dictoffset */
2764 0, /* tp_init */
2765 0, /* tp_alloc */
2766 bytes_new, /* tp_new */
2767 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002768};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002769
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002771PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002772{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002773 PyObject *v;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002774 assert(pv != NULL);
2775 if (*pv == NULL)
2776 return;
2777 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002778 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 return;
2780 }
2781 v = bytes_concat(*pv, w);
2782 Py_DECREF(*pv);
2783 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784}
2785
2786void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002787PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 PyBytes_Concat(pv, w);
2790 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791}
2792
2793
2794/* The following function breaks the notion that strings are immutable:
2795 it changes the size of a string. We get away with this only if there
2796 is only one module referencing the object. You can also think of it
2797 as creating a new string object and destroying the old one, only
2798 more efficiently. In any case, don't use this if the string may
2799 already be known to some other part of the code...
2800 Note that if there's not enough memory to resize the string, the original
2801 string object at *pv is deallocated, *pv is set to NULL, an "out of
2802 memory" exception is set, and -1 is returned. Else (on success) 0 is
2803 returned, and the value in *pv may or may not be the same as on input.
2804 As always, an extra byte is allocated for a trailing \0 byte (newsize
2805 does *not* include that), and a trailing \0 byte is stored.
2806*/
2807
2808int
2809_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2810{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002811 PyObject *v;
2812 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 v = *pv;
2814 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2815 *pv = 0;
2816 Py_DECREF(v);
2817 PyErr_BadInternalCall();
2818 return -1;
2819 }
2820 /* XXX UNREF/NEWREF interface should be more symmetrical */
2821 _Py_DEC_REFTOTAL;
2822 _Py_ForgetReference(v);
2823 *pv = (PyObject *)
2824 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2825 if (*pv == NULL) {
2826 PyObject_Del(v);
2827 PyErr_NoMemory();
2828 return -1;
2829 }
2830 _Py_NewReference(*pv);
2831 sv = (PyBytesObject *) *pv;
2832 Py_SIZE(sv) = newsize;
2833 sv->ob_sval[newsize] = '\0';
2834 sv->ob_shash = -1; /* invalidate cached hash value */
2835 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002836}
2837
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838void
2839PyBytes_Fini(void)
2840{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002841 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002842 for (i = 0; i < UCHAR_MAX + 1; i++)
2843 Py_CLEAR(characters[i]);
2844 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002845}
2846
Benjamin Peterson4116f362008-05-27 00:36:20 +00002847/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002848
2849typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002850 PyObject_HEAD
2851 Py_ssize_t it_index;
2852 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002854
2855static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002857{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 _PyObject_GC_UNTRACK(it);
2859 Py_XDECREF(it->it_seq);
2860 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002861}
2862
2863static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002864striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002865{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 Py_VISIT(it->it_seq);
2867 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002868}
2869
2870static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002872{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002873 PyBytesObject *seq;
2874 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 assert(it != NULL);
2877 seq = it->it_seq;
2878 if (seq == NULL)
2879 return NULL;
2880 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2883 item = PyLong_FromLong(
2884 (unsigned char)seq->ob_sval[it->it_index]);
2885 if (item != NULL)
2886 ++it->it_index;
2887 return item;
2888 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 Py_DECREF(seq);
2891 it->it_seq = NULL;
2892 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002893}
2894
2895static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002898 Py_ssize_t len = 0;
2899 if (it->it_seq)
2900 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2901 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002902}
2903
2904PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002906
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002907static PyObject *
2908striter_reduce(striterobject *it)
2909{
2910 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002911 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002912 it->it_seq, it->it_index);
2913 } else {
2914 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2915 if (u == NULL)
2916 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002917 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002918 }
2919}
2920
2921PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2922
2923static PyObject *
2924striter_setstate(striterobject *it, PyObject *state)
2925{
2926 Py_ssize_t index = PyLong_AsSsize_t(state);
2927 if (index == -1 && PyErr_Occurred())
2928 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00002929 if (it->it_seq != NULL) {
2930 if (index < 0)
2931 index = 0;
2932 else if (index > PyBytes_GET_SIZE(it->it_seq))
2933 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
2934 it->it_index = index;
2935 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002936 Py_RETURN_NONE;
2937}
2938
2939PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2940
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002941static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002942 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2943 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002944 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2945 reduce_doc},
2946 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2947 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002948 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002949};
2950
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002951PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002952 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2953 "bytes_iterator", /* tp_name */
2954 sizeof(striterobject), /* tp_basicsize */
2955 0, /* tp_itemsize */
2956 /* methods */
2957 (destructor)striter_dealloc, /* tp_dealloc */
2958 0, /* tp_print */
2959 0, /* tp_getattr */
2960 0, /* tp_setattr */
2961 0, /* tp_reserved */
2962 0, /* tp_repr */
2963 0, /* tp_as_number */
2964 0, /* tp_as_sequence */
2965 0, /* tp_as_mapping */
2966 0, /* tp_hash */
2967 0, /* tp_call */
2968 0, /* tp_str */
2969 PyObject_GenericGetAttr, /* tp_getattro */
2970 0, /* tp_setattro */
2971 0, /* tp_as_buffer */
2972 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2973 0, /* tp_doc */
2974 (traverseproc)striter_traverse, /* tp_traverse */
2975 0, /* tp_clear */
2976 0, /* tp_richcompare */
2977 0, /* tp_weaklistoffset */
2978 PyObject_SelfIter, /* tp_iter */
2979 (iternextfunc)striter_next, /* tp_iternext */
2980 striter_methods, /* tp_methods */
2981 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002982};
2983
2984static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002985bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002986{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 if (!PyBytes_Check(seq)) {
2990 PyErr_BadInternalCall();
2991 return NULL;
2992 }
2993 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
2994 if (it == NULL)
2995 return NULL;
2996 it->it_index = 0;
2997 Py_INCREF(seq);
2998 it->it_seq = (PyBytesObject *)seq;
2999 _PyObject_GC_TRACK(it);
3000 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001}