blob: a3ccbcfd613494eebb5f600010d744f4f86a1f3f [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020013 PyBufferProcs *bufferprocs;
14 if (PyBytes_CheckExact(obj)) {
15 /* Fast path, e.g. for .join() of many bytes objects */
16 Py_INCREF(obj);
17 view->obj = obj;
18 view->buf = PyBytes_AS_STRING(obj);
19 view->len = PyBytes_GET_SIZE(obj);
20 return view->len;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Antoine Pitroucfc22b42012-10-16 21:07:23 +020023 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
24 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000025 {
Antoine Pitroud1188562010-06-09 16:38:55 +000026 PyErr_Format(PyExc_TypeError,
27 "Type %.100s doesn't support the buffer API",
28 Py_TYPE(obj)->tp_name);
29 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000031
Antoine Pitroucfc22b42012-10-16 21:07:23 +020032 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000034 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035}
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000038Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000040
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041static PyBytesObject *characters[UCHAR_MAX + 1];
42static PyBytesObject *nullstring;
43
/* PyBytesObject_SIZE is the basic size of a bytes object: everything up to
   the start of ob_sval, plus one byte for the trailing null.  Allocating an
   object that holds n bytes of data therefore takes PyBytesObject_SIZE + n
   bytes.

   Compared with sizeof(PyBytesObject), this saves 3 bytes per string
   allocation on a typical system.
*/
#define PyBytesObject_SIZE (1 + offsetof(PyBytesObject, ob_sval))
51
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000074PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +000075PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossumd624f182006-04-24 13:47:05 +000076{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020077 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (size < 0) {
79 PyErr_SetString(PyExc_SystemError,
80 "Negative size passed to PyBytes_FromStringAndSize");
81 return NULL;
82 }
83 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000084#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 Py_INCREF(op);
88 return (PyObject *)op;
89 }
90 if (size == 1 && str != NULL &&
91 (op = characters[*str & UCHAR_MAX]) != NULL)
92 {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000093#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000095#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 Py_INCREF(op);
97 return (PyObject *)op;
98 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
101 PyErr_SetString(PyExc_OverflowError,
102 "byte string is too large");
103 return NULL;
104 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 /* Inline PyObject_NewVar */
107 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
108 if (op == NULL)
109 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100110 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 op->ob_shash = -1;
112 if (str != NULL)
113 Py_MEMCPY(op->ob_sval, str, size);
114 op->ob_sval[size] = '\0';
115 /* share short strings */
116 if (size == 0) {
117 nullstring = op;
118 Py_INCREF(op);
119 } else if (size == 1 && str != NULL) {
120 characters[*str & UCHAR_MAX] = op;
121 Py_INCREF(op);
122 }
123 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000124}
125
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000126PyObject *
127PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000128{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200129 size_t size;
130 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 assert(str != NULL);
133 size = strlen(str);
134 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
135 PyErr_SetString(PyExc_OverflowError,
136 "byte string is too long");
137 return NULL;
138 }
139 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000140#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 Py_INCREF(op);
144 return (PyObject *)op;
145 }
146 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000147#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 Py_INCREF(op);
151 return (PyObject *)op;
152 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 /* Inline PyObject_NewVar */
155 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
156 if (op == NULL)
157 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100158 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 op->ob_shash = -1;
160 Py_MEMCPY(op->ob_sval, str, size+1);
161 /* share short strings */
162 if (size == 0) {
163 nullstring = op;
164 Py_INCREF(op);
165 } else if (size == 1) {
166 characters[*str & UCHAR_MAX] = op;
167 Py_INCREF(op);
168 }
169 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000170}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000171
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172PyObject *
173PyBytes_FromFormatV(const char *format, va_list vargs)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 va_list count;
176 Py_ssize_t n = 0;
177 const char* f;
178 char *s;
179 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000180
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000181 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 /* step 1: figure out how large a buffer we need */
183 for (f = format; *f; f++) {
184 if (*f == '%') {
185 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000186 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
190 * they don't affect the amount of space we reserve.
191 */
192 if ((*f == 'l' || *f == 'z') &&
193 (f[1] == 'd' || f[1] == 'u'))
194 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 switch (*f) {
197 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100198 {
199 int c = va_arg(count, int);
200 if (c < 0 || c > 255) {
201 PyErr_SetString(PyExc_OverflowError,
202 "PyBytes_FromFormatV(): %c format "
203 "expects an integer in range [0; 255]");
204 return NULL;
205 }
206 n++;
207 break;
208 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 case '%':
210 n++;
211 break;
212 case 'd': case 'u': case 'i': case 'x':
213 (void) va_arg(count, int);
214 /* 20 bytes is enough to hold a 64-bit
215 integer. Decimal takes the most space.
216 This isn't enough for octal. */
217 n += 20;
218 break;
219 case 's':
220 s = va_arg(count, char*);
221 n += strlen(s);
222 break;
223 case 'p':
224 (void) va_arg(count, int);
225 /* maximum 64-bit pointer representation:
226 * 0xffffffffffffffff
227 * so 19 characters is enough.
228 * XXX I count 18 -- what's the extra for?
229 */
230 n += 19;
231 break;
232 default:
233 /* if we stumble upon an unknown
234 formatting code, copy the rest of
235 the format string to the output
236 string. (we cannot just skip the
237 code, since there's no way to know
238 what's in the argument list) */
239 n += strlen(p);
240 goto expand;
241 }
242 } else
243 n++;
244 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000245 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 /* step 2: fill the buffer */
247 /* Since we've analyzed how much space we need for the worst case,
248 use sprintf directly instead of the slower PyOS_snprintf. */
249 string = PyBytes_FromStringAndSize(NULL, n);
250 if (!string)
251 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 for (f = format; *f; f++) {
256 if (*f == '%') {
257 const char* p = f++;
258 Py_ssize_t i;
259 int longflag = 0;
260 int size_tflag = 0;
261 /* parse the width.precision part (we're only
262 interested in the precision value, if any) */
263 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000264 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 n = (n*10) + *f++ - '0';
266 if (*f == '.') {
267 f++;
268 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000269 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 n = (n*10) + *f++ - '0';
271 }
David Malcolm96960882010-11-05 17:23:41 +0000272 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 f++;
274 /* handle the long flag, but only for %ld and %lu.
275 others can be added when necessary. */
276 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
277 longflag = 1;
278 ++f;
279 }
280 /* handle the size_t flag. */
281 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
282 size_tflag = 1;
283 ++f;
284 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 switch (*f) {
287 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100288 {
289 int c = va_arg(vargs, int);
290 /* c has been checked for overflow in the first step */
291 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100293 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 case 'd':
295 if (longflag)
296 sprintf(s, "%ld", va_arg(vargs, long));
297 else if (size_tflag)
298 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
299 va_arg(vargs, Py_ssize_t));
300 else
301 sprintf(s, "%d", va_arg(vargs, int));
302 s += strlen(s);
303 break;
304 case 'u':
305 if (longflag)
306 sprintf(s, "%lu",
307 va_arg(vargs, unsigned long));
308 else if (size_tflag)
309 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
310 va_arg(vargs, size_t));
311 else
312 sprintf(s, "%u",
313 va_arg(vargs, unsigned int));
314 s += strlen(s);
315 break;
316 case 'i':
317 sprintf(s, "%i", va_arg(vargs, int));
318 s += strlen(s);
319 break;
320 case 'x':
321 sprintf(s, "%x", va_arg(vargs, int));
322 s += strlen(s);
323 break;
324 case 's':
325 p = va_arg(vargs, char*);
326 i = strlen(p);
327 if (n > 0 && i > n)
328 i = n;
329 Py_MEMCPY(s, p, i);
330 s += i;
331 break;
332 case 'p':
333 sprintf(s, "%p", va_arg(vargs, void*));
334 /* %p is ill-defined: ensure leading 0x. */
335 if (s[1] == 'X')
336 s[1] = 'x';
337 else if (s[1] != 'x') {
338 memmove(s+2, s, strlen(s)+1);
339 s[0] = '0';
340 s[1] = 'x';
341 }
342 s += strlen(s);
343 break;
344 case '%':
345 *s++ = '%';
346 break;
347 default:
348 strcpy(s, p);
349 s += strlen(s);
350 goto end;
351 }
352 } else
353 *s++ = *f;
354 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000355
356 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
358 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
377static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000378bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000379{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000381}
382
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000383/* Unescape a backslash-escaped string. If unicode is non-zero,
384 the string is a u-literal. If recode_encoding is non-zero,
385 the string is UTF-8 encoded and should be re-encoded in the
386 specified encoding. */
387
388PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 Py_ssize_t len,
390 const char *errors,
391 Py_ssize_t unicode,
392 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 int c;
395 char *p, *buf;
396 const char *end;
397 PyObject *v;
398 Py_ssize_t newlen = recode_encoding ? 4*len:len;
399 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
400 if (v == NULL)
401 return NULL;
402 p = buf = PyBytes_AsString(v);
403 end = s + len;
404 while (s < end) {
405 if (*s != '\\') {
406 non_esc:
407 if (recode_encoding && (*s & 0x80)) {
408 PyObject *u, *w;
409 char *r;
410 const char* t;
411 Py_ssize_t rn;
412 t = s;
413 /* Decode non-ASCII bytes as UTF-8. */
414 while (t < end && (*t & 0x80)) t++;
415 u = PyUnicode_DecodeUTF8(s, t - s, errors);
416 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 /* Recode them in target encoding. */
419 w = PyUnicode_AsEncodedString(
420 u, recode_encoding, errors);
421 Py_DECREF(u);
422 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 /* Append bytes to output buffer. */
425 assert(PyBytes_Check(w));
426 r = PyBytes_AS_STRING(w);
427 rn = PyBytes_GET_SIZE(w);
428 Py_MEMCPY(p, r, rn);
429 p += rn;
430 Py_DECREF(w);
431 s = t;
432 } else {
433 *p++ = *s++;
434 }
435 continue;
436 }
437 s++;
438 if (s==end) {
439 PyErr_SetString(PyExc_ValueError,
440 "Trailing \\ in string");
441 goto failed;
442 }
443 switch (*s++) {
444 /* XXX This assumes ASCII! */
445 case '\n': break;
446 case '\\': *p++ = '\\'; break;
447 case '\'': *p++ = '\''; break;
448 case '\"': *p++ = '\"'; break;
449 case 'b': *p++ = '\b'; break;
450 case 'f': *p++ = '\014'; break; /* FF */
451 case 't': *p++ = '\t'; break;
452 case 'n': *p++ = '\n'; break;
453 case 'r': *p++ = '\r'; break;
454 case 'v': *p++ = '\013'; break; /* VT */
455 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
456 case '0': case '1': case '2': case '3':
457 case '4': case '5': case '6': case '7':
458 c = s[-1] - '0';
459 if (s < end && '0' <= *s && *s <= '7') {
460 c = (c<<3) + *s++ - '0';
461 if (s < end && '0' <= *s && *s <= '7')
462 c = (c<<3) + *s++ - '0';
463 }
464 *p++ = c;
465 break;
466 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000467 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 unsigned int x = 0;
469 c = Py_CHARMASK(*s);
470 s++;
David Malcolm96960882010-11-05 17:23:41 +0000471 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000473 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 x = 10 + c - 'a';
475 else
476 x = 10 + c - 'A';
477 x = x << 4;
478 c = Py_CHARMASK(*s);
479 s++;
David Malcolm96960882010-11-05 17:23:41 +0000480 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000482 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 x += 10 + c - 'a';
484 else
485 x += 10 + c - 'A';
486 *p++ = x;
487 break;
488 }
489 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200490 PyErr_Format(PyExc_ValueError,
491 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200492 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000493 goto failed;
494 }
495 if (strcmp(errors, "replace") == 0) {
496 *p++ = '?';
497 } else if (strcmp(errors, "ignore") == 0)
498 /* do nothing */;
499 else {
500 PyErr_Format(PyExc_ValueError,
501 "decoding error; unknown "
502 "error handling code: %.400s",
503 errors);
504 goto failed;
505 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200506 /* skip \x */
507 if (s < end && Py_ISXDIGIT(s[0]))
508 s++; /* and a hexdigit */
509 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 default:
511 *p++ = '\\';
512 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200513 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 UTF-8 bytes may follow. */
515 }
516 }
517 if (p-buf < newlen)
518 _PyBytes_Resize(&v, p - buf);
519 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000520 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 Py_DECREF(v);
522 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000523}
524
525/* -------------------------------------------------------------------- */
526/* object api */
527
528Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200529PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000530{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 if (!PyBytes_Check(op)) {
532 PyErr_Format(PyExc_TypeError,
533 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
534 return -1;
535 }
536 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000537}
538
539char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200540PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000541{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (!PyBytes_Check(op)) {
543 PyErr_Format(PyExc_TypeError,
544 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
545 return NULL;
546 }
547 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
549
550int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200551PyBytes_AsStringAndSize(PyObject *obj,
552 char **s,
553 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000554{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 if (s == NULL) {
556 PyErr_BadInternalCall();
557 return -1;
558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 if (!PyBytes_Check(obj)) {
561 PyErr_Format(PyExc_TypeError,
562 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
563 return -1;
564 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 *s = PyBytes_AS_STRING(obj);
567 if (len != NULL)
568 *len = PyBytes_GET_SIZE(obj);
569 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
570 PyErr_SetString(PyExc_TypeError,
571 "expected bytes with no null");
572 return -1;
573 }
574 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000575}
Neal Norwitz6968b052007-02-27 19:02:19 +0000576
577/* -------------------------------------------------------------------- */
578/* Methods */
579
Eric Smith0923d1d2009-04-16 20:16:10 +0000580#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000581
582#include "stringlib/fastsearch.h"
583#include "stringlib/count.h"
584#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200585#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000586#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000587#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000588#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000589
Eric Smith0f78bff2009-11-30 01:01:42 +0000590#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000591
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000592PyObject *
593PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000594{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200595 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200596 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -0400597 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200599 unsigned char quote, *s, *p;
600
601 /* Compute size of output string */
602 squotes = dquotes = 0;
603 newsize = 3; /* b'' */
604 s = (unsigned char*)op->ob_sval;
605 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400606 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200607 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400608 case '\'': squotes++; break;
609 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200610 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400611 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200612 default:
613 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400614 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200615 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400616 if (newsize > PY_SSIZE_T_MAX - incr)
617 goto overflow;
618 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200619 }
620 quote = '\'';
621 if (smartquotes && squotes && !dquotes)
622 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400623 if (squotes && quote == '\'') {
624 if (newsize > PY_SSIZE_T_MAX - squotes)
625 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200626 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200628
629 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 if (v == NULL) {
631 return NULL;
632 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000634
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200635 *p++ = 'b', *p++ = quote;
636 for (i = 0; i < length; i++) {
637 unsigned char c = op->ob_sval[i];
638 if (c == quote || c == '\\')
639 *p++ = '\\', *p++ = c;
640 else if (c == '\t')
641 *p++ = '\\', *p++ = 't';
642 else if (c == '\n')
643 *p++ = '\\', *p++ = 'n';
644 else if (c == '\r')
645 *p++ = '\\', *p++ = 'r';
646 else if (c < ' ' || c >= 0x7f) {
647 *p++ = '\\';
648 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200649 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
650 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200652 else
653 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200656 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200657 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400658
659 overflow:
660 PyErr_SetString(PyExc_OverflowError,
661 "bytes object is too large to make repr");
662 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000663}
664
Neal Norwitz6968b052007-02-27 19:02:19 +0000665static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000666bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000667{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000669}
670
Neal Norwitz6968b052007-02-27 19:02:19 +0000671static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000672bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000673{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 if (Py_BytesWarningFlag) {
675 if (PyErr_WarnEx(PyExc_BytesWarning,
676 "str() on a bytes instance", 1))
677 return NULL;
678 }
679 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000680}
681
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000682static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000683bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000684{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000686}
Neal Norwitz6968b052007-02-27 19:02:19 +0000687
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000688/* This is also used by PyBytes_Concat() */
689static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000690bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000691{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000692 Py_ssize_t size;
693 Py_buffer va, vb;
694 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 va.len = -1;
697 vb.len = -1;
698 if (_getbuffer(a, &va) < 0 ||
699 _getbuffer(b, &vb) < 0) {
700 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
701 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
702 goto done;
703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 /* Optimize end cases */
706 if (va.len == 0 && PyBytes_CheckExact(b)) {
707 result = b;
708 Py_INCREF(result);
709 goto done;
710 }
711 if (vb.len == 0 && PyBytes_CheckExact(a)) {
712 result = a;
713 Py_INCREF(result);
714 goto done;
715 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 size = va.len + vb.len;
718 if (size < 0) {
719 PyErr_NoMemory();
720 goto done;
721 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 result = PyBytes_FromStringAndSize(NULL, size);
724 if (result != NULL) {
725 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
726 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
727 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000728
729 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 if (va.len != -1)
731 PyBuffer_Release(&va);
732 if (vb.len != -1)
733 PyBuffer_Release(&vb);
734 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000735}
Neal Norwitz6968b052007-02-27 19:02:19 +0000736
737static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200738bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000739{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200740 Py_ssize_t i;
741 Py_ssize_t j;
742 Py_ssize_t size;
743 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 size_t nbytes;
745 if (n < 0)
746 n = 0;
747 /* watch out for overflows: the size can overflow int,
748 * and the # of bytes needed can overflow size_t
749 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000750 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 PyErr_SetString(PyExc_OverflowError,
752 "repeated bytes are too long");
753 return NULL;
754 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000755 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
757 Py_INCREF(a);
758 return (PyObject *)a;
759 }
760 nbytes = (size_t)size;
761 if (nbytes + PyBytesObject_SIZE <= nbytes) {
762 PyErr_SetString(PyExc_OverflowError,
763 "repeated bytes are too long");
764 return NULL;
765 }
766 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
767 if (op == NULL)
768 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100769 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 op->ob_shash = -1;
771 op->ob_sval[size] = '\0';
772 if (Py_SIZE(a) == 1 && n > 0) {
773 memset(op->ob_sval, a->ob_sval[0] , n);
774 return (PyObject *) op;
775 }
776 i = 0;
777 if (i < size) {
778 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
779 i = Py_SIZE(a);
780 }
781 while (i < size) {
782 j = (i <= size-i) ? i : size-i;
783 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
784 i += j;
785 }
786 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000787}
788
Guido van Rossum98297ee2007-11-06 21:34:58 +0000789static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000790bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000791{
792 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
793 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000794 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000795 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000796 PyErr_Clear();
797 if (_getbuffer(arg, &varg) < 0)
798 return -1;
799 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
800 varg.buf, varg.len, 0);
801 PyBuffer_Release(&varg);
802 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000803 }
804 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000805 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
806 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000807 }
808
Antoine Pitrou0010d372010-08-15 17:12:55 +0000809 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000810}
811
Neal Norwitz6968b052007-02-27 19:02:19 +0000812static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200813bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000814{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 if (i < 0 || i >= Py_SIZE(a)) {
816 PyErr_SetString(PyExc_IndexError, "index out of range");
817 return NULL;
818 }
819 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000820}
821
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100822Py_LOCAL(int)
823bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
824{
825 int cmp;
826 Py_ssize_t len;
827
828 len = Py_SIZE(a);
829 if (Py_SIZE(b) != len)
830 return 0;
831
832 if (a->ob_sval[0] != b->ob_sval[0])
833 return 0;
834
835 cmp = memcmp(a->ob_sval, b->ob_sval, len);
836 return (cmp == 0);
837}
838
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000839static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000840bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000841{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 int c;
843 Py_ssize_t len_a, len_b;
844 Py_ssize_t min_len;
845 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 /* Make sure both arguments are strings. */
848 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
849 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
850 (PyObject_IsInstance((PyObject*)a,
851 (PyObject*)&PyUnicode_Type) ||
852 PyObject_IsInstance((PyObject*)b,
853 (PyObject*)&PyUnicode_Type))) {
854 if (PyErr_WarnEx(PyExc_BytesWarning,
855 "Comparison between bytes and string", 1))
856 return NULL;
857 }
858 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100860 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100862 case Py_EQ:
863 case Py_LE:
864 case Py_GE:
865 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000866 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100867 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100868 case Py_NE:
869 case Py_LT:
870 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100872 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100873 default:
874 PyErr_BadArgument();
875 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000876 }
877 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100878 else if (op == Py_EQ || op == Py_NE) {
879 int eq = bytes_compare_eq(a, b);
880 eq ^= (op == Py_NE);
881 result = eq ? Py_True : Py_False;
882 }
883 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100884 len_a = Py_SIZE(a);
885 len_b = Py_SIZE(b);
886 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100887 if (min_len > 0) {
888 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100889 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100890 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100892 else
893 c = 0;
894 if (c == 0)
895 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
896 switch (op) {
897 case Py_LT: c = c < 0; break;
898 case Py_LE: c = c <= 0; break;
899 case Py_GT: c = c > 0; break;
900 case Py_GE: c = c >= 0; break;
901 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100902 PyErr_BadArgument();
903 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100904 }
905 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 Py_INCREF(result);
909 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000910}
911
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000912static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000913bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000914{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100915 if (a->ob_shash == -1) {
916 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100917 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100918 }
919 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000920}
921
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000922static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000923bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000924{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000925 if (PyIndex_Check(item)) {
926 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
927 if (i == -1 && PyErr_Occurred())
928 return NULL;
929 if (i < 0)
930 i += PyBytes_GET_SIZE(self);
931 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
932 PyErr_SetString(PyExc_IndexError,
933 "index out of range");
934 return NULL;
935 }
936 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
937 }
938 else if (PySlice_Check(item)) {
939 Py_ssize_t start, stop, step, slicelength, cur, i;
940 char* source_buf;
941 char* result_buf;
942 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000943
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000944 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 PyBytes_GET_SIZE(self),
946 &start, &stop, &step, &slicelength) < 0) {
947 return NULL;
948 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 if (slicelength <= 0) {
951 return PyBytes_FromStringAndSize("", 0);
952 }
953 else if (start == 0 && step == 1 &&
954 slicelength == PyBytes_GET_SIZE(self) &&
955 PyBytes_CheckExact(self)) {
956 Py_INCREF(self);
957 return (PyObject *)self;
958 }
959 else if (step == 1) {
960 return PyBytes_FromStringAndSize(
961 PyBytes_AS_STRING(self) + start,
962 slicelength);
963 }
964 else {
965 source_buf = PyBytes_AS_STRING(self);
966 result = PyBytes_FromStringAndSize(NULL, slicelength);
967 if (result == NULL)
968 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 result_buf = PyBytes_AS_STRING(result);
971 for (cur = start, i = 0; i < slicelength;
972 cur += step, i++) {
973 result_buf[i] = source_buf[cur];
974 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 return result;
977 }
978 }
979 else {
980 PyErr_Format(PyExc_TypeError,
981 "byte indices must be integers, not %.200s",
982 Py_TYPE(item)->tp_name);
983 return NULL;
984 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000985}
986
987static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000988bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000989{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
991 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000992}
993
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000994static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 (lenfunc)bytes_length, /*sq_length*/
996 (binaryfunc)bytes_concat, /*sq_concat*/
997 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
998 (ssizeargfunc)bytes_item, /*sq_item*/
999 0, /*sq_slice*/
1000 0, /*sq_ass_item*/
1001 0, /*sq_ass_slice*/
1002 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001003};
1004
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001005static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001006 (lenfunc)bytes_length,
1007 (binaryfunc)bytes_subscript,
1008 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001009};
1010
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001011static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 (getbufferproc)bytes_buffer_getbuffer,
1013 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001014};
1015
1016
/* Strip modes; they double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by strip mode. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for a strip mode: the format string minus its 3-char
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
1025
Neal Norwitz6968b052007-02-27 19:02:19 +00001026PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001027"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001028\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001029Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030If sep is not specified or is None, B is split on ASCII whitespace\n\
1031characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001032If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001033
1034static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001035bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001036{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001037 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1039 Py_ssize_t maxsplit = -1;
1040 const char *s = PyBytes_AS_STRING(self), *sub;
1041 Py_buffer vsub;
1042 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001044 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1045 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 return NULL;
1047 if (maxsplit < 0)
1048 maxsplit = PY_SSIZE_T_MAX;
1049 if (subobj == Py_None)
1050 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1051 if (_getbuffer(subobj, &vsub) < 0)
1052 return NULL;
1053 sub = vsub.buf;
1054 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1057 PyBuffer_Release(&vsub);
1058 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001059}
1060
Neal Norwitz6968b052007-02-27 19:02:19 +00001061PyDoc_STRVAR(partition__doc__,
1062"B.partition(sep) -> (head, sep, tail)\n\
1063\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001064Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001065the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001067
1068static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001069bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001070{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 const char *sep;
1072 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 if (PyBytes_Check(sep_obj)) {
1075 sep = PyBytes_AS_STRING(sep_obj);
1076 sep_len = PyBytes_GET_SIZE(sep_obj);
1077 }
1078 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1079 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 return stringlib_partition(
1082 (PyObject*) self,
1083 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1084 sep_obj, sep, sep_len
1085 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001086}
1087
1088PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001089"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001090\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001091Search for the separator sep in B, starting at the end of B,\n\
1092and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001093part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001094bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001095
1096static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001097bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 const char *sep;
1100 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 if (PyBytes_Check(sep_obj)) {
1103 sep = PyBytes_AS_STRING(sep_obj);
1104 sep_len = PyBytes_GET_SIZE(sep_obj);
1105 }
1106 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1107 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 return stringlib_rpartition(
1110 (PyObject*) self,
1111 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1112 sep_obj, sep, sep_len
1113 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001114}
1115
Neal Norwitz6968b052007-02-27 19:02:19 +00001116PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001117"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001118\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001119Return a list of the sections in B, using sep as the delimiter,\n\
1120starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001121If sep is not given, B is split on ASCII whitespace characters\n\
1122(space, tab, return, newline, formfeed, vertical tab).\n\
1123If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001124
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001125
Neal Norwitz6968b052007-02-27 19:02:19 +00001126static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001127bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001128{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001129 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1131 Py_ssize_t maxsplit = -1;
1132 const char *s = PyBytes_AS_STRING(self), *sub;
1133 Py_buffer vsub;
1134 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001135
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001136 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1137 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 return NULL;
1139 if (maxsplit < 0)
1140 maxsplit = PY_SSIZE_T_MAX;
1141 if (subobj == Py_None)
1142 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1143 if (_getbuffer(subobj, &vsub) < 0)
1144 return NULL;
1145 sub = vsub.buf;
1146 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1149 PyBuffer_Release(&vsub);
1150 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001151}
1152
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001153
1154PyDoc_STRVAR(join__doc__,
1155"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001156\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001157Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001158Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1159
Neal Norwitz6968b052007-02-27 19:02:19 +00001160static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001161bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001162{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001163 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001164}
1165
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166PyObject *
1167_PyBytes_Join(PyObject *sep, PyObject *x)
1168{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 assert(sep != NULL && PyBytes_Check(sep));
1170 assert(x != NULL);
1171 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001172}
1173
/* Helper macro normalizing start/end slice values in place against a
   sequence of length len:
     - end above len is clamped to len;
     - a negative end or start is offset by len, then clamped to 0.
   start is not clamped against len.  Expands to bare statements and
   evaluates its arguments more than once, so pass side-effect-free
   lvalues for start and end. */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001188
1189Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001190bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001193 char byte;
1194 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 const char *sub;
1196 Py_ssize_t sub_len;
1197 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001198 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199
Antoine Pitrouac65d962011-10-20 23:54:17 +02001200 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1201 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001203
Antoine Pitrouac65d962011-10-20 23:54:17 +02001204 if (subobj) {
1205 if (_getbuffer(subobj, &subbuf) < 0)
1206 return -2;
1207
1208 sub = subbuf.buf;
1209 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001211 else {
1212 sub = &byte;
1213 sub_len = 1;
1214 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001217 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1219 sub, sub_len, start, end);
1220 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001221 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1223 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001224
1225 if (subobj)
1226 PyBuffer_Release(&subbuf);
1227
1228 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001229}
1230
1231
1232PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001233"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001234\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001235Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001236such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001238\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239Return -1 on failure.");
1240
Neal Norwitz6968b052007-02-27 19:02:19 +00001241static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001242bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 Py_ssize_t result = bytes_find_internal(self, args, +1);
1245 if (result == -2)
1246 return NULL;
1247 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001248}
1249
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250
1251PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001252"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001253\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254Like B.find() but raise ValueError when the substring is not found.");
1255
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001256static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001257bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001258{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 Py_ssize_t result = bytes_find_internal(self, args, +1);
1260 if (result == -2)
1261 return NULL;
1262 if (result == -1) {
1263 PyErr_SetString(PyExc_ValueError,
1264 "substring not found");
1265 return NULL;
1266 }
1267 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001268}
1269
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270
1271PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001272"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001273\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001275such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001277\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278Return -1 on failure.");
1279
Neal Norwitz6968b052007-02-27 19:02:19 +00001280static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001281bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001282{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 Py_ssize_t result = bytes_find_internal(self, args, -1);
1284 if (result == -2)
1285 return NULL;
1286 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001287}
1288
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001291"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292\n\
1293Like B.rfind() but raise ValueError when the substring is not found.");
1294
1295static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001296bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 Py_ssize_t result = bytes_find_internal(self, args, -1);
1299 if (result == -2)
1300 return NULL;
1301 if (result == -1) {
1302 PyErr_SetString(PyExc_ValueError,
1303 "substring not found");
1304 return NULL;
1305 }
1306 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001307}
1308
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309
1310Py_LOCAL_INLINE(PyObject *)
1311do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 Py_buffer vsep;
1314 char *s = PyBytes_AS_STRING(self);
1315 Py_ssize_t len = PyBytes_GET_SIZE(self);
1316 char *sep;
1317 Py_ssize_t seplen;
1318 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 if (_getbuffer(sepobj, &vsep) < 0)
1321 return NULL;
1322 sep = vsep.buf;
1323 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 i = 0;
1326 if (striptype != RIGHTSTRIP) {
1327 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1328 i++;
1329 }
1330 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 j = len;
1333 if (striptype != LEFTSTRIP) {
1334 do {
1335 j--;
1336 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1337 j++;
1338 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1343 Py_INCREF(self);
1344 return (PyObject*)self;
1345 }
1346 else
1347 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001348}
1349
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001350
1351Py_LOCAL_INLINE(PyObject *)
1352do_strip(PyBytesObject *self, int striptype)
1353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 char *s = PyBytes_AS_STRING(self);
1355 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 i = 0;
1358 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001359 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 i++;
1361 }
1362 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 j = len;
1365 if (striptype != LEFTSTRIP) {
1366 do {
1367 j--;
David Malcolm96960882010-11-05 17:23:41 +00001368 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 j++;
1370 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1373 Py_INCREF(self);
1374 return (PyObject*)self;
1375 }
1376 else
1377 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001378}
1379
1380
1381Py_LOCAL_INLINE(PyObject *)
1382do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001385
Serhiy Storchakac6792272013-10-19 21:03:34 +03001386 if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 if (sep != NULL && sep != Py_None) {
1390 return do_xstrip(self, striptype, sep);
1391 }
1392 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393}
1394
1395
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001396PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001398\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001399Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001400If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001401static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001402bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001403{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 if (PyTuple_GET_SIZE(args) == 0)
1405 return do_strip(self, BOTHSTRIP); /* Common case */
1406 else
1407 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001408}
1409
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001411PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001413\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001414Strip leading bytes contained in the argument.\n\
1415If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001416static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001417bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001418{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 if (PyTuple_GET_SIZE(args) == 0)
1420 return do_strip(self, LEFTSTRIP); /* Common case */
1421 else
1422 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001423}
1424
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001425
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001426PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001428\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001429Strip trailing bytes contained in the argument.\n\
1430If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001431static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001432bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001433{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 if (PyTuple_GET_SIZE(args) == 0)
1435 return do_strip(self, RIGHTSTRIP); /* Common case */
1436 else
1437 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001438}
Neal Norwitz6968b052007-02-27 19:02:19 +00001439
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001440
1441PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001442"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001443\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001445string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446as in slice notation.");
1447
1448static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001449bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 PyObject *sub_obj;
1452 const char *str = PyBytes_AS_STRING(self), *sub;
1453 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001454 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456
Antoine Pitrouac65d962011-10-20 23:54:17 +02001457 Py_buffer vsub;
1458 PyObject *count_obj;
1459
1460 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1461 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463
Antoine Pitrouac65d962011-10-20 23:54:17 +02001464 if (sub_obj) {
1465 if (_getbuffer(sub_obj, &vsub) < 0)
1466 return NULL;
1467
1468 sub = vsub.buf;
1469 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001471 else {
1472 sub = &byte;
1473 sub_len = 1;
1474 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477
Antoine Pitrouac65d962011-10-20 23:54:17 +02001478 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001479 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1480 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001481
1482 if (sub_obj)
1483 PyBuffer_Release(&vsub);
1484
1485 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001486}
1487
1488
1489PyDoc_STRVAR(translate__doc__,
1490"B.translate(table[, deletechars]) -> bytes\n\
1491\n\
1492Return a copy of B, where all characters occurring in the\n\
1493optional argument deletechars are removed, and the remaining\n\
1494characters have been mapped through the given translation\n\
1495table, which must be a bytes object of length 256.");
1496
1497static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001498bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001499{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001500 char *input, *output;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 const char *table;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001502 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 PyObject *input_obj = (PyObject*)self;
1504 const char *output_start, *del_table=NULL;
1505 Py_ssize_t inlen, tablen, dellen = 0;
1506 PyObject *result;
1507 int trans_table[256];
1508 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1511 &tableobj, &delobj))
1512 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 if (PyBytes_Check(tableobj)) {
1515 table = PyBytes_AS_STRING(tableobj);
1516 tablen = PyBytes_GET_SIZE(tableobj);
1517 }
1518 else if (tableobj == Py_None) {
1519 table = NULL;
1520 tablen = 256;
1521 }
1522 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1523 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 if (tablen != 256) {
1526 PyErr_SetString(PyExc_ValueError,
1527 "translation table must be 256 characters long");
1528 return NULL;
1529 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 if (delobj != NULL) {
1532 if (PyBytes_Check(delobj)) {
1533 del_table = PyBytes_AS_STRING(delobj);
1534 dellen = PyBytes_GET_SIZE(delobj);
1535 }
1536 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1537 return NULL;
1538 }
1539 else {
1540 del_table = NULL;
1541 dellen = 0;
1542 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 inlen = PyBytes_GET_SIZE(input_obj);
1545 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1546 if (result == NULL)
1547 return NULL;
1548 output_start = output = PyBytes_AsString(result);
1549 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001551 if (dellen == 0 && table != NULL) {
1552 /* If no deletions are required, use faster code */
1553 for (i = inlen; --i >= 0; ) {
1554 c = Py_CHARMASK(*input++);
1555 if (Py_CHARMASK((*output++ = table[c])) != c)
1556 changed = 1;
1557 }
1558 if (changed || !PyBytes_CheckExact(input_obj))
1559 return result;
1560 Py_DECREF(result);
1561 Py_INCREF(input_obj);
1562 return input_obj;
1563 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 if (table == NULL) {
1566 for (i = 0; i < 256; i++)
1567 trans_table[i] = Py_CHARMASK(i);
1568 } else {
1569 for (i = 0; i < 256; i++)
1570 trans_table[i] = Py_CHARMASK(table[i]);
1571 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 for (i = 0; i < dellen; i++)
1574 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 for (i = inlen; --i >= 0; ) {
1577 c = Py_CHARMASK(*input++);
1578 if (trans_table[c] != -1)
1579 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1580 continue;
1581 changed = 1;
1582 }
1583 if (!changed && PyBytes_CheckExact(input_obj)) {
1584 Py_DECREF(result);
1585 Py_INCREF(input_obj);
1586 return input_obj;
1587 }
1588 /* Fix the size of the resulting string */
1589 if (inlen > 0)
1590 _PyBytes_Resize(&result, output - output_start);
1591 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001592}
1593
1594
Georg Brandlabc38772009-04-12 15:51:51 +00001595static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001596bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001597{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001599}
1600
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001601/* find and count characters and substrings */
1602
/* Return a pointer to the first byte equal to c within the first target_len
   bytes of target, or NULL if there is none (thin wrapper around memchr).
   Every macro parameter is parenthesized in the expansion so that any
   argument expression is passed to memchr as a single operand. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1605
1606/* String ops must return a string. */
1607/* If the object is subclass of string, create a copy */
1608Py_LOCAL(PyBytesObject *)
1609return_self(PyBytesObject *self)
1610{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 if (PyBytes_CheckExact(self)) {
1612 Py_INCREF(self);
1613 return self;
1614 }
1615 return (PyBytesObject *)PyBytes_FromStringAndSize(
1616 PyBytes_AS_STRING(self),
1617 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001618}
1619
1620Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001621countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001622{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 Py_ssize_t count=0;
1624 const char *start=target;
1625 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 while ( (start=findchar(start, end-start, c)) != NULL ) {
1628 count++;
1629 if (count >= maxcount)
1630 break;
1631 start += 1;
1632 }
1633 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634}
1635
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001636
1637/* Algorithms for different cases of string replacement */
1638
1639/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1640Py_LOCAL(PyBytesObject *)
1641replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 const char *to_s, Py_ssize_t to_len,
1643 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001644{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 char *self_s, *result_s;
1646 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001647 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001651
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001652 /* 1 at the end plus 1 after every character;
1653 count = min(maxcount, self_len + 1) */
1654 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001656 else
1657 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1658 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 /* Check for overflow */
1661 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001662 assert(count > 0);
1663 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 PyErr_SetString(PyExc_OverflowError,
1665 "replacement bytes are too long");
1666 return NULL;
1667 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001668 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 if (! (result = (PyBytesObject *)
1671 PyBytes_FromStringAndSize(NULL, result_len)) )
1672 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 self_s = PyBytes_AS_STRING(self);
1675 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 /* Lay the first one down (guaranteed this will occur) */
1680 Py_MEMCPY(result_s, to_s, to_len);
1681 result_s += to_len;
1682 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 for (i=0; i<count; i++) {
1685 *result_s++ = *self_s++;
1686 Py_MEMCPY(result_s, to_s, to_len);
1687 result_s += to_len;
1688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 /* Copy the rest of the original string */
1691 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694}
1695
1696/* Special case for deleting a single character */
1697/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1698Py_LOCAL(PyBytesObject *)
1699replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001701{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 char *self_s, *result_s;
1703 char *start, *next, *end;
1704 Py_ssize_t self_len, result_len;
1705 Py_ssize_t count;
1706 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 self_len = PyBytes_GET_SIZE(self);
1709 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 count = countchar(self_s, self_len, from_c, maxcount);
1712 if (count == 0) {
1713 return return_self(self);
1714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 result_len = self_len - count; /* from_len == 1 */
1717 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 if ( (result = (PyBytesObject *)
1720 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1721 return NULL;
1722 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 start = self_s;
1725 end = self_s + self_len;
1726 while (count-- > 0) {
1727 next = findchar(start, end-start, from_c);
1728 if (next == NULL)
1729 break;
1730 Py_MEMCPY(result_s, start, next-start);
1731 result_s += (next-start);
1732 start = next+1;
1733 }
1734 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001737}
1738
1739/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1740
1741Py_LOCAL(PyBytesObject *)
1742replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 const char *from_s, Py_ssize_t from_len,
1744 Py_ssize_t maxcount) {
1745 char *self_s, *result_s;
1746 char *start, *next, *end;
1747 Py_ssize_t self_len, result_len;
1748 Py_ssize_t count, offset;
1749 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 self_len = PyBytes_GET_SIZE(self);
1752 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 count = stringlib_count(self_s, self_len,
1755 from_s, from_len,
1756 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 if (count == 0) {
1759 /* no matches */
1760 return return_self(self);
1761 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 result_len = self_len - (count * from_len);
1764 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 if ( (result = (PyBytesObject *)
1767 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1768 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 start = self_s;
1773 end = self_s + self_len;
1774 while (count-- > 0) {
1775 offset = stringlib_find(start, end-start,
1776 from_s, from_len,
1777 0);
1778 if (offset == -1)
1779 break;
1780 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 result_s += (next-start);
1785 start = next+from_len;
1786 }
1787 Py_MEMCPY(result_s, start, end-start);
1788 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789}
1790
1791/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1792Py_LOCAL(PyBytesObject *)
1793replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001794 char from_c, char to_c,
1795 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001796{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 char *self_s, *result_s, *start, *end, *next;
1798 Py_ssize_t self_len;
1799 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 /* The result string will be the same size */
1802 self_s = PyBytes_AS_STRING(self);
1803 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 if (next == NULL) {
1808 /* No matches; return the original string */
1809 return return_self(self);
1810 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 /* Need to make a new string */
1813 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1814 if (result == NULL)
1815 return NULL;
1816 result_s = PyBytes_AS_STRING(result);
1817 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001819 /* change everything in-place, starting with this one */
1820 start = result_s + (next-self_s);
1821 *start = to_c;
1822 start++;
1823 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001824
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 while (--maxcount > 0) {
1826 next = findchar(start, end-start, from_c);
1827 if (next == NULL)
1828 break;
1829 *next = to_c;
1830 start = next+1;
1831 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834}
1835
1836/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1837Py_LOCAL(PyBytesObject *)
1838replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 const char *from_s, Py_ssize_t from_len,
1840 const char *to_s, Py_ssize_t to_len,
1841 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 char *result_s, *start, *end;
1844 char *self_s;
1845 Py_ssize_t self_len, offset;
1846 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001848 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 self_s = PyBytes_AS_STRING(self);
1851 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 offset = stringlib_find(self_s, self_len,
1854 from_s, from_len,
1855 0);
1856 if (offset == -1) {
1857 /* No matches; return the original string */
1858 return return_self(self);
1859 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 /* Need to make a new string */
1862 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1863 if (result == NULL)
1864 return NULL;
1865 result_s = PyBytes_AS_STRING(result);
1866 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 /* change everything in-place, starting with this one */
1869 start = result_s + offset;
1870 Py_MEMCPY(start, to_s, from_len);
1871 start += from_len;
1872 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 while ( --maxcount > 0) {
1875 offset = stringlib_find(start, end-start,
1876 from_s, from_len,
1877 0);
1878 if (offset==-1)
1879 break;
1880 Py_MEMCPY(start+offset, to_s, from_len);
1881 start += offset+from_len;
1882 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885}
1886
1887/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1888Py_LOCAL(PyBytesObject *)
1889replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 char from_c,
1891 const char *to_s, Py_ssize_t to_len,
1892 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 char *self_s, *result_s;
1895 char *start, *next, *end;
1896 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001897 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 self_s = PyBytes_AS_STRING(self);
1901 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 count = countchar(self_s, self_len, from_c, maxcount);
1904 if (count == 0) {
1905 /* no matches, return unchanged */
1906 return return_self(self);
1907 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 /* use the difference between current and new, hence the "-1" */
1910 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001911 assert(count > 0);
1912 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 PyErr_SetString(PyExc_OverflowError,
1914 "replacement bytes are too long");
1915 return NULL;
1916 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001917 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001918
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 if ( (result = (PyBytesObject *)
1920 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1921 return NULL;
1922 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001924 start = self_s;
1925 end = self_s + self_len;
1926 while (count-- > 0) {
1927 next = findchar(start, end-start, from_c);
1928 if (next == NULL)
1929 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 if (next == start) {
1932 /* replace with the 'to' */
1933 Py_MEMCPY(result_s, to_s, to_len);
1934 result_s += to_len;
1935 start += 1;
1936 } else {
1937 /* copy the unchanged old then the 'to' */
1938 Py_MEMCPY(result_s, start, next-start);
1939 result_s += (next-start);
1940 Py_MEMCPY(result_s, to_s, to_len);
1941 result_s += to_len;
1942 start = next+1;
1943 }
1944 }
1945 /* Copy the remainder of the remaining string */
1946 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949}
1950
1951/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1952Py_LOCAL(PyBytesObject *)
1953replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 const char *from_s, Py_ssize_t from_len,
1955 const char *to_s, Py_ssize_t to_len,
1956 Py_ssize_t maxcount) {
1957 char *self_s, *result_s;
1958 char *start, *next, *end;
1959 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001960 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001961 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 self_s = PyBytes_AS_STRING(self);
1964 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 count = stringlib_count(self_s, self_len,
1967 from_s, from_len,
1968 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970 if (count == 0) {
1971 /* no matches, return unchanged */
1972 return return_self(self);
1973 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 /* Check for overflow */
1976 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001977 assert(count > 0);
1978 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 PyErr_SetString(PyExc_OverflowError,
1980 "replacement bytes are too long");
1981 return NULL;
1982 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001983 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001985 if ( (result = (PyBytesObject *)
1986 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1987 return NULL;
1988 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 start = self_s;
1991 end = self_s + self_len;
1992 while (count-- > 0) {
1993 offset = stringlib_find(start, end-start,
1994 from_s, from_len,
1995 0);
1996 if (offset == -1)
1997 break;
1998 next = start+offset;
1999 if (next == start) {
2000 /* replace with the 'to' */
2001 Py_MEMCPY(result_s, to_s, to_len);
2002 result_s += to_len;
2003 start += from_len;
2004 } else {
2005 /* copy the unchanged old then the 'to' */
2006 Py_MEMCPY(result_s, start, next-start);
2007 result_s += (next-start);
2008 Py_MEMCPY(result_s, to_s, to_len);
2009 result_s += to_len;
2010 start = next+from_len;
2011 }
2012 }
2013 /* Copy the remainder of the remaining string */
2014 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002016 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017}
2018
2019
2020Py_LOCAL(PyBytesObject *)
2021replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 const char *from_s, Py_ssize_t from_len,
2023 const char *to_s, Py_ssize_t to_len,
2024 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 if (maxcount < 0) {
2027 maxcount = PY_SSIZE_T_MAX;
2028 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2029 /* nothing to do; return the original string */
2030 return return_self(self);
2031 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002033 if (maxcount == 0 ||
2034 (from_len == 0 && to_len == 0)) {
2035 /* nothing to do; return the original string */
2036 return return_self(self);
2037 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002038
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002039 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 if (from_len == 0) {
2042 /* insert the 'to' string everywhere. */
2043 /* >>> "Python".replace("", ".") */
2044 /* '.P.y.t.h.o.n.' */
2045 return replace_interleave(self, to_s, to_len, maxcount);
2046 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002048 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2049 /* point for an empty self string to generate a non-empty string */
2050 /* Special case so the remaining code always gets a non-empty string */
2051 if (PyBytes_GET_SIZE(self) == 0) {
2052 return return_self(self);
2053 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 if (to_len == 0) {
2056 /* delete all occurrences of 'from' string */
2057 if (from_len == 1) {
2058 return replace_delete_single_character(
2059 self, from_s[0], maxcount);
2060 } else {
2061 return replace_delete_substring(self, from_s,
2062 from_len, maxcount);
2063 }
2064 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 if (from_len == to_len) {
2069 if (from_len == 1) {
2070 return replace_single_character_in_place(
2071 self,
2072 from_s[0],
2073 to_s[0],
2074 maxcount);
2075 } else {
2076 return replace_substring_in_place(
2077 self, from_s, from_len, to_s, to_len,
2078 maxcount);
2079 }
2080 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 /* Otherwise use the more generic algorithms */
2083 if (from_len == 1) {
2084 return replace_single_character(self, from_s[0],
2085 to_s, to_len, maxcount);
2086 } else {
2087 /* len('from')>=2, len('to')>=1 */
2088 return replace_substring(self, from_s, from_len, to_s, to_len,
2089 maxcount);
2090 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002091}
2092
2093PyDoc_STRVAR(replace__doc__,
2094"B.replace(old, new[, count]) -> bytes\n\
2095\n\
2096Return a copy of B with all occurrences of subsection\n\
2097old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002098given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
2100static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002101bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 Py_ssize_t count = -1;
2104 PyObject *from, *to;
2105 const char *from_s, *to_s;
2106 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2109 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 if (PyBytes_Check(from)) {
2112 from_s = PyBytes_AS_STRING(from);
2113 from_len = PyBytes_GET_SIZE(from);
2114 }
2115 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2116 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 if (PyBytes_Check(to)) {
2119 to_s = PyBytes_AS_STRING(to);
2120 to_len = PyBytes_GET_SIZE(to);
2121 }
2122 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2123 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 return (PyObject *)replace((PyBytesObject *) self,
2126 from_s, from_len,
2127 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128}
2129
2130/** End DALKE **/
2131
2132/* Matches the end (direction >= 0) or start (direction < 0) of self
2133 * against substr, using the start and end arguments. Returns
2134 * -1 on error, 0 if not found and 1 if found.
2135 */
2136Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002137_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 Py_ssize_t len = PyBytes_GET_SIZE(self);
2141 Py_ssize_t slen;
2142 const char* sub;
2143 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 if (PyBytes_Check(substr)) {
2146 sub = PyBytes_AS_STRING(substr);
2147 slen = PyBytes_GET_SIZE(substr);
2148 }
2149 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2150 return -1;
2151 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 if (direction < 0) {
2156 /* startswith */
2157 if (start+slen > len)
2158 return 0;
2159 } else {
2160 /* endswith */
2161 if (end-start < slen || start > len)
2162 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 if (end-slen > start)
2165 start = end - slen;
2166 }
2167 if (end-start >= slen)
2168 return ! memcmp(str+start, sub, slen);
2169 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002170}
2171
2172
2173PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002174"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002175\n\
2176Return True if B starts with the specified prefix, False otherwise.\n\
2177With optional start, test B beginning at that position.\n\
2178With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002179prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180
2181static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002182bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 Py_ssize_t start = 0;
2185 Py_ssize_t end = PY_SSIZE_T_MAX;
2186 PyObject *subobj;
2187 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188
Jesus Ceaac451502011-04-20 17:09:23 +02002189 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 return NULL;
2191 if (PyTuple_Check(subobj)) {
2192 Py_ssize_t i;
2193 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2194 result = _bytes_tailmatch(self,
2195 PyTuple_GET_ITEM(subobj, i),
2196 start, end, -1);
2197 if (result == -1)
2198 return NULL;
2199 else if (result) {
2200 Py_RETURN_TRUE;
2201 }
2202 }
2203 Py_RETURN_FALSE;
2204 }
2205 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002206 if (result == -1) {
2207 if (PyErr_ExceptionMatches(PyExc_TypeError))
2208 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2209 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002210 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002211 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002212 else
2213 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214}
2215
2216
2217PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002218"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002219\n\
2220Return True if B ends with the specified suffix, False otherwise.\n\
2221With optional start, test B beginning at that position.\n\
2222With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002223suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002224
2225static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002226bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002228 Py_ssize_t start = 0;
2229 Py_ssize_t end = PY_SSIZE_T_MAX;
2230 PyObject *subobj;
2231 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002232
Jesus Ceaac451502011-04-20 17:09:23 +02002233 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002234 return NULL;
2235 if (PyTuple_Check(subobj)) {
2236 Py_ssize_t i;
2237 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2238 result = _bytes_tailmatch(self,
2239 PyTuple_GET_ITEM(subobj, i),
2240 start, end, +1);
2241 if (result == -1)
2242 return NULL;
2243 else if (result) {
2244 Py_RETURN_TRUE;
2245 }
2246 }
2247 Py_RETURN_FALSE;
2248 }
2249 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002250 if (result == -1) {
2251 if (PyErr_ExceptionMatches(PyExc_TypeError))
2252 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2253 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002254 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002255 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002256 else
2257 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002258}
2259
2260
2261PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002262"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002263\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002264Decode B using the codec registered for encoding. Default encoding\n\
2265is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002266handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2267a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002268as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002269able to handle UnicodeDecodeErrors.");
2270
2271static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002272bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002274 const char *encoding = NULL;
2275 const char *errors = NULL;
2276 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002278 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2279 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002280 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002281}
2282
Guido van Rossum20188312006-05-05 15:15:40 +00002283
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002284PyDoc_STRVAR(splitlines__doc__,
2285"B.splitlines([keepends]) -> list of lines\n\
2286\n\
2287Return a list of the lines in B, breaking at line boundaries.\n\
2288Line breaks are not included in the resulting list unless keepends\n\
2289is given and true.");
2290
2291static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002292bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002293{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002294 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002295 int keepends = 0;
2296
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002297 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2298 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002299 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002300
2301 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002302 (PyObject*) self, PyBytes_AS_STRING(self),
2303 PyBytes_GET_SIZE(self), keepends
2304 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002305}
2306
2307
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002308PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002309"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002310\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002311Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002312Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002314
2315static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002316hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002317{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 if (c >= 128)
2319 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002320 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 return c - '0';
2322 else {
David Malcolm96960882010-11-05 17:23:41 +00002323 if (Py_ISUPPER(c))
2324 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002325 if (c >= 'a' && c <= 'f')
2326 return c - 'a' + 10;
2327 }
2328 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002329}
2330
2331static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002332bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002334 PyObject *newstring, *hexobj;
2335 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 Py_ssize_t hexlen, byteslen, i, j;
2337 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002338 void *data;
2339 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2342 return NULL;
2343 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002344 if (PyUnicode_READY(hexobj))
2345 return NULL;
2346 kind = PyUnicode_KIND(hexobj);
2347 data = PyUnicode_DATA(hexobj);
2348 hexlen = PyUnicode_GET_LENGTH(hexobj);
2349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 byteslen = hexlen/2; /* This overestimates if there are spaces */
2351 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2352 if (!newstring)
2353 return NULL;
2354 buf = PyBytes_AS_STRING(newstring);
2355 for (i = j = 0; i < hexlen; i += 2) {
2356 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002357 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002358 i++;
2359 if (i >= hexlen)
2360 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002361 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2362 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002363 if (top == -1 || bot == -1) {
2364 PyErr_Format(PyExc_ValueError,
2365 "non-hexadecimal number found in "
2366 "fromhex() arg at position %zd", i);
2367 goto error;
2368 }
2369 buf[j++] = (top << 4) + bot;
2370 }
2371 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2372 goto error;
2373 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002374
2375 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 Py_XDECREF(newstring);
2377 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002378}
2379
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002380PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002381"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002382
2383static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002384bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002385{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 Py_ssize_t res;
2387 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2388 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002389}
2390
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002391
2392static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002393bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002394{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002396}
2397
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002398
2399static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002400bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2402 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2403 _Py_capitalize__doc__},
2404 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2405 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2406 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2407 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2408 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002409 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 expandtabs__doc__},
2411 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2412 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2413 fromhex_doc},
2414 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2415 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2416 _Py_isalnum__doc__},
2417 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2418 _Py_isalpha__doc__},
2419 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2420 _Py_isdigit__doc__},
2421 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2422 _Py_islower__doc__},
2423 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2424 _Py_isspace__doc__},
2425 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2426 _Py_istitle__doc__},
2427 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2428 _Py_isupper__doc__},
2429 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2430 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2431 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2432 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2433 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2434 _Py_maketrans__doc__},
2435 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2436 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2437 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2438 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2439 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2440 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2441 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002442 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002443 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002444 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002445 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 splitlines__doc__},
2447 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2448 startswith__doc__},
2449 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2450 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2451 _Py_swapcase__doc__},
2452 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2453 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2454 translate__doc__},
2455 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2456 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2457 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2458 sizeof__doc__},
2459 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002460};
2461
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002462static PyObject *
2463str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2464
2465static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002466bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002467{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002468 PyObject *x = NULL;
2469 const char *encoding = NULL;
2470 const char *errors = NULL;
2471 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002472 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 Py_ssize_t size;
2474 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002475 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 if (type != &PyBytes_Type)
2478 return str_subtype_new(type, args, kwds);
2479 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2480 &encoding, &errors))
2481 return NULL;
2482 if (x == NULL) {
2483 if (encoding != NULL || errors != NULL) {
2484 PyErr_SetString(PyExc_TypeError,
2485 "encoding or errors without sequence "
2486 "argument");
2487 return NULL;
2488 }
2489 return PyBytes_FromString("");
2490 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 if (PyUnicode_Check(x)) {
2493 /* Encode via the codec registry */
2494 if (encoding == NULL) {
2495 PyErr_SetString(PyExc_TypeError,
2496 "string argument without an encoding");
2497 return NULL;
2498 }
2499 new = PyUnicode_AsEncodedString(x, encoding, errors);
2500 if (new == NULL)
2501 return NULL;
2502 assert(PyBytes_Check(new));
2503 return new;
2504 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002505
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02002506 /* If it's not unicode, there can't be encoding or errors */
2507 if (encoding != NULL || errors != NULL) {
2508 PyErr_SetString(PyExc_TypeError,
2509 "encoding or errors without a string argument");
2510 return NULL;
2511 }
2512
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002513 /* We'd like to call PyObject_Bytes here, but we need to check for an
2514 integer argument before deferring to PyBytes_FromObject, something
2515 PyObject_Bytes doesn't do. */
2516 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2517 if (func != NULL) {
2518 new = PyObject_CallFunctionObjArgs(func, NULL);
2519 Py_DECREF(func);
2520 if (new == NULL)
2521 return NULL;
2522 if (!PyBytes_Check(new)) {
2523 PyErr_Format(PyExc_TypeError,
2524 "__bytes__ returned non-bytes (type %.200s)",
2525 Py_TYPE(new)->tp_name);
2526 Py_DECREF(new);
2527 return NULL;
2528 }
2529 return new;
2530 }
2531 else if (PyErr_Occurred())
2532 return NULL;
2533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002534 /* Is it an integer? */
2535 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2536 if (size == -1 && PyErr_Occurred()) {
2537 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2538 return NULL;
2539 PyErr_Clear();
2540 }
2541 else if (size < 0) {
2542 PyErr_SetString(PyExc_ValueError, "negative count");
2543 return NULL;
2544 }
2545 else {
2546 new = PyBytes_FromStringAndSize(NULL, size);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002547 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 return NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002549 if (size > 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002550 memset(((PyBytesObject*)new)->ob_sval, 0, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 return new;
2552 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002554 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002555}
2556
2557PyObject *
2558PyBytes_FromObject(PyObject *x)
2559{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 PyObject *new, *it;
2561 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 if (x == NULL) {
2564 PyErr_BadInternalCall();
2565 return NULL;
2566 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002567
2568 if (PyBytes_CheckExact(x)) {
2569 Py_INCREF(x);
2570 return x;
2571 }
2572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 /* Use the modern buffer interface */
2574 if (PyObject_CheckBuffer(x)) {
2575 Py_buffer view;
2576 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2577 return NULL;
2578 new = PyBytes_FromStringAndSize(NULL, view.len);
2579 if (!new)
2580 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002581 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2582 &view, view.len, 'C') < 0)
2583 goto fail;
2584 PyBuffer_Release(&view);
2585 return new;
2586 fail:
2587 Py_XDECREF(new);
2588 PyBuffer_Release(&view);
2589 return NULL;
2590 }
2591 if (PyUnicode_Check(x)) {
2592 PyErr_SetString(PyExc_TypeError,
2593 "cannot convert unicode object to bytes");
2594 return NULL;
2595 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 if (PyList_CheckExact(x)) {
2598 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2599 if (new == NULL)
2600 return NULL;
2601 for (i = 0; i < Py_SIZE(x); i++) {
2602 Py_ssize_t value = PyNumber_AsSsize_t(
2603 PyList_GET_ITEM(x, i), PyExc_ValueError);
2604 if (value == -1 && PyErr_Occurred()) {
2605 Py_DECREF(new);
2606 return NULL;
2607 }
2608 if (value < 0 || value >= 256) {
2609 PyErr_SetString(PyExc_ValueError,
2610 "bytes must be in range(0, 256)");
2611 Py_DECREF(new);
2612 return NULL;
2613 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002614 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 }
2616 return new;
2617 }
2618 if (PyTuple_CheckExact(x)) {
2619 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2620 if (new == NULL)
2621 return NULL;
2622 for (i = 0; i < Py_SIZE(x); i++) {
2623 Py_ssize_t value = PyNumber_AsSsize_t(
2624 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2625 if (value == -1 && PyErr_Occurred()) {
2626 Py_DECREF(new);
2627 return NULL;
2628 }
2629 if (value < 0 || value >= 256) {
2630 PyErr_SetString(PyExc_ValueError,
2631 "bytes must be in range(0, 256)");
2632 Py_DECREF(new);
2633 return NULL;
2634 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002635 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 }
2637 return new;
2638 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002641 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 if (size == -1 && PyErr_Occurred())
2643 return NULL;
2644 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2645 returning a shared empty bytes string. This required because we
2646 want to call _PyBytes_Resize() the returned object, which we can
2647 only do on bytes objects with refcount == 1. */
2648 size += 1;
2649 new = PyBytes_FromStringAndSize(NULL, size);
2650 if (new == NULL)
2651 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002653 /* Get the iterator */
2654 it = PyObject_GetIter(x);
2655 if (it == NULL)
2656 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002658 /* Run the iterator to exhaustion */
2659 for (i = 0; ; i++) {
2660 PyObject *item;
2661 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 /* Get the next item */
2664 item = PyIter_Next(it);
2665 if (item == NULL) {
2666 if (PyErr_Occurred())
2667 goto error;
2668 break;
2669 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 /* Interpret it as an int (__index__) */
2672 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2673 Py_DECREF(item);
2674 if (value == -1 && PyErr_Occurred())
2675 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Range check */
2678 if (value < 0 || value >= 256) {
2679 PyErr_SetString(PyExc_ValueError,
2680 "bytes must be in range(0, 256)");
2681 goto error;
2682 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 /* Append the byte */
2685 if (i >= size) {
2686 size = 2 * size + 1;
2687 if (_PyBytes_Resize(&new, size) < 0)
2688 goto error;
2689 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002690 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 }
2692 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 /* Clean up and return success */
2695 Py_DECREF(it);
2696 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002697
2698 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01002700 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702}
2703
2704static PyObject *
2705str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2706{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 PyObject *tmp, *pnew;
2708 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 assert(PyType_IsSubtype(type, &PyBytes_Type));
2711 tmp = bytes_new(&PyBytes_Type, args, kwds);
2712 if (tmp == NULL)
2713 return NULL;
2714 assert(PyBytes_CheckExact(tmp));
2715 n = PyBytes_GET_SIZE(tmp);
2716 pnew = type->tp_alloc(type, n);
2717 if (pnew != NULL) {
2718 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2719 PyBytes_AS_STRING(tmp), n+1);
2720 ((PyBytesObject *)pnew)->ob_shash =
2721 ((PyBytesObject *)tmp)->ob_shash;
2722 }
2723 Py_DECREF(tmp);
2724 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725}
2726
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002727PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002728"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002729bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002730bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002731bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2732bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002733\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002735 - an iterable yielding integers in range(256)\n\
2736 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002737 - any object implementing the buffer API.\n\
2738 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002739
/* Forward declaration: bytes_iter is referenced by the tp_iter slot below. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  The initializer is positional, so every entry
   must stay in slot order; unused slots are 0.  The basic size is
   PyBytesObject_SIZE and the item size is sizeof(char). */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",
    PyBytesObject_SIZE,
    sizeof(char),
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002784
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002785void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002786PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002788 PyObject *v;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 assert(pv != NULL);
2790 if (*pv == NULL)
2791 return;
2792 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002793 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 return;
2795 }
2796 v = bytes_concat(*pv, w);
2797 Py_DECREF(*pv);
2798 *pv = v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799}
2800
2801void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002802PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 PyBytes_Concat(pv, w);
2805 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806}
2807
2808
2809/* The following function breaks the notion that strings are immutable:
2810 it changes the size of a string. We get away with this only if there
2811 is only one module referencing the object. You can also think of it
2812 as creating a new string object and destroying the old one, only
2813 more efficiently. In any case, don't use this if the string may
2814 already be known to some other part of the code...
2815 Note that if there's not enough memory to resize the string, the original
2816 string object at *pv is deallocated, *pv is set to NULL, an "out of
2817 memory" exception is set, and -1 is returned. Else (on success) 0 is
2818 returned, and the value in *pv may or may not be the same as on input.
2819 As always, an extra byte is allocated for a trailing \0 byte (newsize
2820 does *not* include that), and a trailing \0 byte is stored.
2821*/
2822
2823int
2824_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2825{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002826 PyObject *v;
2827 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002828 v = *pv;
2829 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2830 *pv = 0;
2831 Py_DECREF(v);
2832 PyErr_BadInternalCall();
2833 return -1;
2834 }
2835 /* XXX UNREF/NEWREF interface should be more symmetrical */
2836 _Py_DEC_REFTOTAL;
2837 _Py_ForgetReference(v);
2838 *pv = (PyObject *)
2839 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2840 if (*pv == NULL) {
2841 PyObject_Del(v);
2842 PyErr_NoMemory();
2843 return -1;
2844 }
2845 _Py_NewReference(*pv);
2846 sv = (PyBytesObject *) *pv;
2847 Py_SIZE(sv) = newsize;
2848 sv->ob_sval[newsize] = '\0';
2849 sv->ob_shash = -1; /* invalidate cached hash value */
2850 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851}
2852
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853void
2854PyBytes_Fini(void)
2855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002857 for (i = 0; i < UCHAR_MAX + 1; i++)
2858 Py_CLEAR(characters[i]);
2859 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002860}
2861
Benjamin Peterson4116f362008-05-27 00:36:20 +00002862/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002863
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002869
2870static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002872{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002873 _PyObject_GC_UNTRACK(it);
2874 Py_XDECREF(it->it_seq);
2875 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002876}
2877
2878static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002880{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002881 Py_VISIT(it->it_seq);
2882 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002883}
2884
2885static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002887{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 PyBytesObject *seq;
2889 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 assert(it != NULL);
2892 seq = it->it_seq;
2893 if (seq == NULL)
2894 return NULL;
2895 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2898 item = PyLong_FromLong(
2899 (unsigned char)seq->ob_sval[it->it_index]);
2900 if (item != NULL)
2901 ++it->it_index;
2902 return item;
2903 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 Py_DECREF(seq);
2906 it->it_seq = NULL;
2907 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002908}
2909
2910static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002911striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002912{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002913 Py_ssize_t len = 0;
2914 if (it->it_seq)
2915 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2916 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002917}
2918
2919PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002920 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002921
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002922static PyObject *
2923striter_reduce(striterobject *it)
2924{
2925 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002926 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002927 it->it_seq, it->it_index);
2928 } else {
2929 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2930 if (u == NULL)
2931 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002932 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002933 }
2934}
2935
2936PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2937
2938static PyObject *
2939striter_setstate(striterobject *it, PyObject *state)
2940{
2941 Py_ssize_t index = PyLong_AsSsize_t(state);
2942 if (index == -1 && PyErr_Occurred())
2943 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00002944 if (it->it_seq != NULL) {
2945 if (index < 0)
2946 index = 0;
2947 else if (index > PyBytes_GET_SIZE(it->it_seq))
2948 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
2949 it->it_index = index;
2950 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002951 Py_RETURN_NONE;
2952}
2953
2954PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2955
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002956static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002957 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
2958 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002959 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
2960 reduce_doc},
2961 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
2962 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002964};
2965
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2968 "bytes_iterator", /* tp_name */
2969 sizeof(striterobject), /* tp_basicsize */
2970 0, /* tp_itemsize */
2971 /* methods */
2972 (destructor)striter_dealloc, /* tp_dealloc */
2973 0, /* tp_print */
2974 0, /* tp_getattr */
2975 0, /* tp_setattr */
2976 0, /* tp_reserved */
2977 0, /* tp_repr */
2978 0, /* tp_as_number */
2979 0, /* tp_as_sequence */
2980 0, /* tp_as_mapping */
2981 0, /* tp_hash */
2982 0, /* tp_call */
2983 0, /* tp_str */
2984 PyObject_GenericGetAttr, /* tp_getattro */
2985 0, /* tp_setattro */
2986 0, /* tp_as_buffer */
2987 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2988 0, /* tp_doc */
2989 (traverseproc)striter_traverse, /* tp_traverse */
2990 0, /* tp_clear */
2991 0, /* tp_richcompare */
2992 0, /* tp_weaklistoffset */
2993 PyObject_SelfIter, /* tp_iter */
2994 (iternextfunc)striter_next, /* tp_iternext */
2995 striter_methods, /* tp_methods */
2996 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002997};
2998
2999static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003000bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003001{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 if (!PyBytes_Check(seq)) {
3005 PyErr_BadInternalCall();
3006 return NULL;
3007 }
3008 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3009 if (it == NULL)
3010 return NULL;
3011 it->it_index = 0;
3012 Py_INCREF(seq);
3013 it->it_seq = (PyBytesObject *)seq;
3014 _PyObject_GC_TRACK(it);
3015 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003016}