blob: 911a93b9caca475a5e2ee5fdd10230bf60cd56e4 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz2bad9702007-08-27 06:19:22 +000010static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000011_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000012{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020013 PyBufferProcs *bufferprocs;
14 if (PyBytes_CheckExact(obj)) {
15 /* Fast path, e.g. for .join() of many bytes objects */
16 Py_INCREF(obj);
17 view->obj = obj;
18 view->buf = PyBytes_AS_STRING(obj);
19 view->len = PyBytes_GET_SIZE(obj);
20 return view->len;
21 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000022
Antoine Pitroucfc22b42012-10-16 21:07:23 +020023 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
24 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000025 {
Antoine Pitroud1188562010-06-09 16:38:55 +000026 PyErr_Format(PyExc_TypeError,
27 "Type %.100s doesn't support the buffer API",
28 Py_TYPE(obj)->tp_name);
29 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000031
Antoine Pitroucfc22b42012-10-16 21:07:23 +020032 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000034 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035}
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000038Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000039#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000040
Christian Heimes2c9c7a52008-05-26 13:42:13 +000041static PyBytesObject *characters[UCHAR_MAX + 1];
42static PyBytesObject *nullstring;
43
Mark Dickinsonfd24b322008-12-06 15:33:31 +000044/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
45 for a string of length n should request PyBytesObject_SIZE + n bytes.
46
47 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
48 3 bytes per string allocation on a typical system.
49*/
50#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
51
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000053 For PyBytes_FromString(), the parameter `str' points to a null-terminated
54 string containing exactly `size' bytes.
55
   For PyBytes_FromStringAndSize(), the parameter `str' is
57 either NULL or else points to a string containing at least `size' bytes.
58 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
59 not have to be null-terminated. (Therefore it is safe to construct a
60 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
61 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
62 bytes (setting the last byte to the null terminating character) and you can
63 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000064 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065 alter the data yourself, since the strings may be shared.
66
67 The PyObject member `op->ob_size', which denotes the number of "extra
68 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020069 allocated for string data, not counting the null terminating character.
70 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000071 PyBytes_FromStringAndSize()) or the length of the string in the `str'
72 parameter (for PyBytes_FromString()).
73*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020074static PyObject *
75_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000076{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020077 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020078 assert(size >= 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000080#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000082#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 Py_INCREF(op);
84 return (PyObject *)op;
85 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
88 PyErr_SetString(PyExc_OverflowError,
89 "byte string is too large");
90 return NULL;
91 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 if (use_calloc)
95 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
96 else
97 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 if (op == NULL)
99 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100100 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200102 if (!use_calloc)
103 op->ob_sval[size] = '\0';
104 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 if (size == 0) {
106 nullstring = op;
107 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 }
109 return (PyObject *) op;
110}
111
112PyObject *
113PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
114{
115 PyBytesObject *op;
116 if (size < 0) {
117 PyErr_SetString(PyExc_SystemError,
118 "Negative size passed to PyBytes_FromStringAndSize");
119 return NULL;
120 }
121 if (size == 1 && str != NULL &&
122 (op = characters[*str & UCHAR_MAX]) != NULL)
123 {
124#ifdef COUNT_ALLOCS
125 one_strings++;
126#endif
127 Py_INCREF(op);
128 return (PyObject *)op;
129 }
130
131 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
132 if (op == NULL)
133 return NULL;
134 if (str == NULL)
135 return (PyObject *) op;
136
137 Py_MEMCPY(op->ob_sval, str, size);
138 /* share short strings */
139 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 characters[*str & UCHAR_MAX] = op;
141 Py_INCREF(op);
142 }
143 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000144}
145
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146PyObject *
147PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000148{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200149 size_t size;
150 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 assert(str != NULL);
153 size = strlen(str);
154 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
155 PyErr_SetString(PyExc_OverflowError,
156 "byte string is too long");
157 return NULL;
158 }
159 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000160#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000162#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 Py_INCREF(op);
164 return (PyObject *)op;
165 }
166 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000169#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 Py_INCREF(op);
171 return (PyObject *)op;
172 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 /* Inline PyObject_NewVar */
175 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
176 if (op == NULL)
177 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100178 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 op->ob_shash = -1;
180 Py_MEMCPY(op->ob_sval, str, size+1);
181 /* share short strings */
182 if (size == 0) {
183 nullstring = op;
184 Py_INCREF(op);
185 } else if (size == 1) {
186 characters[*str & UCHAR_MAX] = op;
187 Py_INCREF(op);
188 }
189 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000190}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000191
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000192PyObject *
193PyBytes_FromFormatV(const char *format, va_list vargs)
194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 va_list count;
196 Py_ssize_t n = 0;
197 const char* f;
198 char *s;
199 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000200
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000201 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 /* step 1: figure out how large a buffer we need */
203 for (f = format; *f; f++) {
204 if (*f == '%') {
205 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000206 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
210 * they don't affect the amount of space we reserve.
211 */
212 if ((*f == 'l' || *f == 'z') &&
213 (f[1] == 'd' || f[1] == 'u'))
214 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000216 switch (*f) {
217 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100218 {
219 int c = va_arg(count, int);
220 if (c < 0 || c > 255) {
221 PyErr_SetString(PyExc_OverflowError,
222 "PyBytes_FromFormatV(): %c format "
223 "expects an integer in range [0; 255]");
224 return NULL;
225 }
226 n++;
227 break;
228 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 case '%':
230 n++;
231 break;
232 case 'd': case 'u': case 'i': case 'x':
233 (void) va_arg(count, int);
234 /* 20 bytes is enough to hold a 64-bit
235 integer. Decimal takes the most space.
236 This isn't enough for octal. */
237 n += 20;
238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000265 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyBytes_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
280 int size_tflag = 0;
281 /* parse the width.precision part (we're only
282 interested in the precision value, if any) */
283 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000284 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 n = (n*10) + *f++ - '0';
286 if (*f == '.') {
287 f++;
288 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000289 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 n = (n*10) + *f++ - '0';
291 }
David Malcolm96960882010-11-05 17:23:41 +0000292 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 f++;
294 /* handle the long flag, but only for %ld and %lu.
295 others can be added when necessary. */
296 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
297 longflag = 1;
298 ++f;
299 }
300 /* handle the size_t flag. */
301 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
302 size_tflag = 1;
303 ++f;
304 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 switch (*f) {
307 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100308 {
309 int c = va_arg(vargs, int);
310 /* c has been checked for overflow in the first step */
311 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100313 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 case 'd':
315 if (longflag)
316 sprintf(s, "%ld", va_arg(vargs, long));
317 else if (size_tflag)
318 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
319 va_arg(vargs, Py_ssize_t));
320 else
321 sprintf(s, "%d", va_arg(vargs, int));
322 s += strlen(s);
323 break;
324 case 'u':
325 if (longflag)
326 sprintf(s, "%lu",
327 va_arg(vargs, unsigned long));
328 else if (size_tflag)
329 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
330 va_arg(vargs, size_t));
331 else
332 sprintf(s, "%u",
333 va_arg(vargs, unsigned int));
334 s += strlen(s);
335 break;
336 case 'i':
337 sprintf(s, "%i", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'x':
341 sprintf(s, "%x", va_arg(vargs, int));
342 s += strlen(s);
343 break;
344 case 's':
345 p = va_arg(vargs, char*);
346 i = strlen(p);
347 if (n > 0 && i > n)
348 i = n;
349 Py_MEMCPY(s, p, i);
350 s += i;
351 break;
352 case 'p':
353 sprintf(s, "%p", va_arg(vargs, void*));
354 /* %p is ill-defined: ensure leading 0x. */
355 if (s[1] == 'X')
356 s[1] = 'x';
357 else if (s[1] != 'x') {
358 memmove(s+2, s, strlen(s)+1);
359 s[0] = '0';
360 s[1] = 'x';
361 }
362 s += strlen(s);
363 break;
364 case '%':
365 *s++ = '%';
366 break;
367 default:
368 strcpy(s, p);
369 s += strlen(s);
370 goto end;
371 }
372 } else
373 *s++ = *f;
374 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000375
376 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
378 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000379}
380
381PyObject *
382PyBytes_FromFormat(const char *format, ...)
383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 PyObject* ret;
385 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000386
387#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000388 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000389#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000391#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 ret = PyBytes_FromFormatV(format, vargs);
393 va_end(vargs);
394 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000395}
396
397static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000398bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000399{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000401}
402
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000403/* Unescape a backslash-escaped string. If unicode is non-zero,
404 the string is a u-literal. If recode_encoding is non-zero,
405 the string is UTF-8 encoded and should be re-encoded in the
406 specified encoding. */
407
408PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 Py_ssize_t len,
410 const char *errors,
411 Py_ssize_t unicode,
412 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000413{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 int c;
415 char *p, *buf;
416 const char *end;
417 PyObject *v;
418 Py_ssize_t newlen = recode_encoding ? 4*len:len;
419 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
420 if (v == NULL)
421 return NULL;
422 p = buf = PyBytes_AsString(v);
423 end = s + len;
424 while (s < end) {
425 if (*s != '\\') {
426 non_esc:
427 if (recode_encoding && (*s & 0x80)) {
428 PyObject *u, *w;
429 char *r;
430 const char* t;
431 Py_ssize_t rn;
432 t = s;
433 /* Decode non-ASCII bytes as UTF-8. */
434 while (t < end && (*t & 0x80)) t++;
435 u = PyUnicode_DecodeUTF8(s, t - s, errors);
436 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000438 /* Recode them in target encoding. */
439 w = PyUnicode_AsEncodedString(
440 u, recode_encoding, errors);
441 Py_DECREF(u);
442 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 /* Append bytes to output buffer. */
445 assert(PyBytes_Check(w));
446 r = PyBytes_AS_STRING(w);
447 rn = PyBytes_GET_SIZE(w);
448 Py_MEMCPY(p, r, rn);
449 p += rn;
450 Py_DECREF(w);
451 s = t;
452 } else {
453 *p++ = *s++;
454 }
455 continue;
456 }
457 s++;
458 if (s==end) {
459 PyErr_SetString(PyExc_ValueError,
460 "Trailing \\ in string");
461 goto failed;
462 }
463 switch (*s++) {
464 /* XXX This assumes ASCII! */
465 case '\n': break;
466 case '\\': *p++ = '\\'; break;
467 case '\'': *p++ = '\''; break;
468 case '\"': *p++ = '\"'; break;
469 case 'b': *p++ = '\b'; break;
470 case 'f': *p++ = '\014'; break; /* FF */
471 case 't': *p++ = '\t'; break;
472 case 'n': *p++ = '\n'; break;
473 case 'r': *p++ = '\r'; break;
474 case 'v': *p++ = '\013'; break; /* VT */
475 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
476 case '0': case '1': case '2': case '3':
477 case '4': case '5': case '6': case '7':
478 c = s[-1] - '0';
479 if (s < end && '0' <= *s && *s <= '7') {
480 c = (c<<3) + *s++ - '0';
481 if (s < end && '0' <= *s && *s <= '7')
482 c = (c<<3) + *s++ - '0';
483 }
484 *p++ = c;
485 break;
486 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000487 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 unsigned int x = 0;
489 c = Py_CHARMASK(*s);
490 s++;
David Malcolm96960882010-11-05 17:23:41 +0000491 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000492 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000493 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 x = 10 + c - 'a';
495 else
496 x = 10 + c - 'A';
497 x = x << 4;
498 c = Py_CHARMASK(*s);
499 s++;
David Malcolm96960882010-11-05 17:23:41 +0000500 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000502 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000503 x += 10 + c - 'a';
504 else
505 x += 10 + c - 'A';
506 *p++ = x;
507 break;
508 }
509 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200510 PyErr_Format(PyExc_ValueError,
511 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200512 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 goto failed;
514 }
515 if (strcmp(errors, "replace") == 0) {
516 *p++ = '?';
517 } else if (strcmp(errors, "ignore") == 0)
518 /* do nothing */;
519 else {
520 PyErr_Format(PyExc_ValueError,
521 "decoding error; unknown "
522 "error handling code: %.400s",
523 errors);
524 goto failed;
525 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200526 /* skip \x */
527 if (s < end && Py_ISXDIGIT(s[0]))
528 s++; /* and a hexdigit */
529 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 default:
531 *p++ = '\\';
532 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200533 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 UTF-8 bytes may follow. */
535 }
536 }
537 if (p-buf < newlen)
538 _PyBytes_Resize(&v, p - buf);
539 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000540 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 Py_DECREF(v);
542 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000543}
544
545/* -------------------------------------------------------------------- */
546/* object api */
547
548Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200549PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000550{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 if (!PyBytes_Check(op)) {
552 PyErr_Format(PyExc_TypeError,
553 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
554 return -1;
555 }
556 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000557}
558
559char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200560PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000561{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 if (!PyBytes_Check(op)) {
563 PyErr_Format(PyExc_TypeError,
564 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
565 return NULL;
566 }
567 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000568}
569
570int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200571PyBytes_AsStringAndSize(PyObject *obj,
572 char **s,
573 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 if (s == NULL) {
576 PyErr_BadInternalCall();
577 return -1;
578 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 if (!PyBytes_Check(obj)) {
581 PyErr_Format(PyExc_TypeError,
582 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
583 return -1;
584 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 *s = PyBytes_AS_STRING(obj);
587 if (len != NULL)
588 *len = PyBytes_GET_SIZE(obj);
589 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
590 PyErr_SetString(PyExc_TypeError,
591 "expected bytes with no null");
592 return -1;
593 }
594 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000595}
Neal Norwitz6968b052007-02-27 19:02:19 +0000596
597/* -------------------------------------------------------------------- */
598/* Methods */
599
Eric Smith0923d1d2009-04-16 20:16:10 +0000600#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000601
602#include "stringlib/fastsearch.h"
603#include "stringlib/count.h"
604#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200605#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000606#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000607#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000608#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000609
Eric Smith0f78bff2009-11-30 01:01:42 +0000610#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000611
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000612PyObject *
613PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000614{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200615 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200616 Py_ssize_t i, length = Py_SIZE(op);
617 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200619 unsigned char quote, *s, *p;
620
621 /* Compute size of output string */
622 squotes = dquotes = 0;
623 newsize = 3; /* b'' */
624 s = (unsigned char*)op->ob_sval;
625 for (i = 0; i < length; i++) {
626 switch(s[i]) {
627 case '\'': squotes++; newsize++; break;
628 case '"': dquotes++; newsize++; break;
629 case '\\': case '\t': case '\n': case '\r':
630 newsize += 2; break; /* \C */
631 default:
632 if (s[i] < ' ' || s[i] >= 0x7f)
633 newsize += 4; /* \xHH */
634 else
635 newsize++;
636 }
637 }
638 quote = '\'';
639 if (smartquotes && squotes && !dquotes)
640 quote = '"';
641 if (squotes && quote == '\'')
642 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200643
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200644 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 PyErr_SetString(PyExc_OverflowError,
646 "bytes object is too large to make repr");
647 return NULL;
648 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649
650 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 if (v == NULL) {
652 return NULL;
653 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200654 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000655
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200656 *p++ = 'b', *p++ = quote;
657 for (i = 0; i < length; i++) {
658 unsigned char c = op->ob_sval[i];
659 if (c == quote || c == '\\')
660 *p++ = '\\', *p++ = c;
661 else if (c == '\t')
662 *p++ = '\\', *p++ = 't';
663 else if (c == '\n')
664 *p++ = '\\', *p++ = 'n';
665 else if (c == '\r')
666 *p++ = '\\', *p++ = 'r';
667 else if (c < ' ' || c >= 0x7f) {
668 *p++ = '\\';
669 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200670 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
671 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200673 else
674 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200676 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200677 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000679}
680
Neal Norwitz6968b052007-02-27 19:02:19 +0000681static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000682bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000683{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000685}
686
Neal Norwitz6968b052007-02-27 19:02:19 +0000687static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000688bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000689{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 if (Py_BytesWarningFlag) {
691 if (PyErr_WarnEx(PyExc_BytesWarning,
692 "str() on a bytes instance", 1))
693 return NULL;
694 }
695 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000696}
697
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000698static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000699bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000702}
Neal Norwitz6968b052007-02-27 19:02:19 +0000703
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000704/* This is also used by PyBytes_Concat() */
705static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000706bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 Py_ssize_t size;
709 Py_buffer va, vb;
710 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 va.len = -1;
713 vb.len = -1;
714 if (_getbuffer(a, &va) < 0 ||
715 _getbuffer(b, &vb) < 0) {
716 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
717 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
718 goto done;
719 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 /* Optimize end cases */
722 if (va.len == 0 && PyBytes_CheckExact(b)) {
723 result = b;
724 Py_INCREF(result);
725 goto done;
726 }
727 if (vb.len == 0 && PyBytes_CheckExact(a)) {
728 result = a;
729 Py_INCREF(result);
730 goto done;
731 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 size = va.len + vb.len;
734 if (size < 0) {
735 PyErr_NoMemory();
736 goto done;
737 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000738
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 result = PyBytes_FromStringAndSize(NULL, size);
740 if (result != NULL) {
741 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
742 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
743 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000744
745 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 if (va.len != -1)
747 PyBuffer_Release(&va);
748 if (vb.len != -1)
749 PyBuffer_Release(&vb);
750 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000751}
Neal Norwitz6968b052007-02-27 19:02:19 +0000752
753static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200754bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000755{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200756 Py_ssize_t i;
757 Py_ssize_t j;
758 Py_ssize_t size;
759 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000760 size_t nbytes;
761 if (n < 0)
762 n = 0;
763 /* watch out for overflows: the size can overflow int,
764 * and the # of bytes needed can overflow size_t
765 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000766 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 PyErr_SetString(PyExc_OverflowError,
768 "repeated bytes are too long");
769 return NULL;
770 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000771 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
773 Py_INCREF(a);
774 return (PyObject *)a;
775 }
776 nbytes = (size_t)size;
777 if (nbytes + PyBytesObject_SIZE <= nbytes) {
778 PyErr_SetString(PyExc_OverflowError,
779 "repeated bytes are too long");
780 return NULL;
781 }
782 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
783 if (op == NULL)
784 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100785 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 op->ob_shash = -1;
787 op->ob_sval[size] = '\0';
788 if (Py_SIZE(a) == 1 && n > 0) {
789 memset(op->ob_sval, a->ob_sval[0] , n);
790 return (PyObject *) op;
791 }
792 i = 0;
793 if (i < size) {
794 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
795 i = Py_SIZE(a);
796 }
797 while (i < size) {
798 j = (i <= size-i) ? i : size-i;
799 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
800 i += j;
801 }
802 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000803}
804
Guido van Rossum98297ee2007-11-06 21:34:58 +0000805static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000806bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000807{
808 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
809 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000810 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000811 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000812 PyErr_Clear();
813 if (_getbuffer(arg, &varg) < 0)
814 return -1;
815 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
816 varg.buf, varg.len, 0);
817 PyBuffer_Release(&varg);
818 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000819 }
820 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000821 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
822 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000823 }
824
Antoine Pitrou0010d372010-08-15 17:12:55 +0000825 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000826}
827
Neal Norwitz6968b052007-02-27 19:02:19 +0000828static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200829bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000830{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000831 if (i < 0 || i >= Py_SIZE(a)) {
832 PyErr_SetString(PyExc_IndexError, "index out of range");
833 return NULL;
834 }
835 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000836}
837
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100838Py_LOCAL(int)
839bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
840{
841 int cmp;
842 Py_ssize_t len;
843
844 len = Py_SIZE(a);
845 if (Py_SIZE(b) != len)
846 return 0;
847
848 if (a->ob_sval[0] != b->ob_sval[0])
849 return 0;
850
851 cmp = memcmp(a->ob_sval, b->ob_sval, len);
852 return (cmp == 0);
853}
854
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000855static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000856bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000857{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 int c;
859 Py_ssize_t len_a, len_b;
860 Py_ssize_t min_len;
861 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 /* Make sure both arguments are strings. */
864 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
865 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
866 (PyObject_IsInstance((PyObject*)a,
867 (PyObject*)&PyUnicode_Type) ||
868 PyObject_IsInstance((PyObject*)b,
869 (PyObject*)&PyUnicode_Type))) {
870 if (PyErr_WarnEx(PyExc_BytesWarning,
871 "Comparison between bytes and string", 1))
872 return NULL;
873 }
874 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100876 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100878 case Py_EQ:
879 case Py_LE:
880 case Py_GE:
881 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100883 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100884 case Py_NE:
885 case Py_LT:
886 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100888 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100889 default:
890 PyErr_BadArgument();
891 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 }
893 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100894 else if (op == Py_EQ || op == Py_NE) {
895 int eq = bytes_compare_eq(a, b);
896 eq ^= (op == Py_NE);
897 result = eq ? Py_True : Py_False;
898 }
899 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100900 len_a = Py_SIZE(a);
901 len_b = Py_SIZE(b);
902 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100903 if (min_len > 0) {
904 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100905 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100906 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100908 else
909 c = 0;
910 if (c == 0)
911 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
912 switch (op) {
913 case Py_LT: c = c < 0; break;
914 case Py_LE: c = c <= 0; break;
915 case Py_GT: c = c > 0; break;
916 case Py_GE: c = c >= 0; break;
917 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100918 PyErr_BadArgument();
919 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100920 }
921 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 Py_INCREF(result);
925 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000926}
927
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000928static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000929bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000930{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100931 if (a->ob_shash == -1) {
932 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100933 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100934 }
935 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000936}
937
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000938static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000939bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000940{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 if (PyIndex_Check(item)) {
942 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
943 if (i == -1 && PyErr_Occurred())
944 return NULL;
945 if (i < 0)
946 i += PyBytes_GET_SIZE(self);
947 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
948 PyErr_SetString(PyExc_IndexError,
949 "index out of range");
950 return NULL;
951 }
952 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
953 }
954 else if (PySlice_Check(item)) {
955 Py_ssize_t start, stop, step, slicelength, cur, i;
956 char* source_buf;
957 char* result_buf;
958 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000959
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000960 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 PyBytes_GET_SIZE(self),
962 &start, &stop, &step, &slicelength) < 0) {
963 return NULL;
964 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 if (slicelength <= 0) {
967 return PyBytes_FromStringAndSize("", 0);
968 }
969 else if (start == 0 && step == 1 &&
970 slicelength == PyBytes_GET_SIZE(self) &&
971 PyBytes_CheckExact(self)) {
972 Py_INCREF(self);
973 return (PyObject *)self;
974 }
975 else if (step == 1) {
976 return PyBytes_FromStringAndSize(
977 PyBytes_AS_STRING(self) + start,
978 slicelength);
979 }
980 else {
981 source_buf = PyBytes_AS_STRING(self);
982 result = PyBytes_FromStringAndSize(NULL, slicelength);
983 if (result == NULL)
984 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 result_buf = PyBytes_AS_STRING(result);
987 for (cur = start, i = 0; i < slicelength;
988 cur += step, i++) {
989 result_buf[i] = source_buf[cur];
990 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 return result;
993 }
994 }
995 else {
996 PyErr_Format(PyExc_TypeError,
997 "byte indices must be integers, not %.200s",
998 Py_TYPE(item)->tp_name);
999 return NULL;
1000 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001001}
1002
1003static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001004bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001005{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001006 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1007 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001008}
1009
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001010static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 (lenfunc)bytes_length, /*sq_length*/
1012 (binaryfunc)bytes_concat, /*sq_concat*/
1013 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1014 (ssizeargfunc)bytes_item, /*sq_item*/
1015 0, /*sq_slice*/
1016 0, /*sq_ass_item*/
1017 0, /*sq_ass_slice*/
1018 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001019};
1020
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001021static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 (lenfunc)bytes_length,
1023 (binaryfunc)bytes_subscript,
1024 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001025};
1026
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001027static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 (getbufferproc)bytes_buffer_getbuffer,
1029 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030};
1031
1032
/* Strip modes understood by the do_*strip() helpers. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string without its "|O:" part. */
#define STRIPNAME(i) (stripformat[i]+3)
1041
Neal Norwitz6968b052007-02-27 19:02:19 +00001042PyDoc_STRVAR(split__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001043"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001044\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001045Return a list of the sections in B, using sep as the delimiter.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001046If sep is not specified or is None, B is split on ASCII whitespace\n\
1047characters (space, tab, return, newline, formfeed, vertical tab).\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001048If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001049
1050static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001051bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001052{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001053 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1055 Py_ssize_t maxsplit = -1;
1056 const char *s = PyBytes_AS_STRING(self), *sub;
1057 Py_buffer vsub;
1058 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001060 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
1061 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 return NULL;
1063 if (maxsplit < 0)
1064 maxsplit = PY_SSIZE_T_MAX;
1065 if (subobj == Py_None)
1066 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1067 if (_getbuffer(subobj, &vsub) < 0)
1068 return NULL;
1069 sub = vsub.buf;
1070 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1073 PyBuffer_Release(&vsub);
1074 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001075}
1076
Neal Norwitz6968b052007-02-27 19:02:19 +00001077PyDoc_STRVAR(partition__doc__,
1078"B.partition(sep) -> (head, sep, tail)\n\
1079\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001080Search for the separator sep in B, and return the part before it,\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001081the separator itself, and the part after it. If the separator is not\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001082found, returns B and two empty bytes objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001083
1084static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001085bytes_partition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001086{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 const char *sep;
1088 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 if (PyBytes_Check(sep_obj)) {
1091 sep = PyBytes_AS_STRING(sep_obj);
1092 sep_len = PyBytes_GET_SIZE(sep_obj);
1093 }
1094 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1095 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 return stringlib_partition(
1098 (PyObject*) self,
1099 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1100 sep_obj, sep, sep_len
1101 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001102}
1103
1104PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti5b2b2422010-01-25 11:58:28 +00001105"B.rpartition(sep) -> (head, sep, tail)\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001106\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001107Search for the separator sep in B, starting at the end of B,\n\
1108and return the part before it, the separator itself, and the\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001109part after it. If the separator is not found, returns two empty\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001110bytes objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001111
1112static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001113bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
Neal Norwitz6968b052007-02-27 19:02:19 +00001114{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 const char *sep;
1116 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (PyBytes_Check(sep_obj)) {
1119 sep = PyBytes_AS_STRING(sep_obj);
1120 sep_len = PyBytes_GET_SIZE(sep_obj);
1121 }
1122 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1123 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 return stringlib_rpartition(
1126 (PyObject*) self,
1127 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1128 sep_obj, sep, sep_len
1129 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001130}
1131
Neal Norwitz6968b052007-02-27 19:02:19 +00001132PyDoc_STRVAR(rsplit__doc__,
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001133"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001134\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001135Return a list of the sections in B, using sep as the delimiter,\n\
1136starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001137If sep is not given, B is split on ASCII whitespace characters\n\
1138(space, tab, return, newline, formfeed, vertical tab).\n\
1139If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001140
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141
Neal Norwitz6968b052007-02-27 19:02:19 +00001142static PyObject *
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001143bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
Neal Norwitz6968b052007-02-27 19:02:19 +00001144{
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001145 static char *kwlist[] = {"sep", "maxsplit", 0};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1147 Py_ssize_t maxsplit = -1;
1148 const char *s = PyBytes_AS_STRING(self), *sub;
1149 Py_buffer vsub;
1150 PyObject *list, *subobj = Py_None;
Neal Norwitz6968b052007-02-27 19:02:19 +00001151
Ezio Melotticda6b6d2012-02-26 09:39:55 +02001152 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1153 kwlist, &subobj, &maxsplit))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 return NULL;
1155 if (maxsplit < 0)
1156 maxsplit = PY_SSIZE_T_MAX;
1157 if (subobj == Py_None)
1158 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1159 if (_getbuffer(subobj, &vsub) < 0)
1160 return NULL;
1161 sub = vsub.buf;
1162 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1165 PyBuffer_Release(&vsub);
1166 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001167}
1168
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001169
1170PyDoc_STRVAR(join__doc__,
1171"B.join(iterable_of_bytes) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001172\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00001173Concatenate any number of bytes objects, with B in between each pair.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001174Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1175
Neal Norwitz6968b052007-02-27 19:02:19 +00001176static PyObject *
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001177bytes_join(PyObject *self, PyObject *iterable)
Neal Norwitz6968b052007-02-27 19:02:19 +00001178{
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001179 return stringlib_bytes_join(self, iterable);
Neal Norwitz6968b052007-02-27 19:02:19 +00001180}
1181
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001182PyObject *
1183_PyBytes_Join(PyObject *sep, PyObject *x)
1184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 assert(sep != NULL && PyBytes_Check(sep));
1186 assert(x != NULL);
1187 return bytes_join(sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001188}
1189
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001190/* Helper macro to fix up start/end slice values in place.
 *
 * end is clamped to len when larger; a negative end has len added and is
 * then floored at 0.  A negative start has len added and is then floored
 * at 0.  A start greater than len is left unchanged.
 *
 * start and end must be modifiable lvalues, and every argument is
 * evaluated more than once.
 * NOTE(review): this expands to several statements without a
 * do { } while (0) wrapper, so it is only safe as a standalone statement.
 * Before changing the token sequence, confirm that no included header
 * carries an identical definition of this macro.
 */
1191#define ADJUST_INDICES(start, end, len) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 if (end > len) \
1193 end = len; \
1194 else if (end < 0) { \
1195 end += len; \
1196 if (end < 0) \
1197 end = 0; \
1198 } \
1199 if (start < 0) { \
1200 start += len; \
1201 if (start < 0) \
1202 start = 0; \
1203 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001204
1205Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001206bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001209 char byte;
1210 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 const char *sub;
1212 Py_ssize_t sub_len;
1213 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001214 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001215
Antoine Pitrouac65d962011-10-20 23:54:17 +02001216 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1217 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219
Antoine Pitrouac65d962011-10-20 23:54:17 +02001220 if (subobj) {
1221 if (_getbuffer(subobj, &subbuf) < 0)
1222 return -2;
1223
1224 sub = subbuf.buf;
1225 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001227 else {
1228 sub = &byte;
1229 sub_len = 1;
1230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001233 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1235 sub, sub_len, start, end);
1236 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001237 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1239 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001240
1241 if (subobj)
1242 PyBuffer_Release(&subbuf);
1243
1244 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001245}
1246
1247
1248PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001249"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001250\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001251Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001252such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001254\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255Return -1 on failure.");
1256
Neal Norwitz6968b052007-02-27 19:02:19 +00001257static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001258bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 Py_ssize_t result = bytes_find_internal(self, args, +1);
1261 if (result == -2)
1262 return NULL;
1263 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001264}
1265
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266
1267PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001268"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001269\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270Like B.find() but raise ValueError when the substring is not found.");
1271
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001272static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001273bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001274{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 Py_ssize_t result = bytes_find_internal(self, args, +1);
1276 if (result == -2)
1277 return NULL;
1278 if (result == -1) {
1279 PyErr_SetString(PyExc_ValueError,
1280 "substring not found");
1281 return NULL;
1282 }
1283 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001284}
1285
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001286
1287PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001288"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001289\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001291such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001293\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001294Return -1 on failure.");
1295
Neal Norwitz6968b052007-02-27 19:02:19 +00001296static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001297bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 Py_ssize_t result = bytes_find_internal(self, args, -1);
1300 if (result == -2)
1301 return NULL;
1302 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001303}
1304
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001305
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001306PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001307"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308\n\
1309Like B.rfind() but raise ValueError when the substring is not found.");
1310
1311static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001312bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 Py_ssize_t result = bytes_find_internal(self, args, -1);
1315 if (result == -2)
1316 return NULL;
1317 if (result == -1) {
1318 PyErr_SetString(PyExc_ValueError,
1319 "substring not found");
1320 return NULL;
1321 }
1322 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001323}
1324
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325
1326Py_LOCAL_INLINE(PyObject *)
1327do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001328{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 Py_buffer vsep;
1330 char *s = PyBytes_AS_STRING(self);
1331 Py_ssize_t len = PyBytes_GET_SIZE(self);
1332 char *sep;
1333 Py_ssize_t seplen;
1334 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 if (_getbuffer(sepobj, &vsep) < 0)
1337 return NULL;
1338 sep = vsep.buf;
1339 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 i = 0;
1342 if (striptype != RIGHTSTRIP) {
1343 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1344 i++;
1345 }
1346 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 j = len;
1349 if (striptype != LEFTSTRIP) {
1350 do {
1351 j--;
1352 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1353 j++;
1354 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1359 Py_INCREF(self);
1360 return (PyObject*)self;
1361 }
1362 else
1363 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001364}
1365
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001366
1367Py_LOCAL_INLINE(PyObject *)
1368do_strip(PyBytesObject *self, int striptype)
1369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 char *s = PyBytes_AS_STRING(self);
1371 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 i = 0;
1374 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001375 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 i++;
1377 }
1378 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 j = len;
1381 if (striptype != LEFTSTRIP) {
1382 do {
1383 j--;
David Malcolm96960882010-11-05 17:23:41 +00001384 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 j++;
1386 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1389 Py_INCREF(self);
1390 return (PyObject*)self;
1391 }
1392 else
1393 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001394}
1395
1396
1397Py_LOCAL_INLINE(PyObject *)
1398do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1399{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 PyObject *sep = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001401
Serhiy Storchakac6792272013-10-19 21:03:34 +03001402 if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 if (sep != NULL && sep != Py_None) {
1406 return do_xstrip(self, striptype, sep);
1407 }
1408 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001409}
1410
1411
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001412PyDoc_STRVAR(strip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001414\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001415Strip leading and trailing bytes contained in the argument.\n\
Georg Brandlbeca27a2012-01-22 21:31:21 +01001416If the argument is omitted, strip leading and trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001417static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001418bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 if (PyTuple_GET_SIZE(args) == 0)
1421 return do_strip(self, BOTHSTRIP); /* Common case */
1422 else
1423 return do_argstrip(self, BOTHSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001424}
1425
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001427PyDoc_STRVAR(lstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001428"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001429\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001430Strip leading bytes contained in the argument.\n\
1431If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001432static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001433bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 if (PyTuple_GET_SIZE(args) == 0)
1436 return do_strip(self, LEFTSTRIP); /* Common case */
1437 else
1438 return do_argstrip(self, LEFTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001439}
1440
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001441
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001442PyDoc_STRVAR(rstrip__doc__,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001444\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00001445Strip trailing bytes contained in the argument.\n\
1446If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001447static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001448bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001449{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 if (PyTuple_GET_SIZE(args) == 0)
1451 return do_strip(self, RIGHTSTRIP); /* Common case */
1452 else
1453 return do_argstrip(self, RIGHTSTRIP, args);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001454}
Neal Norwitz6968b052007-02-27 19:02:19 +00001455
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001456
1457PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001458"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001459\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001461string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001462as in slice notation.");
1463
1464static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001465bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 PyObject *sub_obj;
1468 const char *str = PyBytes_AS_STRING(self), *sub;
1469 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001470 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001472
Antoine Pitrouac65d962011-10-20 23:54:17 +02001473 Py_buffer vsub;
1474 PyObject *count_obj;
1475
1476 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1477 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001479
Antoine Pitrouac65d962011-10-20 23:54:17 +02001480 if (sub_obj) {
1481 if (_getbuffer(sub_obj, &vsub) < 0)
1482 return NULL;
1483
1484 sub = vsub.buf;
1485 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001487 else {
1488 sub = &byte;
1489 sub_len = 1;
1490 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493
Antoine Pitrouac65d962011-10-20 23:54:17 +02001494 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1496 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001497
1498 if (sub_obj)
1499 PyBuffer_Release(&vsub);
1500
1501 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001502}
1503
1504
1505PyDoc_STRVAR(translate__doc__,
1506"B.translate(table[, deletechars]) -> bytes\n\
1507\n\
1508Return a copy of B, where all characters occurring in the\n\
1509optional argument deletechars are removed, and the remaining\n\
1510characters have been mapped through the given translation\n\
1511table, which must be a bytes object of length 256.");
1512
1513static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001514bytes_translate(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001516 char *input, *output;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001517 const char *table;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001518 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 PyObject *input_obj = (PyObject*)self;
1520 const char *output_start, *del_table=NULL;
1521 Py_ssize_t inlen, tablen, dellen = 0;
1522 PyObject *result;
1523 int trans_table[256];
1524 PyObject *tableobj, *delobj = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1527 &tableobj, &delobj))
1528 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 if (PyBytes_Check(tableobj)) {
1531 table = PyBytes_AS_STRING(tableobj);
1532 tablen = PyBytes_GET_SIZE(tableobj);
1533 }
1534 else if (tableobj == Py_None) {
1535 table = NULL;
1536 tablen = 256;
1537 }
1538 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1539 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 if (tablen != 256) {
1542 PyErr_SetString(PyExc_ValueError,
1543 "translation table must be 256 characters long");
1544 return NULL;
1545 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 if (delobj != NULL) {
1548 if (PyBytes_Check(delobj)) {
1549 del_table = PyBytes_AS_STRING(delobj);
1550 dellen = PyBytes_GET_SIZE(delobj);
1551 }
1552 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1553 return NULL;
1554 }
1555 else {
1556 del_table = NULL;
1557 dellen = 0;
1558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 inlen = PyBytes_GET_SIZE(input_obj);
1561 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1562 if (result == NULL)
1563 return NULL;
1564 output_start = output = PyBytes_AsString(result);
1565 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 if (dellen == 0 && table != NULL) {
1568 /* If no deletions are required, use faster code */
1569 for (i = inlen; --i >= 0; ) {
1570 c = Py_CHARMASK(*input++);
1571 if (Py_CHARMASK((*output++ = table[c])) != c)
1572 changed = 1;
1573 }
1574 if (changed || !PyBytes_CheckExact(input_obj))
1575 return result;
1576 Py_DECREF(result);
1577 Py_INCREF(input_obj);
1578 return input_obj;
1579 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 if (table == NULL) {
1582 for (i = 0; i < 256; i++)
1583 trans_table[i] = Py_CHARMASK(i);
1584 } else {
1585 for (i = 0; i < 256; i++)
1586 trans_table[i] = Py_CHARMASK(table[i]);
1587 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 for (i = 0; i < dellen; i++)
1590 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 for (i = inlen; --i >= 0; ) {
1593 c = Py_CHARMASK(*input++);
1594 if (trans_table[c] != -1)
1595 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1596 continue;
1597 changed = 1;
1598 }
1599 if (!changed && PyBytes_CheckExact(input_obj)) {
1600 Py_DECREF(result);
1601 Py_INCREF(input_obj);
1602 return input_obj;
1603 }
1604 /* Fix the size of the resulting string */
1605 if (inlen > 0)
1606 _PyBytes_Resize(&result, output - output_start);
1607 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608}
1609
1610
Georg Brandlabc38772009-04-12 15:51:51 +00001611static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001612bytes_maketrans(PyObject *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001613{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 return _Py_bytes_maketrans(args);
Georg Brandlabc38772009-04-12 15:51:51 +00001615}
1616
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001617/* find and count characters and substrings */
1618
/* Evaluates to a char * pointing at the first occurrence of byte `c` within
   the `target_len` bytes starting at `target`, or NULL when it is absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1621
1622/* String ops must return a string. */
1623/* If the object is subclass of string, create a copy */
1624Py_LOCAL(PyBytesObject *)
1625return_self(PyBytesObject *self)
1626{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 if (PyBytes_CheckExact(self)) {
1628 Py_INCREF(self);
1629 return self;
1630 }
1631 return (PyBytesObject *)PyBytes_FromStringAndSize(
1632 PyBytes_AS_STRING(self),
1633 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001634}
1635
1636Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001637countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001638{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 Py_ssize_t count=0;
1640 const char *start=target;
1641 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 while ( (start=findchar(start, end-start, c)) != NULL ) {
1644 count++;
1645 if (count >= maxcount)
1646 break;
1647 start += 1;
1648 }
1649 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650}
1651
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652
1653/* Algorithms for different cases of string replacement */
1654
1655/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1656Py_LOCAL(PyBytesObject *)
1657replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 const char *to_s, Py_ssize_t to_len,
1659 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001660{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 char *self_s, *result_s;
1662 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001663 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001667
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001668 /* 1 at the end plus 1 after every character;
1669 count = min(maxcount, self_len + 1) */
1670 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001672 else
1673 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1674 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 /* Check for overflow */
1677 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001678 assert(count > 0);
1679 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 PyErr_SetString(PyExc_OverflowError,
1681 "replacement bytes are too long");
1682 return NULL;
1683 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001684 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 if (! (result = (PyBytesObject *)
1687 PyBytes_FromStringAndSize(NULL, result_len)) )
1688 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 self_s = PyBytes_AS_STRING(self);
1691 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 /* Lay the first one down (guaranteed this will occur) */
1696 Py_MEMCPY(result_s, to_s, to_len);
1697 result_s += to_len;
1698 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 for (i=0; i<count; i++) {
1701 *result_s++ = *self_s++;
1702 Py_MEMCPY(result_s, to_s, to_len);
1703 result_s += to_len;
1704 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 /* Copy the rest of the original string */
1707 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710}
1711
1712/* Special case for deleting a single character */
1713/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1714Py_LOCAL(PyBytesObject *)
1715replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 char *self_s, *result_s;
1719 char *start, *next, *end;
1720 Py_ssize_t self_len, result_len;
1721 Py_ssize_t count;
1722 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 self_len = PyBytes_GET_SIZE(self);
1725 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 count = countchar(self_s, self_len, from_c, maxcount);
1728 if (count == 0) {
1729 return return_self(self);
1730 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 result_len = self_len - count; /* from_len == 1 */
1733 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 if ( (result = (PyBytesObject *)
1736 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1737 return NULL;
1738 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 start = self_s;
1741 end = self_s + self_len;
1742 while (count-- > 0) {
1743 next = findchar(start, end-start, from_c);
1744 if (next == NULL)
1745 break;
1746 Py_MEMCPY(result_s, start, next-start);
1747 result_s += (next-start);
1748 start = next+1;
1749 }
1750 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753}
1754
1755/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1756
1757Py_LOCAL(PyBytesObject *)
1758replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 const char *from_s, Py_ssize_t from_len,
1760 Py_ssize_t maxcount) {
1761 char *self_s, *result_s;
1762 char *start, *next, *end;
1763 Py_ssize_t self_len, result_len;
1764 Py_ssize_t count, offset;
1765 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 self_len = PyBytes_GET_SIZE(self);
1768 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 count = stringlib_count(self_s, self_len,
1771 from_s, from_len,
1772 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001774 if (count == 0) {
1775 /* no matches */
1776 return return_self(self);
1777 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 result_len = self_len - (count * from_len);
1780 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 if ( (result = (PyBytesObject *)
1783 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1784 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 start = self_s;
1789 end = self_s + self_len;
1790 while (count-- > 0) {
1791 offset = stringlib_find(start, end-start,
1792 from_s, from_len,
1793 0);
1794 if (offset == -1)
1795 break;
1796 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001800 result_s += (next-start);
1801 start = next+from_len;
1802 }
1803 Py_MEMCPY(result_s, start, end-start);
1804 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001805}
1806
1807/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1808Py_LOCAL(PyBytesObject *)
1809replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 char from_c, char to_c,
1811 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001812{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001813 char *self_s, *result_s, *start, *end, *next;
1814 Py_ssize_t self_len;
1815 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 /* The result string will be the same size */
1818 self_s = PyBytes_AS_STRING(self);
1819 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001820
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001823 if (next == NULL) {
1824 /* No matches; return the original string */
1825 return return_self(self);
1826 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 /* Need to make a new string */
1829 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1830 if (result == NULL)
1831 return NULL;
1832 result_s = PyBytes_AS_STRING(result);
1833 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 /* change everything in-place, starting with this one */
1836 start = result_s + (next-self_s);
1837 *start = to_c;
1838 start++;
1839 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 while (--maxcount > 0) {
1842 next = findchar(start, end-start, from_c);
1843 if (next == NULL)
1844 break;
1845 *next = to_c;
1846 start = next+1;
1847 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001850}
1851
1852/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1853Py_LOCAL(PyBytesObject *)
1854replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001855 const char *from_s, Py_ssize_t from_len,
1856 const char *to_s, Py_ssize_t to_len,
1857 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 char *result_s, *start, *end;
1860 char *self_s;
1861 Py_ssize_t self_len, offset;
1862 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001866 self_s = PyBytes_AS_STRING(self);
1867 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 offset = stringlib_find(self_s, self_len,
1870 from_s, from_len,
1871 0);
1872 if (offset == -1) {
1873 /* No matches; return the original string */
1874 return return_self(self);
1875 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 /* Need to make a new string */
1878 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1879 if (result == NULL)
1880 return NULL;
1881 result_s = PyBytes_AS_STRING(result);
1882 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 /* change everything in-place, starting with this one */
1885 start = result_s + offset;
1886 Py_MEMCPY(start, to_s, from_len);
1887 start += from_len;
1888 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 while ( --maxcount > 0) {
1891 offset = stringlib_find(start, end-start,
1892 from_s, from_len,
1893 0);
1894 if (offset==-1)
1895 break;
1896 Py_MEMCPY(start+offset, to_s, from_len);
1897 start += offset+from_len;
1898 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001899
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901}
1902
1903/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1904Py_LOCAL(PyBytesObject *)
1905replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001906 char from_c,
1907 const char *to_s, Py_ssize_t to_len,
1908 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001910 char *self_s, *result_s;
1911 char *start, *next, *end;
1912 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001913 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 self_s = PyBytes_AS_STRING(self);
1917 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001918
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 count = countchar(self_s, self_len, from_c, maxcount);
1920 if (count == 0) {
1921 /* no matches, return unchanged */
1922 return return_self(self);
1923 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 /* use the difference between current and new, hence the "-1" */
1926 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001927 assert(count > 0);
1928 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001929 PyErr_SetString(PyExc_OverflowError,
1930 "replacement bytes are too long");
1931 return NULL;
1932 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001933 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 if ( (result = (PyBytesObject *)
1936 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1937 return NULL;
1938 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 start = self_s;
1941 end = self_s + self_len;
1942 while (count-- > 0) {
1943 next = findchar(start, end-start, from_c);
1944 if (next == NULL)
1945 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 if (next == start) {
1948 /* replace with the 'to' */
1949 Py_MEMCPY(result_s, to_s, to_len);
1950 result_s += to_len;
1951 start += 1;
1952 } else {
1953 /* copy the unchanged old then the 'to' */
1954 Py_MEMCPY(result_s, start, next-start);
1955 result_s += (next-start);
1956 Py_MEMCPY(result_s, to_s, to_len);
1957 result_s += to_len;
1958 start = next+1;
1959 }
1960 }
1961 /* Copy the remainder of the remaining string */
1962 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965}
1966
1967/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1968Py_LOCAL(PyBytesObject *)
1969replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970 const char *from_s, Py_ssize_t from_len,
1971 const char *to_s, Py_ssize_t to_len,
1972 Py_ssize_t maxcount) {
1973 char *self_s, *result_s;
1974 char *start, *next, *end;
1975 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001976 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 self_s = PyBytes_AS_STRING(self);
1980 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001981
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 count = stringlib_count(self_s, self_len,
1983 from_s, from_len,
1984 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 if (count == 0) {
1987 /* no matches, return unchanged */
1988 return return_self(self);
1989 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 /* Check for overflow */
1992 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001993 assert(count > 0);
1994 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 PyErr_SetString(PyExc_OverflowError,
1996 "replacement bytes are too long");
1997 return NULL;
1998 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001999 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002001 if ( (result = (PyBytesObject *)
2002 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2003 return NULL;
2004 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 start = self_s;
2007 end = self_s + self_len;
2008 while (count-- > 0) {
2009 offset = stringlib_find(start, end-start,
2010 from_s, from_len,
2011 0);
2012 if (offset == -1)
2013 break;
2014 next = start+offset;
2015 if (next == start) {
2016 /* replace with the 'to' */
2017 Py_MEMCPY(result_s, to_s, to_len);
2018 result_s += to_len;
2019 start += from_len;
2020 } else {
2021 /* copy the unchanged old then the 'to' */
2022 Py_MEMCPY(result_s, start, next-start);
2023 result_s += (next-start);
2024 Py_MEMCPY(result_s, to_s, to_len);
2025 result_s += to_len;
2026 start = next+from_len;
2027 }
2028 }
2029 /* Copy the remainder of the remaining string */
2030 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002032 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002033}
2034
2035
2036Py_LOCAL(PyBytesObject *)
2037replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 const char *from_s, Py_ssize_t from_len,
2039 const char *to_s, Py_ssize_t to_len,
2040 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002041{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002042 if (maxcount < 0) {
2043 maxcount = PY_SSIZE_T_MAX;
2044 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2045 /* nothing to do; return the original string */
2046 return return_self(self);
2047 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 if (maxcount == 0 ||
2050 (from_len == 0 && to_len == 0)) {
2051 /* nothing to do; return the original string */
2052 return return_self(self);
2053 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002057 if (from_len == 0) {
2058 /* insert the 'to' string everywhere. */
2059 /* >>> "Python".replace("", ".") */
2060 /* '.P.y.t.h.o.n.' */
2061 return replace_interleave(self, to_s, to_len, maxcount);
2062 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2065 /* point for an empty self string to generate a non-empty string */
2066 /* Special case so the remaining code always gets a non-empty string */
2067 if (PyBytes_GET_SIZE(self) == 0) {
2068 return return_self(self);
2069 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 if (to_len == 0) {
2072 /* delete all occurrences of 'from' string */
2073 if (from_len == 1) {
2074 return replace_delete_single_character(
2075 self, from_s[0], maxcount);
2076 } else {
2077 return replace_delete_substring(self, from_s,
2078 from_len, maxcount);
2079 }
2080 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 if (from_len == to_len) {
2085 if (from_len == 1) {
2086 return replace_single_character_in_place(
2087 self,
2088 from_s[0],
2089 to_s[0],
2090 maxcount);
2091 } else {
2092 return replace_substring_in_place(
2093 self, from_s, from_len, to_s, to_len,
2094 maxcount);
2095 }
2096 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 /* Otherwise use the more generic algorithms */
2099 if (from_len == 1) {
2100 return replace_single_character(self, from_s[0],
2101 to_s, to_len, maxcount);
2102 } else {
2103 /* len('from')>=2, len('to')>=1 */
2104 return replace_substring(self, from_s, from_len, to_s, to_len,
2105 maxcount);
2106 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107}
2108
2109PyDoc_STRVAR(replace__doc__,
2110"B.replace(old, new[, count]) -> bytes\n\
2111\n\
2112Return a copy of B with all occurrences of subsection\n\
2113old replaced by new. If the optional argument count is\n\
Senthil Kumaran9a9dd1c2010-09-08 12:50:29 +00002114given, only first count occurances are replaced.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002115
2116static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002117bytes_replace(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002119 Py_ssize_t count = -1;
2120 PyObject *from, *to;
2121 const char *from_s, *to_s;
2122 Py_ssize_t from_len, to_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2125 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002127 if (PyBytes_Check(from)) {
2128 from_s = PyBytes_AS_STRING(from);
2129 from_len = PyBytes_GET_SIZE(from);
2130 }
2131 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2132 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 if (PyBytes_Check(to)) {
2135 to_s = PyBytes_AS_STRING(to);
2136 to_len = PyBytes_GET_SIZE(to);
2137 }
2138 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2139 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 return (PyObject *)replace((PyBytesObject *) self,
2142 from_s, from_len,
2143 to_s, to_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144}
2145
2146/** End DALKE **/
2147
2148/* Matches the end (direction >= 0) or start (direction < 0) of self
2149 * against substr, using the start and end arguments. Returns
2150 * -1 on error, 0 if not found and 1 if found.
2151 */
2152Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002153_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 Py_ssize_t len = PyBytes_GET_SIZE(self);
2157 Py_ssize_t slen;
2158 const char* sub;
2159 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 if (PyBytes_Check(substr)) {
2162 sub = PyBytes_AS_STRING(substr);
2163 slen = PyBytes_GET_SIZE(substr);
2164 }
2165 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2166 return -1;
2167 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 if (direction < 0) {
2172 /* startswith */
2173 if (start+slen > len)
2174 return 0;
2175 } else {
2176 /* endswith */
2177 if (end-start < slen || start > len)
2178 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 if (end-slen > start)
2181 start = end - slen;
2182 }
2183 if (end-start >= slen)
2184 return ! memcmp(str+start, sub, slen);
2185 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186}
2187
2188
2189PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002190"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191\n\
2192Return True if B starts with the specified prefix, False otherwise.\n\
2193With optional start, test B beginning at that position.\n\
2194With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002195prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196
2197static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002198bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 Py_ssize_t start = 0;
2201 Py_ssize_t end = PY_SSIZE_T_MAX;
2202 PyObject *subobj;
2203 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204
Jesus Ceaac451502011-04-20 17:09:23 +02002205 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002206 return NULL;
2207 if (PyTuple_Check(subobj)) {
2208 Py_ssize_t i;
2209 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2210 result = _bytes_tailmatch(self,
2211 PyTuple_GET_ITEM(subobj, i),
2212 start, end, -1);
2213 if (result == -1)
2214 return NULL;
2215 else if (result) {
2216 Py_RETURN_TRUE;
2217 }
2218 }
2219 Py_RETURN_FALSE;
2220 }
2221 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002222 if (result == -1) {
2223 if (PyErr_ExceptionMatches(PyExc_TypeError))
2224 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2225 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002226 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002227 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002228 else
2229 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002230}
2231
2232
2233PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002234"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002235\n\
2236Return True if B ends with the specified suffix, False otherwise.\n\
2237With optional start, test B beginning at that position.\n\
2238With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002239suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002240
2241static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002242bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002244 Py_ssize_t start = 0;
2245 Py_ssize_t end = PY_SSIZE_T_MAX;
2246 PyObject *subobj;
2247 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248
Jesus Ceaac451502011-04-20 17:09:23 +02002249 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 return NULL;
2251 if (PyTuple_Check(subobj)) {
2252 Py_ssize_t i;
2253 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2254 result = _bytes_tailmatch(self,
2255 PyTuple_GET_ITEM(subobj, i),
2256 start, end, +1);
2257 if (result == -1)
2258 return NULL;
2259 else if (result) {
2260 Py_RETURN_TRUE;
2261 }
2262 }
2263 Py_RETURN_FALSE;
2264 }
2265 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002266 if (result == -1) {
2267 if (PyErr_ExceptionMatches(PyExc_TypeError))
2268 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2269 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002271 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002272 else
2273 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002274}
2275
2276
2277PyDoc_STRVAR(decode__doc__,
Victor Stinnerc911bbf2010-11-07 19:04:46 +00002278"B.decode(encoding='utf-8', errors='strict') -> str\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002279\n\
Victor Stinnere14e2122010-11-07 18:41:46 +00002280Decode B using the codec registered for encoding. Default encoding\n\
2281is 'utf-8'. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002282handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2283a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284as well as any other name registerd with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002285able to handle UnicodeDecodeErrors.");
2286
2287static PyObject *
Benjamin Peterson308d6372009-09-18 21:42:35 +00002288bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002289{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 const char *encoding = NULL;
2291 const char *errors = NULL;
2292 static char *kwlist[] = {"encoding", "errors", 0};
Guido van Rossumd624f182006-04-24 13:47:05 +00002293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002294 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2295 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 return PyUnicode_FromEncodedObject(self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002297}
2298
Guido van Rossum20188312006-05-05 15:15:40 +00002299
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002300PyDoc_STRVAR(splitlines__doc__,
2301"B.splitlines([keepends]) -> list of lines\n\
2302\n\
2303Return a list of the lines in B, breaking at line boundaries.\n\
2304Line breaks are not included in the resulting list unless keepends\n\
2305is given and true.");
2306
2307static PyObject*
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002308bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002309{
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002310 static char *kwlist[] = {"keepends", 0};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002311 int keepends = 0;
2312
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002313 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2314 kwlist, &keepends))
Antoine Pitroud1188562010-06-09 16:38:55 +00002315 return NULL;
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002316
2317 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002318 (PyObject*) self, PyBytes_AS_STRING(self),
2319 PyBytes_GET_SIZE(self), keepends
2320 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002321}
2322
2323
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002324PyDoc_STRVAR(fromhex_doc,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325"bytes.fromhex(string) -> bytes\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002326\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002327Create a bytes object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002328Spaces between two numbers are accepted.\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002329Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002330
2331static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002332hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002334 if (c >= 128)
2335 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002336 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 return c - '0';
2338 else {
David Malcolm96960882010-11-05 17:23:41 +00002339 if (Py_ISUPPER(c))
2340 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 if (c >= 'a' && c <= 'f')
2342 return c - 'a' + 10;
2343 }
2344 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002345}
2346
2347static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002348bytes_fromhex(PyObject *cls, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002349{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 PyObject *newstring, *hexobj;
2351 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002352 Py_ssize_t hexlen, byteslen, i, j;
2353 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002354 void *data;
2355 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2358 return NULL;
2359 assert(PyUnicode_Check(hexobj));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002360 if (PyUnicode_READY(hexobj))
2361 return NULL;
2362 kind = PyUnicode_KIND(hexobj);
2363 data = PyUnicode_DATA(hexobj);
2364 hexlen = PyUnicode_GET_LENGTH(hexobj);
2365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 byteslen = hexlen/2; /* This overestimates if there are spaces */
2367 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2368 if (!newstring)
2369 return NULL;
2370 buf = PyBytes_AS_STRING(newstring);
2371 for (i = j = 0; i < hexlen; i += 2) {
2372 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002373 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002374 i++;
2375 if (i >= hexlen)
2376 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002377 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2378 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002379 if (top == -1 || bot == -1) {
2380 PyErr_Format(PyExc_ValueError,
2381 "non-hexadecimal number found in "
2382 "fromhex() arg at position %zd", i);
2383 goto error;
2384 }
2385 buf[j++] = (top << 4) + bot;
2386 }
2387 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2388 goto error;
2389 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002390
2391 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 Py_XDECREF(newstring);
2393 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002394}
2395
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002396PyDoc_STRVAR(sizeof__doc__,
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002397"B.__sizeof__() -> size of B in memory, in bytes");
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002398
2399static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002400bytes_sizeof(PyBytesObject *v)
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 Py_ssize_t res;
2403 res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2404 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002405}
2406
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002407
2408static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002409bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002412}
2413
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002414
2415static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002416bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2418 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2419 _Py_capitalize__doc__},
2420 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2421 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2422 {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
2423 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2424 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002425 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 expandtabs__doc__},
2427 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2428 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
2429 fromhex_doc},
2430 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2431 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2432 _Py_isalnum__doc__},
2433 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2434 _Py_isalpha__doc__},
2435 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2436 _Py_isdigit__doc__},
2437 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2438 _Py_islower__doc__},
2439 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2440 _Py_isspace__doc__},
2441 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2442 _Py_istitle__doc__},
2443 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2444 _Py_isupper__doc__},
2445 {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
2446 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2447 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2448 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
2449 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
2450 _Py_maketrans__doc__},
2451 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2452 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2453 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2454 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2455 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2456 {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
2457 rpartition__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002458 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Ezio Melotticda6b6d2012-02-26 09:39:55 +02002460 {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
Mark Dickinson0d5f6ad2011-09-24 09:14:39 +01002461 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002462 splitlines__doc__},
2463 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2464 startswith__doc__},
2465 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
2466 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2467 _Py_swapcase__doc__},
2468 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2469 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
2470 translate__doc__},
2471 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2472 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2473 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
2474 sizeof__doc__},
2475 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002476};
2477
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478static PyObject *
2479str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2480
2481static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002482bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 PyObject *x = NULL;
2485 const char *encoding = NULL;
2486 const char *errors = NULL;
2487 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002488 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 Py_ssize_t size;
2490 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002491 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 if (type != &PyBytes_Type)
2494 return str_subtype_new(type, args, kwds);
2495 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2496 &encoding, &errors))
2497 return NULL;
2498 if (x == NULL) {
2499 if (encoding != NULL || errors != NULL) {
2500 PyErr_SetString(PyExc_TypeError,
2501 "encoding or errors without sequence "
2502 "argument");
2503 return NULL;
2504 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02002505 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 if (PyUnicode_Check(x)) {
2509 /* Encode via the codec registry */
2510 if (encoding == NULL) {
2511 PyErr_SetString(PyExc_TypeError,
2512 "string argument without an encoding");
2513 return NULL;
2514 }
2515 new = PyUnicode_AsEncodedString(x, encoding, errors);
2516 if (new == NULL)
2517 return NULL;
2518 assert(PyBytes_Check(new));
2519 return new;
2520 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002521
2522 /* We'd like to call PyObject_Bytes here, but we need to check for an
2523 integer argument before deferring to PyBytes_FromObject, something
2524 PyObject_Bytes doesn't do. */
2525 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2526 if (func != NULL) {
2527 new = PyObject_CallFunctionObjArgs(func, NULL);
2528 Py_DECREF(func);
2529 if (new == NULL)
2530 return NULL;
2531 if (!PyBytes_Check(new)) {
2532 PyErr_Format(PyExc_TypeError,
2533 "__bytes__ returned non-bytes (type %.200s)",
2534 Py_TYPE(new)->tp_name);
2535 Py_DECREF(new);
2536 return NULL;
2537 }
2538 return new;
2539 }
2540 else if (PyErr_Occurred())
2541 return NULL;
2542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 /* Is it an integer? */
2544 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2545 if (size == -1 && PyErr_Occurred()) {
2546 if (PyErr_ExceptionMatches(PyExc_OverflowError))
2547 return NULL;
2548 PyErr_Clear();
2549 }
2550 else if (size < 0) {
2551 PyErr_SetString(PyExc_ValueError, "negative count");
2552 return NULL;
2553 }
2554 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02002555 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002556 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002558 return new;
2559 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 /* If it's not unicode, there can't be encoding or errors */
2562 if (encoding != NULL || errors != NULL) {
2563 PyErr_SetString(PyExc_TypeError,
2564 "encoding or errors without a string argument");
2565 return NULL;
2566 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06002567
2568 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00002569}
2570
2571PyObject *
2572PyBytes_FromObject(PyObject *x)
2573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 PyObject *new, *it;
2575 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 if (x == NULL) {
2578 PyErr_BadInternalCall();
2579 return NULL;
2580 }
Larry Hastingsca28e992012-05-24 22:58:30 -07002581
2582 if (PyBytes_CheckExact(x)) {
2583 Py_INCREF(x);
2584 return x;
2585 }
2586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002587 /* Use the modern buffer interface */
2588 if (PyObject_CheckBuffer(x)) {
2589 Py_buffer view;
2590 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2591 return NULL;
2592 new = PyBytes_FromStringAndSize(NULL, view.len);
2593 if (!new)
2594 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002595 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2596 &view, view.len, 'C') < 0)
2597 goto fail;
2598 PyBuffer_Release(&view);
2599 return new;
2600 fail:
2601 Py_XDECREF(new);
2602 PyBuffer_Release(&view);
2603 return NULL;
2604 }
2605 if (PyUnicode_Check(x)) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "cannot convert unicode object to bytes");
2608 return NULL;
2609 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 if (PyList_CheckExact(x)) {
2612 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2613 if (new == NULL)
2614 return NULL;
2615 for (i = 0; i < Py_SIZE(x); i++) {
2616 Py_ssize_t value = PyNumber_AsSsize_t(
2617 PyList_GET_ITEM(x, i), PyExc_ValueError);
2618 if (value == -1 && PyErr_Occurred()) {
2619 Py_DECREF(new);
2620 return NULL;
2621 }
2622 if (value < 0 || value >= 256) {
2623 PyErr_SetString(PyExc_ValueError,
2624 "bytes must be in range(0, 256)");
2625 Py_DECREF(new);
2626 return NULL;
2627 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002628 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 }
2630 return new;
2631 }
2632 if (PyTuple_CheckExact(x)) {
2633 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2634 if (new == NULL)
2635 return NULL;
2636 for (i = 0; i < Py_SIZE(x); i++) {
2637 Py_ssize_t value = PyNumber_AsSsize_t(
2638 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2639 if (value == -1 && PyErr_Occurred()) {
2640 Py_DECREF(new);
2641 return NULL;
2642 }
2643 if (value < 0 || value >= 256) {
2644 PyErr_SetString(PyExc_ValueError,
2645 "bytes must be in range(0, 256)");
2646 Py_DECREF(new);
2647 return NULL;
2648 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002649 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 }
2651 return new;
2652 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00002653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02002655 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 if (size == -1 && PyErr_Occurred())
2657 return NULL;
2658 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2659 returning a shared empty bytes string. This required because we
2660 want to call _PyBytes_Resize() the returned object, which we can
2661 only do on bytes objects with refcount == 1. */
2662 size += 1;
2663 new = PyBytes_FromStringAndSize(NULL, size);
2664 if (new == NULL)
2665 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 /* Get the iterator */
2668 it = PyObject_GetIter(x);
2669 if (it == NULL)
2670 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 /* Run the iterator to exhaustion */
2673 for (i = 0; ; i++) {
2674 PyObject *item;
2675 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 /* Get the next item */
2678 item = PyIter_Next(it);
2679 if (item == NULL) {
2680 if (PyErr_Occurred())
2681 goto error;
2682 break;
2683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 /* Interpret it as an int (__index__) */
2686 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2687 Py_DECREF(item);
2688 if (value == -1 && PyErr_Occurred())
2689 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 /* Range check */
2692 if (value < 0 || value >= 256) {
2693 PyErr_SetString(PyExc_ValueError,
2694 "bytes must be in range(0, 256)");
2695 goto error;
2696 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 /* Append the byte */
2699 if (i >= size) {
2700 size = 2 * size + 1;
2701 if (_PyBytes_Resize(&new, size) < 0)
2702 goto error;
2703 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00002704 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 }
2706 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 /* Clean up and return success */
2709 Py_DECREF(it);
2710 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002711
2712 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01002714 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716}
2717
2718static PyObject *
2719str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2720{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 PyObject *tmp, *pnew;
2722 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 assert(PyType_IsSubtype(type, &PyBytes_Type));
2725 tmp = bytes_new(&PyBytes_Type, args, kwds);
2726 if (tmp == NULL)
2727 return NULL;
2728 assert(PyBytes_CheckExact(tmp));
2729 n = PyBytes_GET_SIZE(tmp);
2730 pnew = type->tp_alloc(type, n);
2731 if (pnew != NULL) {
2732 Py_MEMCPY(PyBytes_AS_STRING(pnew),
2733 PyBytes_AS_STRING(tmp), n+1);
2734 ((PyBytesObject *)pnew)->ob_shash =
2735 ((PyBytesObject *)tmp)->ob_shash;
2736 }
2737 Py_DECREF(tmp);
2738 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739}
2740
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002741PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002742"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002743bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00002744bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002745bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2746bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002747\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002748Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002749 - an iterable yielding integers in range(256)\n\
2750 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01002751 - any object implementing the buffer API.\n\
2752 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00002753
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002754static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002755
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2758 "bytes",
2759 PyBytesObject_SIZE,
2760 sizeof(char),
2761 bytes_dealloc, /* tp_dealloc */
2762 0, /* tp_print */
2763 0, /* tp_getattr */
2764 0, /* tp_setattr */
2765 0, /* tp_reserved */
2766 (reprfunc)bytes_repr, /* tp_repr */
2767 0, /* tp_as_number */
2768 &bytes_as_sequence, /* tp_as_sequence */
2769 &bytes_as_mapping, /* tp_as_mapping */
2770 (hashfunc)bytes_hash, /* tp_hash */
2771 0, /* tp_call */
2772 bytes_str, /* tp_str */
2773 PyObject_GenericGetAttr, /* tp_getattro */
2774 0, /* tp_setattro */
2775 &bytes_as_buffer, /* tp_as_buffer */
2776 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2777 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2778 bytes_doc, /* tp_doc */
2779 0, /* tp_traverse */
2780 0, /* tp_clear */
2781 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2782 0, /* tp_weaklistoffset */
2783 bytes_iter, /* tp_iter */
2784 0, /* tp_iternext */
2785 bytes_methods, /* tp_methods */
2786 0, /* tp_members */
2787 0, /* tp_getset */
2788 &PyBaseObject_Type, /* tp_base */
2789 0, /* tp_dict */
2790 0, /* tp_descr_get */
2791 0, /* tp_descr_set */
2792 0, /* tp_dictoffset */
2793 0, /* tp_init */
2794 0, /* tp_alloc */
2795 bytes_new, /* tp_new */
2796 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002797};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002798
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002800PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 assert(pv != NULL);
2803 if (*pv == NULL)
2804 return;
2805 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002806 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002807 return;
2808 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02002809
2810 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2811 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05002812 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02002813 Py_buffer wb;
2814
2815 wb.len = -1;
2816 if (_getbuffer(w, &wb) < 0) {
2817 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2818 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2819 Py_CLEAR(*pv);
2820 return;
2821 }
2822
2823 oldsize = PyBytes_GET_SIZE(*pv);
2824 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2825 PyErr_NoMemory();
2826 goto error;
2827 }
2828 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2829 goto error;
2830
2831 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2832 PyBuffer_Release(&wb);
2833 return;
2834
2835 error:
2836 PyBuffer_Release(&wb);
2837 Py_CLEAR(*pv);
2838 return;
2839 }
2840
2841 else {
2842 /* Multiple references, need to create new object */
2843 PyObject *v;
2844 v = bytes_concat(*pv, w);
2845 Py_DECREF(*pv);
2846 *pv = v;
2847 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002848}
2849
2850void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002851PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 PyBytes_Concat(pv, w);
2854 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002855}
2856
2857
2858/* The following function breaks the notion that strings are immutable:
2859 it changes the size of a string. We get away with this only if there
2860 is only one module referencing the object. You can also think of it
2861 as creating a new string object and destroying the old one, only
2862 more efficiently. In any case, don't use this if the string may
2863 already be known to some other part of the code...
2864 Note that if there's not enough memory to resize the string, the original
2865 string object at *pv is deallocated, *pv is set to NULL, an "out of
2866 memory" exception is set, and -1 is returned. Else (on success) 0 is
2867 returned, and the value in *pv may or may not be the same as on input.
2868 As always, an extra byte is allocated for a trailing \0 byte (newsize
2869 does *not* include that), and a trailing \0 byte is stored.
2870*/
2871
2872int
2873_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2874{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002875 PyObject *v;
2876 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 v = *pv;
2878 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2879 *pv = 0;
2880 Py_DECREF(v);
2881 PyErr_BadInternalCall();
2882 return -1;
2883 }
2884 /* XXX UNREF/NEWREF interface should be more symmetrical */
2885 _Py_DEC_REFTOTAL;
2886 _Py_ForgetReference(v);
2887 *pv = (PyObject *)
2888 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2889 if (*pv == NULL) {
2890 PyObject_Del(v);
2891 PyErr_NoMemory();
2892 return -1;
2893 }
2894 _Py_NewReference(*pv);
2895 sv = (PyBytesObject *) *pv;
2896 Py_SIZE(sv) = newsize;
2897 sv->ob_sval[newsize] = '\0';
2898 sv->ob_shash = -1; /* invalidate cached hash value */
2899 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900}
2901
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002902void
2903PyBytes_Fini(void)
2904{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02002906 for (i = 0; i < UCHAR_MAX + 1; i++)
2907 Py_CLEAR(characters[i]);
2908 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909}
2910
Benjamin Peterson4116f362008-05-27 00:36:20 +00002911/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002912
2913typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002914 PyObject_HEAD
2915 Py_ssize_t it_index;
2916 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002917} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002918
2919static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002920striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002921{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002922 _PyObject_GC_UNTRACK(it);
2923 Py_XDECREF(it->it_seq);
2924 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002925}
2926
2927static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 Py_VISIT(it->it_seq);
2931 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002932}
2933
2934static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002936{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002937 PyBytesObject *seq;
2938 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002940 assert(it != NULL);
2941 seq = it->it_seq;
2942 if (seq == NULL)
2943 return NULL;
2944 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002946 if (it->it_index < PyBytes_GET_SIZE(seq)) {
2947 item = PyLong_FromLong(
2948 (unsigned char)seq->ob_sval[it->it_index]);
2949 if (item != NULL)
2950 ++it->it_index;
2951 return item;
2952 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002954 Py_DECREF(seq);
2955 it->it_seq = NULL;
2956 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002957}
2958
2959static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002960striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002961{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 Py_ssize_t len = 0;
2963 if (it->it_seq)
2964 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2965 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002966}
2967
2968PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00002970
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002971static PyObject *
2972striter_reduce(striterobject *it)
2973{
2974 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02002975 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002976 it->it_seq, it->it_index);
2977 } else {
2978 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2979 if (u == NULL)
2980 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02002981 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00002982 }
2983}
2984
2985PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2986
2987static PyObject *
2988striter_setstate(striterobject *it, PyObject *state)
2989{
2990 Py_ssize_t index = PyLong_AsSsize_t(state);
2991 if (index == -1 && PyErr_Occurred())
2992 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00002993 if (it->it_seq != NULL) {
2994 if (index < 0)
2995 index = 0;
2996 else if (index > PyBytes_GET_SIZE(it->it_seq))
2997 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
2998 it->it_index = index;
2999 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003000 Py_RETURN_NONE;
3001}
3002
3003PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3004
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003005static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3007 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003008 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3009 reduce_doc},
3010 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3011 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003012 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003013};
3014
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3017 "bytes_iterator", /* tp_name */
3018 sizeof(striterobject), /* tp_basicsize */
3019 0, /* tp_itemsize */
3020 /* methods */
3021 (destructor)striter_dealloc, /* tp_dealloc */
3022 0, /* tp_print */
3023 0, /* tp_getattr */
3024 0, /* tp_setattr */
3025 0, /* tp_reserved */
3026 0, /* tp_repr */
3027 0, /* tp_as_number */
3028 0, /* tp_as_sequence */
3029 0, /* tp_as_mapping */
3030 0, /* tp_hash */
3031 0, /* tp_call */
3032 0, /* tp_str */
3033 PyObject_GenericGetAttr, /* tp_getattro */
3034 0, /* tp_setattro */
3035 0, /* tp_as_buffer */
3036 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3037 0, /* tp_doc */
3038 (traverseproc)striter_traverse, /* tp_traverse */
3039 0, /* tp_clear */
3040 0, /* tp_richcompare */
3041 0, /* tp_weaklistoffset */
3042 PyObject_SelfIter, /* tp_iter */
3043 (iternextfunc)striter_next, /* tp_iternext */
3044 striter_methods, /* tp_methods */
3045 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003046};
3047
3048static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003049bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003051 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003052
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003053 if (!PyBytes_Check(seq)) {
3054 PyErr_BadInternalCall();
3055 return NULL;
3056 }
3057 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3058 if (it == NULL)
3059 return NULL;
3060 it->it_index = 0;
3061 Py_INCREF(seq);
3062 it->it_seq = (PyBytesObject *)seq;
3063 _PyObject_GC_TRACK(it);
3064 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003065}