blob: ca565eb5666de86de2f1bcf60f7858259047ef21 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Neal Norwitz2bad9702007-08-27 06:19:22 +000015static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000016_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000017{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020018 PyBufferProcs *bufferprocs;
19 if (PyBytes_CheckExact(obj)) {
20 /* Fast path, e.g. for .join() of many bytes objects */
21 Py_INCREF(obj);
22 view->obj = obj;
23 view->buf = PyBytes_AS_STRING(obj);
24 view->len = PyBytes_GET_SIZE(obj);
25 return view->len;
26 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000027
Antoine Pitroucfc22b42012-10-16 21:07:23 +020028 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
29 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 {
Antoine Pitroud1188562010-06-09 16:38:55 +000031 PyErr_Format(PyExc_TypeError,
32 "Type %.100s doesn't support the buffer API",
33 Py_TYPE(obj)->tp_name);
34 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000035 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000036
Antoine Pitroucfc22b42012-10-16 21:07:23 +020037 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000039 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000040}
41
Christian Heimes2c9c7a52008-05-26 13:42:13 +000042#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000043Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000045
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046static PyBytesObject *characters[UCHAR_MAX + 1];
47static PyBytesObject *nullstring;
48
Mark Dickinsonfd24b322008-12-06 15:33:31 +000049/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
50 for a string of length n should request PyBytesObject_SIZE + n bytes.
51
52 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
53 3 bytes per string allocation on a typical system.
54*/
55#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
56
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020079static PyObject *
80_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000081{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020082 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 assert(size >= 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000085#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000087#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 Py_INCREF(op);
89 return (PyObject *)op;
90 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
93 PyErr_SetString(PyExc_OverflowError,
94 "byte string is too large");
95 return NULL;
96 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020099 if (use_calloc)
100 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
101 else
102 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 if (op == NULL)
104 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100105 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200107 if (!use_calloc)
108 op->ob_sval[size] = '\0';
109 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 if (size == 0) {
111 nullstring = op;
112 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200113 }
114 return (PyObject *) op;
115}
116
117PyObject *
118PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
119{
120 PyBytesObject *op;
121 if (size < 0) {
122 PyErr_SetString(PyExc_SystemError,
123 "Negative size passed to PyBytes_FromStringAndSize");
124 return NULL;
125 }
126 if (size == 1 && str != NULL &&
127 (op = characters[*str & UCHAR_MAX]) != NULL)
128 {
129#ifdef COUNT_ALLOCS
130 one_strings++;
131#endif
132 Py_INCREF(op);
133 return (PyObject *)op;
134 }
135
136 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
137 if (op == NULL)
138 return NULL;
139 if (str == NULL)
140 return (PyObject *) op;
141
142 Py_MEMCPY(op->ob_sval, str, size);
143 /* share short strings */
144 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 characters[*str & UCHAR_MAX] = op;
146 Py_INCREF(op);
147 }
148 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000149}
150
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151PyObject *
152PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000153{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200154 size_t size;
155 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 assert(str != NULL);
158 size = strlen(str);
159 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
160 PyErr_SetString(PyExc_OverflowError,
161 "byte string is too long");
162 return NULL;
163 }
164 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000165#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000167#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 Py_INCREF(op);
169 return (PyObject *)op;
170 }
171 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 Py_INCREF(op);
176 return (PyObject *)op;
177 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 /* Inline PyObject_NewVar */
180 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
181 if (op == NULL)
182 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100183 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 op->ob_shash = -1;
185 Py_MEMCPY(op->ob_sval, str, size+1);
186 /* share short strings */
187 if (size == 0) {
188 nullstring = op;
189 Py_INCREF(op);
190 } else if (size == 1) {
191 characters[*str & UCHAR_MAX] = op;
192 Py_INCREF(op);
193 }
194 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000195}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000196
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000197PyObject *
198PyBytes_FromFormatV(const char *format, va_list vargs)
199{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 va_list count;
201 Py_ssize_t n = 0;
202 const char* f;
203 char *s;
204 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000205
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000206 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 /* step 1: figure out how large a buffer we need */
208 for (f = format; *f; f++) {
209 if (*f == '%') {
210 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000211 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
215 * they don't affect the amount of space we reserve.
216 */
217 if ((*f == 'l' || *f == 'z') &&
218 (f[1] == 'd' || f[1] == 'u'))
219 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 switch (*f) {
222 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100223 {
224 int c = va_arg(count, int);
225 if (c < 0 || c > 255) {
226 PyErr_SetString(PyExc_OverflowError,
227 "PyBytes_FromFormatV(): %c format "
228 "expects an integer in range [0; 255]");
229 return NULL;
230 }
231 n++;
232 break;
233 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 case '%':
235 n++;
236 break;
237 case 'd': case 'u': case 'i': case 'x':
238 (void) va_arg(count, int);
239 /* 20 bytes is enough to hold a 64-bit
240 integer. Decimal takes the most space.
241 This isn't enough for octal. */
242 n += 20;
243 break;
244 case 's':
245 s = va_arg(count, char*);
246 n += strlen(s);
247 break;
248 case 'p':
249 (void) va_arg(count, int);
250 /* maximum 64-bit pointer representation:
251 * 0xffffffffffffffff
252 * so 19 characters is enough.
253 * XXX I count 18 -- what's the extra for?
254 */
255 n += 19;
256 break;
257 default:
258 /* if we stumble upon an unknown
259 formatting code, copy the rest of
260 the format string to the output
261 string. (we cannot just skip the
262 code, since there's no way to know
263 what's in the argument list) */
264 n += strlen(p);
265 goto expand;
266 }
267 } else
268 n++;
269 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000270 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 /* step 2: fill the buffer */
272 /* Since we've analyzed how much space we need for the worst case,
273 use sprintf directly instead of the slower PyOS_snprintf. */
274 string = PyBytes_FromStringAndSize(NULL, n);
275 if (!string)
276 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 for (f = format; *f; f++) {
281 if (*f == '%') {
282 const char* p = f++;
283 Py_ssize_t i;
284 int longflag = 0;
285 int size_tflag = 0;
286 /* parse the width.precision part (we're only
287 interested in the precision value, if any) */
288 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000289 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290 n = (n*10) + *f++ - '0';
291 if (*f == '.') {
292 f++;
293 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000294 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 n = (n*10) + *f++ - '0';
296 }
David Malcolm96960882010-11-05 17:23:41 +0000297 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 f++;
299 /* handle the long flag, but only for %ld and %lu.
300 others can be added when necessary. */
301 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
302 longflag = 1;
303 ++f;
304 }
305 /* handle the size_t flag. */
306 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
307 size_tflag = 1;
308 ++f;
309 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000310
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 switch (*f) {
312 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100313 {
314 int c = va_arg(vargs, int);
315 /* c has been checked for overflow in the first step */
316 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100318 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 case 'd':
320 if (longflag)
321 sprintf(s, "%ld", va_arg(vargs, long));
322 else if (size_tflag)
323 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
324 va_arg(vargs, Py_ssize_t));
325 else
326 sprintf(s, "%d", va_arg(vargs, int));
327 s += strlen(s);
328 break;
329 case 'u':
330 if (longflag)
331 sprintf(s, "%lu",
332 va_arg(vargs, unsigned long));
333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
335 va_arg(vargs, size_t));
336 else
337 sprintf(s, "%u",
338 va_arg(vargs, unsigned int));
339 s += strlen(s);
340 break;
341 case 'i':
342 sprintf(s, "%i", va_arg(vargs, int));
343 s += strlen(s);
344 break;
345 case 'x':
346 sprintf(s, "%x", va_arg(vargs, int));
347 s += strlen(s);
348 break;
349 case 's':
350 p = va_arg(vargs, char*);
351 i = strlen(p);
352 if (n > 0 && i > n)
353 i = n;
354 Py_MEMCPY(s, p, i);
355 s += i;
356 break;
357 case 'p':
358 sprintf(s, "%p", va_arg(vargs, void*));
359 /* %p is ill-defined: ensure leading 0x. */
360 if (s[1] == 'X')
361 s[1] = 'x';
362 else if (s[1] != 'x') {
363 memmove(s+2, s, strlen(s)+1);
364 s[0] = '0';
365 s[1] = 'x';
366 }
367 s += strlen(s);
368 break;
369 case '%':
370 *s++ = '%';
371 break;
372 default:
373 strcpy(s, p);
374 s += strlen(s);
375 goto end;
376 }
377 } else
378 *s++ = *f;
379 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000380
381 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000382 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
383 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000384}
385
386PyObject *
387PyBytes_FromFormat(const char *format, ...)
388{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 PyObject* ret;
390 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000391
392#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000394#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000396#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 ret = PyBytes_FromFormatV(format, vargs);
398 va_end(vargs);
399 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400}
401
402static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000403bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000404{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000406}
407
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000408/* Unescape a backslash-escaped string. If unicode is non-zero,
409 the string is a u-literal. If recode_encoding is non-zero,
410 the string is UTF-8 encoded and should be re-encoded in the
411 specified encoding. */
412
413PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 Py_ssize_t len,
415 const char *errors,
416 Py_ssize_t unicode,
417 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000418{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 int c;
420 char *p, *buf;
421 const char *end;
422 PyObject *v;
423 Py_ssize_t newlen = recode_encoding ? 4*len:len;
424 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
425 if (v == NULL)
426 return NULL;
427 p = buf = PyBytes_AsString(v);
428 end = s + len;
429 while (s < end) {
430 if (*s != '\\') {
431 non_esc:
432 if (recode_encoding && (*s & 0x80)) {
433 PyObject *u, *w;
434 char *r;
435 const char* t;
436 Py_ssize_t rn;
437 t = s;
438 /* Decode non-ASCII bytes as UTF-8. */
439 while (t < end && (*t & 0x80)) t++;
440 u = PyUnicode_DecodeUTF8(s, t - s, errors);
441 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 /* Recode them in target encoding. */
444 w = PyUnicode_AsEncodedString(
445 u, recode_encoding, errors);
446 Py_DECREF(u);
447 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 /* Append bytes to output buffer. */
450 assert(PyBytes_Check(w));
451 r = PyBytes_AS_STRING(w);
452 rn = PyBytes_GET_SIZE(w);
453 Py_MEMCPY(p, r, rn);
454 p += rn;
455 Py_DECREF(w);
456 s = t;
457 } else {
458 *p++ = *s++;
459 }
460 continue;
461 }
462 s++;
463 if (s==end) {
464 PyErr_SetString(PyExc_ValueError,
465 "Trailing \\ in string");
466 goto failed;
467 }
468 switch (*s++) {
469 /* XXX This assumes ASCII! */
470 case '\n': break;
471 case '\\': *p++ = '\\'; break;
472 case '\'': *p++ = '\''; break;
473 case '\"': *p++ = '\"'; break;
474 case 'b': *p++ = '\b'; break;
475 case 'f': *p++ = '\014'; break; /* FF */
476 case 't': *p++ = '\t'; break;
477 case 'n': *p++ = '\n'; break;
478 case 'r': *p++ = '\r'; break;
479 case 'v': *p++ = '\013'; break; /* VT */
480 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
481 case '0': case '1': case '2': case '3':
482 case '4': case '5': case '6': case '7':
483 c = s[-1] - '0';
484 if (s < end && '0' <= *s && *s <= '7') {
485 c = (c<<3) + *s++ - '0';
486 if (s < end && '0' <= *s && *s <= '7')
487 c = (c<<3) + *s++ - '0';
488 }
489 *p++ = c;
490 break;
491 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000492 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000493 unsigned int x = 0;
494 c = Py_CHARMASK(*s);
495 s++;
David Malcolm96960882010-11-05 17:23:41 +0000496 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000498 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 x = 10 + c - 'a';
500 else
501 x = 10 + c - 'A';
502 x = x << 4;
503 c = Py_CHARMASK(*s);
504 s++;
David Malcolm96960882010-11-05 17:23:41 +0000505 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000507 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 x += 10 + c - 'a';
509 else
510 x += 10 + c - 'A';
511 *p++ = x;
512 break;
513 }
514 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200515 PyErr_Format(PyExc_ValueError,
516 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200517 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 goto failed;
519 }
520 if (strcmp(errors, "replace") == 0) {
521 *p++ = '?';
522 } else if (strcmp(errors, "ignore") == 0)
523 /* do nothing */;
524 else {
525 PyErr_Format(PyExc_ValueError,
526 "decoding error; unknown "
527 "error handling code: %.400s",
528 errors);
529 goto failed;
530 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200531 /* skip \x */
532 if (s < end && Py_ISXDIGIT(s[0]))
533 s++; /* and a hexdigit */
534 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000535 default:
536 *p++ = '\\';
537 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200538 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 UTF-8 bytes may follow. */
540 }
541 }
542 if (p-buf < newlen)
543 _PyBytes_Resize(&v, p - buf);
544 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000545 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 Py_DECREF(v);
547 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000548}
549
550/* -------------------------------------------------------------------- */
551/* object api */
552
553Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200554PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 if (!PyBytes_Check(op)) {
557 PyErr_Format(PyExc_TypeError,
558 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
559 return -1;
560 }
561 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000562}
563
564char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200565PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 if (!PyBytes_Check(op)) {
568 PyErr_Format(PyExc_TypeError,
569 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
570 return NULL;
571 }
572 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000573}
574
575int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200576PyBytes_AsStringAndSize(PyObject *obj,
577 char **s,
578 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000579{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 if (s == NULL) {
581 PyErr_BadInternalCall();
582 return -1;
583 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 if (!PyBytes_Check(obj)) {
586 PyErr_Format(PyExc_TypeError,
587 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
588 return -1;
589 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000591 *s = PyBytes_AS_STRING(obj);
592 if (len != NULL)
593 *len = PyBytes_GET_SIZE(obj);
594 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
595 PyErr_SetString(PyExc_TypeError,
596 "expected bytes with no null");
597 return -1;
598 }
599 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000600}
Neal Norwitz6968b052007-02-27 19:02:19 +0000601
602/* -------------------------------------------------------------------- */
603/* Methods */
604
Eric Smith0923d1d2009-04-16 20:16:10 +0000605#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000606
607#include "stringlib/fastsearch.h"
608#include "stringlib/count.h"
609#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200610#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000611#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000612#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000613#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000614
Eric Smith0f78bff2009-11-30 01:01:42 +0000615#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000616
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000617PyObject *
618PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000619{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200620 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200621 Py_ssize_t i, length = Py_SIZE(op);
622 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200624 unsigned char quote, *s, *p;
625
626 /* Compute size of output string */
627 squotes = dquotes = 0;
628 newsize = 3; /* b'' */
629 s = (unsigned char*)op->ob_sval;
630 for (i = 0; i < length; i++) {
631 switch(s[i]) {
632 case '\'': squotes++; newsize++; break;
633 case '"': dquotes++; newsize++; break;
634 case '\\': case '\t': case '\n': case '\r':
635 newsize += 2; break; /* \C */
636 default:
637 if (s[i] < ' ' || s[i] >= 0x7f)
638 newsize += 4; /* \xHH */
639 else
640 newsize++;
641 }
642 }
643 quote = '\'';
644 if (smartquotes && squotes && !dquotes)
645 quote = '"';
646 if (squotes && quote == '\'')
647 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200648
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 PyErr_SetString(PyExc_OverflowError,
651 "bytes object is too large to make repr");
652 return NULL;
653 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200654
655 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 if (v == NULL) {
657 return NULL;
658 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200659 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200661 *p++ = 'b', *p++ = quote;
662 for (i = 0; i < length; i++) {
663 unsigned char c = op->ob_sval[i];
664 if (c == quote || c == '\\')
665 *p++ = '\\', *p++ = c;
666 else if (c == '\t')
667 *p++ = '\\', *p++ = 't';
668 else if (c == '\n')
669 *p++ = '\\', *p++ = 'n';
670 else if (c == '\r')
671 *p++ = '\\', *p++ = 'r';
672 else if (c < ' ' || c >= 0x7f) {
673 *p++ = '\\';
674 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200675 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
676 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 else
679 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200681 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200682 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200683 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000684}
685
Neal Norwitz6968b052007-02-27 19:02:19 +0000686static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000687bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000688{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000690}
691
Neal Norwitz6968b052007-02-27 19:02:19 +0000692static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000693bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000694{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 if (Py_BytesWarningFlag) {
696 if (PyErr_WarnEx(PyExc_BytesWarning,
697 "str() on a bytes instance", 1))
698 return NULL;
699 }
700 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000701}
702
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000703static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000704bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000705{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000707}
Neal Norwitz6968b052007-02-27 19:02:19 +0000708
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000709/* This is also used by PyBytes_Concat() */
710static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000711bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000712{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 Py_ssize_t size;
714 Py_buffer va, vb;
715 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 va.len = -1;
718 vb.len = -1;
719 if (_getbuffer(a, &va) < 0 ||
720 _getbuffer(b, &vb) < 0) {
721 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
722 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
723 goto done;
724 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 /* Optimize end cases */
727 if (va.len == 0 && PyBytes_CheckExact(b)) {
728 result = b;
729 Py_INCREF(result);
730 goto done;
731 }
732 if (vb.len == 0 && PyBytes_CheckExact(a)) {
733 result = a;
734 Py_INCREF(result);
735 goto done;
736 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 size = va.len + vb.len;
739 if (size < 0) {
740 PyErr_NoMemory();
741 goto done;
742 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 result = PyBytes_FromStringAndSize(NULL, size);
745 if (result != NULL) {
746 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
747 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
748 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000749
750 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 if (va.len != -1)
752 PyBuffer_Release(&va);
753 if (vb.len != -1)
754 PyBuffer_Release(&vb);
755 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000756}
Neal Norwitz6968b052007-02-27 19:02:19 +0000757
758static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200759bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000760{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200761 Py_ssize_t i;
762 Py_ssize_t j;
763 Py_ssize_t size;
764 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000765 size_t nbytes;
766 if (n < 0)
767 n = 0;
768 /* watch out for overflows: the size can overflow int,
769 * and the # of bytes needed can overflow size_t
770 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000771 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 PyErr_SetString(PyExc_OverflowError,
773 "repeated bytes are too long");
774 return NULL;
775 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000776 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
778 Py_INCREF(a);
779 return (PyObject *)a;
780 }
781 nbytes = (size_t)size;
782 if (nbytes + PyBytesObject_SIZE <= nbytes) {
783 PyErr_SetString(PyExc_OverflowError,
784 "repeated bytes are too long");
785 return NULL;
786 }
787 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
788 if (op == NULL)
789 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100790 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 op->ob_shash = -1;
792 op->ob_sval[size] = '\0';
793 if (Py_SIZE(a) == 1 && n > 0) {
794 memset(op->ob_sval, a->ob_sval[0] , n);
795 return (PyObject *) op;
796 }
797 i = 0;
798 if (i < size) {
799 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
800 i = Py_SIZE(a);
801 }
802 while (i < size) {
803 j = (i <= size-i) ? i : size-i;
804 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
805 i += j;
806 }
807 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000808}
809
Guido van Rossum98297ee2007-11-06 21:34:58 +0000810static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000811bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000812{
813 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
814 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000815 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000816 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000817 PyErr_Clear();
818 if (_getbuffer(arg, &varg) < 0)
819 return -1;
820 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
821 varg.buf, varg.len, 0);
822 PyBuffer_Release(&varg);
823 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000824 }
825 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000826 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
827 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000828 }
829
Antoine Pitrou0010d372010-08-15 17:12:55 +0000830 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000831}
832
Neal Norwitz6968b052007-02-27 19:02:19 +0000833static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200834bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 if (i < 0 || i >= Py_SIZE(a)) {
837 PyErr_SetString(PyExc_IndexError, "index out of range");
838 return NULL;
839 }
840 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000841}
842
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100843Py_LOCAL(int)
844bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
845{
846 int cmp;
847 Py_ssize_t len;
848
849 len = Py_SIZE(a);
850 if (Py_SIZE(b) != len)
851 return 0;
852
853 if (a->ob_sval[0] != b->ob_sval[0])
854 return 0;
855
856 cmp = memcmp(a->ob_sval, b->ob_sval, len);
857 return (cmp == 0);
858}
859
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000860static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000861bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000862{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 int c;
864 Py_ssize_t len_a, len_b;
865 Py_ssize_t min_len;
866 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 /* Make sure both arguments are strings. */
869 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
870 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
871 (PyObject_IsInstance((PyObject*)a,
872 (PyObject*)&PyUnicode_Type) ||
873 PyObject_IsInstance((PyObject*)b,
874 (PyObject*)&PyUnicode_Type))) {
875 if (PyErr_WarnEx(PyExc_BytesWarning,
876 "Comparison between bytes and string", 1))
877 return NULL;
878 }
879 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100881 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100883 case Py_EQ:
884 case Py_LE:
885 case Py_GE:
886 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100888 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100889 case Py_NE:
890 case Py_LT:
891 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100893 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100894 default:
895 PyErr_BadArgument();
896 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 }
898 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100899 else if (op == Py_EQ || op == Py_NE) {
900 int eq = bytes_compare_eq(a, b);
901 eq ^= (op == Py_NE);
902 result = eq ? Py_True : Py_False;
903 }
904 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100905 len_a = Py_SIZE(a);
906 len_b = Py_SIZE(b);
907 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100908 if (min_len > 0) {
909 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100910 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100911 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100913 else
914 c = 0;
915 if (c == 0)
916 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
917 switch (op) {
918 case Py_LT: c = c < 0; break;
919 case Py_LE: c = c <= 0; break;
920 case Py_GT: c = c > 0; break;
921 case Py_GE: c = c >= 0; break;
922 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100923 PyErr_BadArgument();
924 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100925 }
926 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 Py_INCREF(result);
930 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000931}
932
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000933static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000934bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000935{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100936 if (a->ob_shash == -1) {
937 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100938 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100939 }
940 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000941}
942
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000943static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000944bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 if (PyIndex_Check(item)) {
947 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
948 if (i == -1 && PyErr_Occurred())
949 return NULL;
950 if (i < 0)
951 i += PyBytes_GET_SIZE(self);
952 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
953 PyErr_SetString(PyExc_IndexError,
954 "index out of range");
955 return NULL;
956 }
957 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
958 }
959 else if (PySlice_Check(item)) {
960 Py_ssize_t start, stop, step, slicelength, cur, i;
961 char* source_buf;
962 char* result_buf;
963 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000964
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000965 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 PyBytes_GET_SIZE(self),
967 &start, &stop, &step, &slicelength) < 0) {
968 return NULL;
969 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 if (slicelength <= 0) {
972 return PyBytes_FromStringAndSize("", 0);
973 }
974 else if (start == 0 && step == 1 &&
975 slicelength == PyBytes_GET_SIZE(self) &&
976 PyBytes_CheckExact(self)) {
977 Py_INCREF(self);
978 return (PyObject *)self;
979 }
980 else if (step == 1) {
981 return PyBytes_FromStringAndSize(
982 PyBytes_AS_STRING(self) + start,
983 slicelength);
984 }
985 else {
986 source_buf = PyBytes_AS_STRING(self);
987 result = PyBytes_FromStringAndSize(NULL, slicelength);
988 if (result == NULL)
989 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 result_buf = PyBytes_AS_STRING(result);
992 for (cur = start, i = 0; i < slicelength;
993 cur += step, i++) {
994 result_buf[i] = source_buf[cur];
995 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 return result;
998 }
999 }
1000 else {
1001 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001002 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 Py_TYPE(item)->tp_name);
1004 return NULL;
1005 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001006}
1007
1008static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001009bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001010{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1012 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001013}
1014
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001015static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 (lenfunc)bytes_length, /*sq_length*/
1017 (binaryfunc)bytes_concat, /*sq_concat*/
1018 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1019 (ssizeargfunc)bytes_item, /*sq_item*/
1020 0, /*sq_slice*/
1021 0, /*sq_ass_item*/
1022 0, /*sq_ass_slice*/
1023 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001024};
1025
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001026static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 (lenfunc)bytes_length,
1028 (binaryfunc)bytes_subscript,
1029 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001030};
1031
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001032static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 (getbufferproc)bytes_buffer_getbuffer,
1034 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001035};
1036
1037
1038#define LEFTSTRIP 0
1039#define RIGHTSTRIP 1
1040#define BOTHSTRIP 2
1041
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001042/*[clinic input]
1043bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001044
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001045 sep: object = None
1046 The delimiter according which to split the bytes.
1047 None (the default value) means split on ASCII whitespace characters
1048 (space, tab, return, newline, formfeed, vertical tab).
1049 maxsplit: Py_ssize_t = -1
1050 Maximum number of splits to do.
1051 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001052
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001053Return a list of the sections in the bytes, using sep as the delimiter.
1054[clinic start generated code]*/
1055
1056PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001057"split($self, /, sep=None, maxsplit=-1)\n"
1058"--\n"
1059"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001060"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1061"\n"
1062" sep\n"
1063" The delimiter according which to split the bytes.\n"
1064" None (the default value) means split on ASCII whitespace characters\n"
1065" (space, tab, return, newline, formfeed, vertical tab).\n"
1066" maxsplit\n"
1067" Maximum number of splits to do.\n"
1068" -1 (the default value) means no limit.");
1069
1070#define BYTES_SPLIT_METHODDEF \
1071 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001072
1073static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001074bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001075
1076static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001077bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001078{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001079 PyObject *return_value = NULL;
1080 static char *_keywords[] = {"sep", "maxsplit", NULL};
1081 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001083
1084 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1085 "|On:split", _keywords,
1086 &sep, &maxsplit))
1087 goto exit;
1088 return_value = bytes_split_impl(self, sep, maxsplit);
1089
1090exit:
1091 return return_value;
1092}
1093
1094static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001095bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1096/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001097{
1098 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 const char *s = PyBytes_AS_STRING(self), *sub;
1100 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001101 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 if (maxsplit < 0)
1104 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001105 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001107 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 return NULL;
1109 sub = vsub.buf;
1110 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1113 PyBuffer_Release(&vsub);
1114 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001115}
1116
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001117/*[clinic input]
1118bytes.partition
1119
1120 self: self(type="PyBytesObject *")
1121 sep: object
1122 /
1123
1124Partition the bytes into three parts using the given separator.
1125
1126This will search for the separator sep in the bytes. If the separator is found,
1127returns a 3-tuple containing the part before the separator, the separator
1128itself, and the part after it.
1129
1130If the separator is not found, returns a 3-tuple containing the original bytes
1131object and two empty bytes objects.
1132[clinic start generated code]*/
1133
1134PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001135"partition($self, sep, /)\n"
1136"--\n"
1137"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001138"Partition the bytes into three parts using the given separator.\n"
1139"\n"
1140"This will search for the separator sep in the bytes. If the separator is found,\n"
1141"returns a 3-tuple containing the part before the separator, the separator\n"
1142"itself, and the part after it.\n"
1143"\n"
1144"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1145"object and two empty bytes objects.");
1146
1147#define BYTES_PARTITION_METHODDEF \
1148 {"partition", (PyCFunction)bytes_partition, METH_O, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001149
1150static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001151bytes_partition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001152/*[clinic end generated code: output=b41e119c873c08bc input=6c5b9dcc5a9fd62e]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001153{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001154 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001156
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001157 if (PyBytes_Check(sep)) {
1158 sep_chars = PyBytes_AS_STRING(sep);
1159 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001161 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 return stringlib_partition(
1165 (PyObject*) self,
1166 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001167 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001169}
1170
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001171/*[clinic input]
1172bytes.rpartition
1173
1174 self: self(type="PyBytesObject *")
1175 sep: object
1176 /
1177
1178Partition the bytes into three parts using the given separator.
1179
1180This will search for the separator sep in the bytes, starting and the end. If
1181the separator is found, returns a 3-tuple containing the part before the
1182separator, the separator itself, and the part after it.
1183
1184If the separator is not found, returns a 3-tuple containing two empty bytes
1185objects and the original bytes object.
1186[clinic start generated code]*/
1187
1188PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001189"rpartition($self, sep, /)\n"
1190"--\n"
1191"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001192"Partition the bytes into three parts using the given separator.\n"
1193"\n"
1194"This will search for the separator sep in the bytes, starting and the end. If\n"
1195"the separator is found, returns a 3-tuple containing the part before the\n"
1196"separator, the separator itself, and the part after it.\n"
1197"\n"
1198"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1199"objects and the original bytes object.");
1200
1201#define BYTES_RPARTITION_METHODDEF \
1202 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001203
1204static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001205bytes_rpartition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001206/*[clinic end generated code: output=3a620803657196ee input=79bc2932e78e5ce0]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001207{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001208 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001210
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001211 if (PyBytes_Check(sep)) {
1212 sep_chars = PyBytes_AS_STRING(sep);
1213 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001215 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001217
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 return stringlib_rpartition(
1219 (PyObject*) self,
1220 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001221 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001223}
1224
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001225/*[clinic input]
1226bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001227
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001228Return a list of the sections in the bytes, using sep as the delimiter.
1229
1230Splitting is done starting at the end of the bytes and working to the front.
1231[clinic start generated code]*/
1232
1233PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001234"rsplit($self, /, sep=None, maxsplit=-1)\n"
1235"--\n"
1236"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001237"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1238"\n"
1239" sep\n"
1240" The delimiter according which to split the bytes.\n"
1241" None (the default value) means split on ASCII whitespace characters\n"
1242" (space, tab, return, newline, formfeed, vertical tab).\n"
1243" maxsplit\n"
1244" Maximum number of splits to do.\n"
1245" -1 (the default value) means no limit.\n"
1246"\n"
1247"Splitting is done starting at the end of the bytes and working to the front.");
1248
1249#define BYTES_RSPLIT_METHODDEF \
1250 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001251
Neal Norwitz6968b052007-02-27 19:02:19 +00001252static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001253bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001254
1255static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001256bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001257{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001258 PyObject *return_value = NULL;
1259 static char *_keywords[] = {"sep", "maxsplit", NULL};
1260 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001262
1263 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1264 "|On:rsplit", _keywords,
1265 &sep, &maxsplit))
1266 goto exit;
1267 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1268
1269exit:
1270 return return_value;
1271}
1272
1273static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001274bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1275/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001276{
1277 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 const char *s = PyBytes_AS_STRING(self), *sub;
1279 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001280 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 if (maxsplit < 0)
1283 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001284 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001286 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 return NULL;
1288 sub = vsub.buf;
1289 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1292 PyBuffer_Release(&vsub);
1293 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001294}
1295
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001296
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001297/*[clinic input]
1298bytes.join
1299
1300 iterable_of_bytes: object
1301 /
1302
1303Concatenate any number of bytes objects.
1304
1305The bytes whose method is called is inserted in between each pair.
1306
1307The result is returned as a new bytes object.
1308
1309Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1310[clinic start generated code]*/
1311
1312PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001313"join($self, iterable_of_bytes, /)\n"
1314"--\n"
1315"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001316"Concatenate any number of bytes objects.\n"
1317"\n"
1318"The bytes whose method is called is inserted in between each pair.\n"
1319"\n"
1320"The result is returned as a new bytes object.\n"
1321"\n"
1322"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1323
1324#define BYTES_JOIN_METHODDEF \
1325 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001326
Neal Norwitz6968b052007-02-27 19:02:19 +00001327static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001328bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1329/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001330{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001331 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001332}
1333
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334PyObject *
1335_PyBytes_Join(PyObject *sep, PyObject *x)
1336{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 assert(sep != NULL && PyBytes_Check(sep));
1338 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001339 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001340}
1341
/* Helper macro to fix up start/end slice values in place.
 *
 *   end:   values above len are set to len; negative values are offset by
 *          len and then floored at 0.
 *   start: negative values are offset by len and then floored at 0
 *          (start is not capped at len).
 *
 * start and end must be modifiable lvalues.  Every argument is evaluated
 * more than once, so none of them may have side effects.  The body is
 * wrapped in do { } while (0) so that an invocation followed by ';' is a
 * single statement and is safe inside an unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001356
1357Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001358bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001361 char byte;
1362 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 const char *sub;
1364 Py_ssize_t sub_len;
1365 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001366 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouac65d962011-10-20 23:54:17 +02001368 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1369 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
Antoine Pitrouac65d962011-10-20 23:54:17 +02001372 if (subobj) {
1373 if (_getbuffer(subobj, &subbuf) < 0)
1374 return -2;
1375
1376 sub = subbuf.buf;
1377 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001379 else {
1380 sub = &byte;
1381 sub_len = 1;
1382 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001385 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1387 sub, sub_len, start, end);
1388 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001389 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1391 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001392
1393 if (subobj)
1394 PyBuffer_Release(&subbuf);
1395
1396 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001397}
1398
1399
1400PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001401"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001402\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001403Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001404such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001405arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001406\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407Return -1 on failure.");
1408
Neal Norwitz6968b052007-02-27 19:02:19 +00001409static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001410bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001411{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 Py_ssize_t result = bytes_find_internal(self, args, +1);
1413 if (result == -2)
1414 return NULL;
1415 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001416}
1417
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001418
1419PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001420"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001421\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001422Like B.find() but raise ValueError when the substring is not found.");
1423
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001424static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001425bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001426{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 Py_ssize_t result = bytes_find_internal(self, args, +1);
1428 if (result == -2)
1429 return NULL;
1430 if (result == -1) {
1431 PyErr_SetString(PyExc_ValueError,
1432 "substring not found");
1433 return NULL;
1434 }
1435 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001436}
1437
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001438
1439PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001440"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001441\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001442Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001443such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001444arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001445\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001446Return -1 on failure.");
1447
Neal Norwitz6968b052007-02-27 19:02:19 +00001448static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001449bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 Py_ssize_t result = bytes_find_internal(self, args, -1);
1452 if (result == -2)
1453 return NULL;
1454 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001455}
1456
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001457
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001458PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001459"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001460\n\
1461Like B.rfind() but raise ValueError when the substring is not found.");
1462
1463static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001464bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001465{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 Py_ssize_t result = bytes_find_internal(self, args, -1);
1467 if (result == -2)
1468 return NULL;
1469 if (result == -1) {
1470 PyErr_SetString(PyExc_ValueError,
1471 "substring not found");
1472 return NULL;
1473 }
1474 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001475}
1476
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001477
1478Py_LOCAL_INLINE(PyObject *)
1479do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001480{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 Py_buffer vsep;
1482 char *s = PyBytes_AS_STRING(self);
1483 Py_ssize_t len = PyBytes_GET_SIZE(self);
1484 char *sep;
1485 Py_ssize_t seplen;
1486 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 if (_getbuffer(sepobj, &vsep) < 0)
1489 return NULL;
1490 sep = vsep.buf;
1491 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 i = 0;
1494 if (striptype != RIGHTSTRIP) {
1495 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1496 i++;
1497 }
1498 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 j = len;
1501 if (striptype != LEFTSTRIP) {
1502 do {
1503 j--;
1504 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1505 j++;
1506 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1511 Py_INCREF(self);
1512 return (PyObject*)self;
1513 }
1514 else
1515 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001516}
1517
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518
1519Py_LOCAL_INLINE(PyObject *)
1520do_strip(PyBytesObject *self, int striptype)
1521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 char *s = PyBytes_AS_STRING(self);
1523 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 i = 0;
1526 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001527 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 i++;
1529 }
1530 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 j = len;
1533 if (striptype != LEFTSTRIP) {
1534 do {
1535 j--;
David Malcolm96960882010-11-05 17:23:41 +00001536 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 j++;
1538 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1541 Py_INCREF(self);
1542 return (PyObject*)self;
1543 }
1544 else
1545 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546}
1547
1548
1549Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001550do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001552 if (bytes != NULL && bytes != Py_None) {
1553 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 }
1555 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001556}
1557
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001558/*[clinic input]
1559bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001560
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001561 self: self(type="PyBytesObject *")
1562 bytes: object = None
1563 /
1564
1565Strip leading and trailing bytes contained in the argument.
1566
1567If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1568[clinic start generated code]*/
1569
1570PyDoc_STRVAR(bytes_strip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001571"strip($self, bytes=None, /)\n"
1572"--\n"
1573"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001574"Strip leading and trailing bytes contained in the argument.\n"
1575"\n"
1576"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
1577
1578#define BYTES_STRIP_METHODDEF \
1579 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
1580
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001581static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001582bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
1583
1584static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001585bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001586{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001587 PyObject *return_value = NULL;
1588 PyObject *bytes = Py_None;
1589
1590 if (!PyArg_UnpackTuple(args, "strip",
1591 0, 1,
1592 &bytes))
1593 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001594 return_value = bytes_strip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001595
1596exit:
1597 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001598}
1599
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001600static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001601bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001602/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001603{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001604 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001605}
1606
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001607/*[clinic input]
1608bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001609
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001610 self: self(type="PyBytesObject *")
1611 bytes: object = None
1612 /
1613
1614Strip leading bytes contained in the argument.
1615
1616If the argument is omitted or None, strip leading ASCII whitespace.
1617[clinic start generated code]*/
1618
1619PyDoc_STRVAR(bytes_lstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001620"lstrip($self, bytes=None, /)\n"
1621"--\n"
1622"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001623"Strip leading bytes contained in the argument.\n"
1624"\n"
1625"If the argument is omitted or None, strip leading ASCII whitespace.");
1626
1627#define BYTES_LSTRIP_METHODDEF \
1628 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
1629
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001630static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001631bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
1632
1633static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001634bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001635{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001636 PyObject *return_value = NULL;
1637 PyObject *bytes = Py_None;
1638
1639 if (!PyArg_UnpackTuple(args, "lstrip",
1640 0, 1,
1641 &bytes))
1642 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001643 return_value = bytes_lstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001644
1645exit:
1646 return return_value;
1647}
1648
1649static PyObject *
1650bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001651/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001652{
1653 return do_argstrip(self, LEFTSTRIP, bytes);
1654}
1655
1656/*[clinic input]
1657bytes.rstrip
1658
1659 self: self(type="PyBytesObject *")
1660 bytes: object = None
1661 /
1662
1663Strip trailing bytes contained in the argument.
1664
1665If the argument is omitted or None, strip trailing ASCII whitespace.
1666[clinic start generated code]*/
1667
1668PyDoc_STRVAR(bytes_rstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001669"rstrip($self, bytes=None, /)\n"
1670"--\n"
1671"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001672"Strip trailing bytes contained in the argument.\n"
1673"\n"
1674"If the argument is omitted or None, strip trailing ASCII whitespace.");
1675
1676#define BYTES_RSTRIP_METHODDEF \
1677 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
1678
1679static PyObject *
1680bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
1681
1682static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001683bytes_rstrip(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001684{
1685 PyObject *return_value = NULL;
1686 PyObject *bytes = Py_None;
1687
1688 if (!PyArg_UnpackTuple(args, "rstrip",
1689 0, 1,
1690 &bytes))
1691 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001692 return_value = bytes_rstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001693
1694exit:
1695 return return_value;
1696}
1697
1698static PyObject *
1699bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001700/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001701{
1702 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001703}
Neal Norwitz6968b052007-02-27 19:02:19 +00001704
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001705
1706PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001707"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001708\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001709Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001710string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001711as in slice notation.");
1712
1713static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001714bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001715{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 PyObject *sub_obj;
1717 const char *str = PyBytes_AS_STRING(self), *sub;
1718 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001719 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721
Antoine Pitrouac65d962011-10-20 23:54:17 +02001722 Py_buffer vsub;
1723 PyObject *count_obj;
1724
1725 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1726 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001728
Antoine Pitrouac65d962011-10-20 23:54:17 +02001729 if (sub_obj) {
1730 if (_getbuffer(sub_obj, &vsub) < 0)
1731 return NULL;
1732
1733 sub = vsub.buf;
1734 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001735 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001736 else {
1737 sub = &byte;
1738 sub_len = 1;
1739 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001742
Antoine Pitrouac65d962011-10-20 23:54:17 +02001743 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1745 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001746
1747 if (sub_obj)
1748 PyBuffer_Release(&vsub);
1749
1750 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001751}
1752
1753
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001754/*[clinic input]
1755bytes.translate
1756
1757 self: self(type="PyBytesObject *")
1758 table: object
1759 Translation table, which must be a bytes object of length 256.
1760 [
1761 deletechars: object
1762 ]
1763 /
1764
1765Return a copy with each character mapped by the given translation table.
1766
1767All characters occurring in the optional argument deletechars are removed.
1768The remaining characters are mapped through the given translation table.
1769[clinic start generated code]*/
1770
1771PyDoc_STRVAR(bytes_translate__doc__,
1772"translate(table, [deletechars])\n"
1773"Return a copy with each character mapped by the given translation table.\n"
1774"\n"
1775" table\n"
1776" Translation table, which must be a bytes object of length 256.\n"
1777"\n"
1778"All characters occurring in the optional argument deletechars are removed.\n"
1779"The remaining characters are mapped through the given translation table.");
1780
1781#define BYTES_TRANSLATE_METHODDEF \
1782 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
1784static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
1786
1787static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001788bytes_translate(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001789{
1790 PyObject *return_value = NULL;
1791 PyObject *table;
1792 int group_right_1 = 0;
1793 PyObject *deletechars = NULL;
1794
1795 switch (PyTuple_GET_SIZE(args)) {
1796 case 1:
1797 if (!PyArg_ParseTuple(args, "O:translate", &table))
Martin v. Löwis0efea322014-07-27 17:29:17 +02001798 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001799 break;
1800 case 2:
1801 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
Martin v. Löwis0efea322014-07-27 17:29:17 +02001802 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001803 group_right_1 = 1;
1804 break;
1805 default:
1806 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
Martin v. Löwis0efea322014-07-27 17:29:17 +02001807 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001808 }
Martin v. Löwis0efea322014-07-27 17:29:17 +02001809 return_value = bytes_translate_impl(self, table, group_right_1, deletechars);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001810
Martin v. Löwis0efea322014-07-27 17:29:17 +02001811exit:
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001812 return return_value;
1813}
1814
1815static PyObject *
1816bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001817/*[clinic end generated code: output=f0f29a57f41df5d8 input=a90fad893c3c88d7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001818{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001819 char *input, *output;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001820 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001821 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001822 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001823 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 Py_ssize_t inlen, tablen, dellen = 0;
1825 PyObject *result;
1826 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001827
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001828 if (PyBytes_Check(table)) {
1829 table_chars = PyBytes_AS_STRING(table);
1830 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832 else if (table == Py_None) {
1833 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 tablen = 256;
1835 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001836 else if (PyObject_AsCharBuffer(table, &table_chars, &tablen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 if (tablen != 256) {
1840 PyErr_SetString(PyExc_ValueError,
1841 "translation table must be 256 characters long");
1842 return NULL;
1843 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001845 if (deletechars != NULL) {
1846 if (PyBytes_Check(deletechars)) {
1847 del_table_chars = PyBytes_AS_STRING(deletechars);
1848 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001850 else if (PyObject_AsCharBuffer(deletechars, &del_table_chars, &dellen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 return NULL;
1852 }
1853 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001854 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001855 dellen = 0;
1856 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 inlen = PyBytes_GET_SIZE(input_obj);
1859 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1860 if (result == NULL)
1861 return NULL;
1862 output_start = output = PyBytes_AsString(result);
1863 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001864
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001865 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001866 /* If no deletions are required, use faster code */
1867 for (i = inlen; --i >= 0; ) {
1868 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001869 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 changed = 1;
1871 }
1872 if (changed || !PyBytes_CheckExact(input_obj))
1873 return result;
1874 Py_DECREF(result);
1875 Py_INCREF(input_obj);
1876 return input_obj;
1877 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001879 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 for (i = 0; i < 256; i++)
1881 trans_table[i] = Py_CHARMASK(i);
1882 } else {
1883 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001884 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001887 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001888 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 for (i = inlen; --i >= 0; ) {
1891 c = Py_CHARMASK(*input++);
1892 if (trans_table[c] != -1)
1893 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1894 continue;
1895 changed = 1;
1896 }
1897 if (!changed && PyBytes_CheckExact(input_obj)) {
1898 Py_DECREF(result);
1899 Py_INCREF(input_obj);
1900 return input_obj;
1901 }
1902 /* Fix the size of the resulting string */
1903 if (inlen > 0)
1904 _PyBytes_Resize(&result, output - output_start);
1905 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906}
1907
1908
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001909/*[clinic input]
1910
1911@staticmethod
1912bytes.maketrans
1913
1914 frm: object
1915 to: object
1916 /
1917
1918Return a translation table useable for the bytes or bytearray translate method.
1919
1920The returned table will be one where each byte in frm is mapped to the byte at
1921the same position in to.
1922
1923The bytes objects frm and to must be of the same length.
1924[clinic start generated code]*/
1925
1926PyDoc_STRVAR(bytes_maketrans__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001927"maketrans(frm, to, /)\n"
1928"--\n"
1929"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001930"Return a translation table useable for the bytes or bytearray translate method.\n"
1931"\n"
1932"The returned table will be one where each byte in frm is mapped to the byte at\n"
1933"the same position in to.\n"
1934"\n"
1935"The bytes objects frm and to must be of the same length.");
1936
1937#define BYTES_MAKETRANS_METHODDEF \
1938 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},
1939
Georg Brandlabc38772009-04-12 15:51:51 +00001940static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001941bytes_maketrans_impl(PyObject *frm, PyObject *to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001942
1943static PyObject *
1944bytes_maketrans(void *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001945{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001946 PyObject *return_value = NULL;
1947 PyObject *frm;
1948 PyObject *to;
1949
1950 if (!PyArg_UnpackTuple(args, "maketrans",
1951 2, 2,
1952 &frm, &to))
1953 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001954 return_value = bytes_maketrans_impl(frm, to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001955
1956exit:
1957 return return_value;
1958}
1959
1960static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001961bytes_maketrans_impl(PyObject *frm, PyObject *to)
1962/*[clinic end generated code: output=89a3c3556975e466 input=d204f680f85da382]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001963{
1964 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00001965}
1966
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967/* find and count characters and substrings */
1968
/* Evaluates to a char * addressing the first occurrence of byte `c` within
   the first `target_len` bytes of `target`, or NULL when `c` is absent. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1971
1972/* String ops must return a string. */
1973/* If the object is subclass of string, create a copy */
1974Py_LOCAL(PyBytesObject *)
1975return_self(PyBytesObject *self)
1976{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 if (PyBytes_CheckExact(self)) {
1978 Py_INCREF(self);
1979 return self;
1980 }
1981 return (PyBytesObject *)PyBytes_FromStringAndSize(
1982 PyBytes_AS_STRING(self),
1983 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984}
1985
1986Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001987countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 Py_ssize_t count=0;
1990 const char *start=target;
1991 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 while ( (start=findchar(start, end-start, c)) != NULL ) {
1994 count++;
1995 if (count >= maxcount)
1996 break;
1997 start += 1;
1998 }
1999 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000}
2001
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002002
2003/* Algorithms for different cases of string replacement */
2004
2005/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2006Py_LOCAL(PyBytesObject *)
2007replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 const char *to_s, Py_ssize_t to_len,
2009 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 char *self_s, *result_s;
2012 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002013 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002014 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002016 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002018 /* 1 at the end plus 1 after every character;
2019 count = min(maxcount, self_len + 1) */
2020 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002022 else
2023 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2024 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 /* Check for overflow */
2027 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002028 assert(count > 0);
2029 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002030 PyErr_SetString(PyExc_OverflowError,
2031 "replacement bytes are too long");
2032 return NULL;
2033 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002034 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002036 if (! (result = (PyBytesObject *)
2037 PyBytes_FromStringAndSize(NULL, result_len)) )
2038 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 self_s = PyBytes_AS_STRING(self);
2041 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002042
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 /* Lay the first one down (guaranteed this will occur) */
2046 Py_MEMCPY(result_s, to_s, to_len);
2047 result_s += to_len;
2048 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 for (i=0; i<count; i++) {
2051 *result_s++ = *self_s++;
2052 Py_MEMCPY(result_s, to_s, to_len);
2053 result_s += to_len;
2054 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002056 /* Copy the rest of the original string */
2057 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002060}
2061
2062/* Special case for deleting a single character */
2063/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2064Py_LOCAL(PyBytesObject *)
2065replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002066 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002068 char *self_s, *result_s;
2069 char *start, *next, *end;
2070 Py_ssize_t self_len, result_len;
2071 Py_ssize_t count;
2072 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 self_len = PyBytes_GET_SIZE(self);
2075 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 count = countchar(self_s, self_len, from_c, maxcount);
2078 if (count == 0) {
2079 return return_self(self);
2080 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 result_len = self_len - count; /* from_len == 1 */
2083 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 if ( (result = (PyBytesObject *)
2086 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2087 return NULL;
2088 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 start = self_s;
2091 end = self_s + self_len;
2092 while (count-- > 0) {
2093 next = findchar(start, end-start, from_c);
2094 if (next == NULL)
2095 break;
2096 Py_MEMCPY(result_s, start, next-start);
2097 result_s += (next-start);
2098 start = next+1;
2099 }
2100 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103}
2104
2105/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2106
2107Py_LOCAL(PyBytesObject *)
2108replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 const char *from_s, Py_ssize_t from_len,
2110 Py_ssize_t maxcount) {
2111 char *self_s, *result_s;
2112 char *start, *next, *end;
2113 Py_ssize_t self_len, result_len;
2114 Py_ssize_t count, offset;
2115 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002117 self_len = PyBytes_GET_SIZE(self);
2118 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 count = stringlib_count(self_s, self_len,
2121 from_s, from_len,
2122 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 if (count == 0) {
2125 /* no matches */
2126 return return_self(self);
2127 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002129 result_len = self_len - (count * from_len);
2130 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 if ( (result = (PyBytesObject *)
2133 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2134 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 start = self_s;
2139 end = self_s + self_len;
2140 while (count-- > 0) {
2141 offset = stringlib_find(start, end-start,
2142 from_s, from_len,
2143 0);
2144 if (offset == -1)
2145 break;
2146 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 result_s += (next-start);
2151 start = next+from_len;
2152 }
2153 Py_MEMCPY(result_s, start, end-start);
2154 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002155}
2156
2157/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2158Py_LOCAL(PyBytesObject *)
2159replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 char from_c, char to_c,
2161 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002162{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 char *self_s, *result_s, *start, *end, *next;
2164 Py_ssize_t self_len;
2165 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 /* The result string will be the same size */
2168 self_s = PyBytes_AS_STRING(self);
2169 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002173 if (next == NULL) {
2174 /* No matches; return the original string */
2175 return return_self(self);
2176 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 /* Need to make a new string */
2179 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2180 if (result == NULL)
2181 return NULL;
2182 result_s = PyBytes_AS_STRING(result);
2183 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 /* change everything in-place, starting with this one */
2186 start = result_s + (next-self_s);
2187 *start = to_c;
2188 start++;
2189 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002191 while (--maxcount > 0) {
2192 next = findchar(start, end-start, from_c);
2193 if (next == NULL)
2194 break;
2195 *next = to_c;
2196 start = next+1;
2197 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002199 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002200}
2201
2202/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2203Py_LOCAL(PyBytesObject *)
2204replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002205 const char *from_s, Py_ssize_t from_len,
2206 const char *to_s, Py_ssize_t to_len,
2207 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 char *result_s, *start, *end;
2210 char *self_s;
2211 Py_ssize_t self_len, offset;
2212 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002214 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002216 self_s = PyBytes_AS_STRING(self);
2217 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002219 offset = stringlib_find(self_s, self_len,
2220 from_s, from_len,
2221 0);
2222 if (offset == -1) {
2223 /* No matches; return the original string */
2224 return return_self(self);
2225 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002227 /* Need to make a new string */
2228 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2229 if (result == NULL)
2230 return NULL;
2231 result_s = PyBytes_AS_STRING(result);
2232 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002233
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002234 /* change everything in-place, starting with this one */
2235 start = result_s + offset;
2236 Py_MEMCPY(start, to_s, from_len);
2237 start += from_len;
2238 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002239
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002240 while ( --maxcount > 0) {
2241 offset = stringlib_find(start, end-start,
2242 from_s, from_len,
2243 0);
2244 if (offset==-1)
2245 break;
2246 Py_MEMCPY(start+offset, to_s, from_len);
2247 start += offset+from_len;
2248 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251}
2252
2253/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2254Py_LOCAL(PyBytesObject *)
2255replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002256 char from_c,
2257 const char *to_s, Py_ssize_t to_len,
2258 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002260 char *self_s, *result_s;
2261 char *start, *next, *end;
2262 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002263 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002264 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002266 self_s = PyBytes_AS_STRING(self);
2267 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002268
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002269 count = countchar(self_s, self_len, from_c, maxcount);
2270 if (count == 0) {
2271 /* no matches, return unchanged */
2272 return return_self(self);
2273 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002275 /* use the difference between current and new, hence the "-1" */
2276 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002277 assert(count > 0);
2278 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002279 PyErr_SetString(PyExc_OverflowError,
2280 "replacement bytes are too long");
2281 return NULL;
2282 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002283 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002285 if ( (result = (PyBytesObject *)
2286 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2287 return NULL;
2288 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 start = self_s;
2291 end = self_s + self_len;
2292 while (count-- > 0) {
2293 next = findchar(start, end-start, from_c);
2294 if (next == NULL)
2295 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002296
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002297 if (next == start) {
2298 /* replace with the 'to' */
2299 Py_MEMCPY(result_s, to_s, to_len);
2300 result_s += to_len;
2301 start += 1;
2302 } else {
2303 /* copy the unchanged old then the 'to' */
2304 Py_MEMCPY(result_s, start, next-start);
2305 result_s += (next-start);
2306 Py_MEMCPY(result_s, to_s, to_len);
2307 result_s += to_len;
2308 start = next+1;
2309 }
2310 }
2311 /* Copy the remainder of the remaining string */
2312 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002315}
2316
2317/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2318Py_LOCAL(PyBytesObject *)
2319replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 const char *from_s, Py_ssize_t from_len,
2321 const char *to_s, Py_ssize_t to_len,
2322 Py_ssize_t maxcount) {
2323 char *self_s, *result_s;
2324 char *start, *next, *end;
2325 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002326 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002327 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002328
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002329 self_s = PyBytes_AS_STRING(self);
2330 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002331
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002332 count = stringlib_count(self_s, self_len,
2333 from_s, from_len,
2334 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 if (count == 0) {
2337 /* no matches, return unchanged */
2338 return return_self(self);
2339 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 /* Check for overflow */
2342 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002343 assert(count > 0);
2344 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002345 PyErr_SetString(PyExc_OverflowError,
2346 "replacement bytes are too long");
2347 return NULL;
2348 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002349 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002350
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002351 if ( (result = (PyBytesObject *)
2352 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2353 return NULL;
2354 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 start = self_s;
2357 end = self_s + self_len;
2358 while (count-- > 0) {
2359 offset = stringlib_find(start, end-start,
2360 from_s, from_len,
2361 0);
2362 if (offset == -1)
2363 break;
2364 next = start+offset;
2365 if (next == start) {
2366 /* replace with the 'to' */
2367 Py_MEMCPY(result_s, to_s, to_len);
2368 result_s += to_len;
2369 start += from_len;
2370 } else {
2371 /* copy the unchanged old then the 'to' */
2372 Py_MEMCPY(result_s, start, next-start);
2373 result_s += (next-start);
2374 Py_MEMCPY(result_s, to_s, to_len);
2375 result_s += to_len;
2376 start = next+from_len;
2377 }
2378 }
2379 /* Copy the remainder of the remaining string */
2380 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002383}
2384
2385
2386Py_LOCAL(PyBytesObject *)
2387replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 const char *from_s, Py_ssize_t from_len,
2389 const char *to_s, Py_ssize_t to_len,
2390 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002392 if (maxcount < 0) {
2393 maxcount = PY_SSIZE_T_MAX;
2394 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2395 /* nothing to do; return the original string */
2396 return return_self(self);
2397 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002399 if (maxcount == 0 ||
2400 (from_len == 0 && to_len == 0)) {
2401 /* nothing to do; return the original string */
2402 return return_self(self);
2403 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 if (from_len == 0) {
2408 /* insert the 'to' string everywhere. */
2409 /* >>> "Python".replace("", ".") */
2410 /* '.P.y.t.h.o.n.' */
2411 return replace_interleave(self, to_s, to_len, maxcount);
2412 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2415 /* point for an empty self string to generate a non-empty string */
2416 /* Special case so the remaining code always gets a non-empty string */
2417 if (PyBytes_GET_SIZE(self) == 0) {
2418 return return_self(self);
2419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002421 if (to_len == 0) {
2422 /* delete all occurrences of 'from' string */
2423 if (from_len == 1) {
2424 return replace_delete_single_character(
2425 self, from_s[0], maxcount);
2426 } else {
2427 return replace_delete_substring(self, from_s,
2428 from_len, maxcount);
2429 }
2430 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002433
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002434 if (from_len == to_len) {
2435 if (from_len == 1) {
2436 return replace_single_character_in_place(
2437 self,
2438 from_s[0],
2439 to_s[0],
2440 maxcount);
2441 } else {
2442 return replace_substring_in_place(
2443 self, from_s, from_len, to_s, to_len,
2444 maxcount);
2445 }
2446 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002448 /* Otherwise use the more generic algorithms */
2449 if (from_len == 1) {
2450 return replace_single_character(self, from_s[0],
2451 to_s, to_len, maxcount);
2452 } else {
2453 /* len('from')>=2, len('to')>=1 */
2454 return replace_substring(self, from_s, from_len, to_s, to_len,
2455 maxcount);
2456 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002457}
2458
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002459
2460/*[clinic input]
2461bytes.replace
2462
2463 old: object
2464 new: object
2465 count: Py_ssize_t = -1
2466 Maximum number of occurrences to replace.
2467 -1 (the default value) means replace all occurrences.
2468 /
2469
2470Return a copy with all occurrences of substring old replaced by new.
2471
2472If the optional argument count is given, only the first count occurrences are
2473replaced.
2474[clinic start generated code]*/
2475
2476PyDoc_STRVAR(bytes_replace__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002477"replace($self, old, new, count=-1, /)\n"
2478"--\n"
2479"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002480"Return a copy with all occurrences of substring old replaced by new.\n"
2481"\n"
2482" count\n"
2483" Maximum number of occurrences to replace.\n"
2484" -1 (the default value) means replace all occurrences.\n"
2485"\n"
2486"If the optional argument count is given, only the first count occurrences are\n"
2487"replaced.");
2488
2489#define BYTES_REPLACE_METHODDEF \
2490 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491
2492static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002493bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002494
2495static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002496bytes_replace(PyBytesObject*self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002497{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002498 PyObject *return_value = NULL;
2499 PyObject *old;
2500 PyObject *new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002502
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002503 if (!PyArg_ParseTuple(args,
2504 "OO|n:replace",
2505 &old, &new, &count))
2506 goto exit;
2507 return_value = bytes_replace_impl(self, old, new, count);
2508
2509exit:
2510 return return_value;
2511}
2512
2513static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002514bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count)
2515/*[clinic end generated code: output=14ce72f4f9cb91cf input=d3ac254ea50f4ac1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002516{
2517 const char *old_s, *new_s;
2518 Py_ssize_t old_len, new_len;
2519
2520 if (PyBytes_Check(old)) {
2521 old_s = PyBytes_AS_STRING(old);
2522 old_len = PyBytes_GET_SIZE(old);
2523 }
2524 else if (PyObject_AsCharBuffer(old, &old_s, &old_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002525 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002527 if (PyBytes_Check(new)) {
2528 new_s = PyBytes_AS_STRING(new);
2529 new_len = PyBytes_GET_SIZE(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002530 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002531 else if (PyObject_AsCharBuffer(new, &new_s, &new_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002532 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002534 return (PyObject *)replace((PyBytesObject *) self,
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002535 old_s, old_len,
2536 new_s, new_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537}
2538
2539/** End DALKE **/
2540
2541/* Matches the end (direction >= 0) or start (direction < 0) of self
2542 * against substr, using the start and end arguments. Returns
2543 * -1 on error, 0 if not found and 1 if found.
2544 */
2545Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002546_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002547 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002548{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 Py_ssize_t len = PyBytes_GET_SIZE(self);
2550 Py_ssize_t slen;
2551 const char* sub;
2552 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 if (PyBytes_Check(substr)) {
2555 sub = PyBytes_AS_STRING(substr);
2556 slen = PyBytes_GET_SIZE(substr);
2557 }
2558 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2559 return -1;
2560 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002562 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 if (direction < 0) {
2565 /* startswith */
2566 if (start+slen > len)
2567 return 0;
2568 } else {
2569 /* endswith */
2570 if (end-start < slen || start > len)
2571 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 if (end-slen > start)
2574 start = end - slen;
2575 }
2576 if (end-start >= slen)
2577 return ! memcmp(str+start, sub, slen);
2578 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002579}
2580
2581
2582PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002583"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584\n\
2585Return True if B starts with the specified prefix, False otherwise.\n\
2586With optional start, test B beginning at that position.\n\
2587With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002588prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589
2590static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002591bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002592{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002593 Py_ssize_t start = 0;
2594 Py_ssize_t end = PY_SSIZE_T_MAX;
2595 PyObject *subobj;
2596 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002597
Jesus Ceaac451502011-04-20 17:09:23 +02002598 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002599 return NULL;
2600 if (PyTuple_Check(subobj)) {
2601 Py_ssize_t i;
2602 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2603 result = _bytes_tailmatch(self,
2604 PyTuple_GET_ITEM(subobj, i),
2605 start, end, -1);
2606 if (result == -1)
2607 return NULL;
2608 else if (result) {
2609 Py_RETURN_TRUE;
2610 }
2611 }
2612 Py_RETURN_FALSE;
2613 }
2614 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002615 if (result == -1) {
2616 if (PyErr_ExceptionMatches(PyExc_TypeError))
2617 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2618 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002620 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002621 else
2622 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002623}
2624
2625
2626PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002627"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628\n\
2629Return True if B ends with the specified suffix, False otherwise.\n\
2630With optional start, test B beginning at that position.\n\
2631With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002632suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002633
2634static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002635bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002636{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002637 Py_ssize_t start = 0;
2638 Py_ssize_t end = PY_SSIZE_T_MAX;
2639 PyObject *subobj;
2640 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641
Jesus Ceaac451502011-04-20 17:09:23 +02002642 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 return NULL;
2644 if (PyTuple_Check(subobj)) {
2645 Py_ssize_t i;
2646 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2647 result = _bytes_tailmatch(self,
2648 PyTuple_GET_ITEM(subobj, i),
2649 start, end, +1);
2650 if (result == -1)
2651 return NULL;
2652 else if (result) {
2653 Py_RETURN_TRUE;
2654 }
2655 }
2656 Py_RETURN_FALSE;
2657 }
2658 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002659 if (result == -1) {
2660 if (PyErr_ExceptionMatches(PyExc_TypeError))
2661 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2662 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002664 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 else
2666 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002667}
2668
2669
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002670/*[clinic input]
2671bytes.decode
2672
2673 encoding: str(c_default="NULL") = 'utf-8'
2674 The encoding with which to decode the bytes.
2675 errors: str(c_default="NULL") = 'strict'
2676 The error handling scheme to use for the handling of decoding errors.
2677 The default is 'strict' meaning that decoding errors raise a
2678 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2679 as well as any other name registered with codecs.register_error that
2680 can handle UnicodeDecodeErrors.
2681
2682Decode the bytes using the codec registered for encoding.
2683[clinic start generated code]*/
2684
2685PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002686"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
2687"--\n"
2688"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002689"Decode the bytes using the codec registered for encoding.\n"
2690"\n"
2691" encoding\n"
2692" The encoding with which to decode the bytes.\n"
2693" errors\n"
2694" The error handling scheme to use for the handling of decoding errors.\n"
2695" The default is \'strict\' meaning that decoding errors raise a\n"
2696" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
2697" as well as any other name registered with codecs.register_error that\n"
2698" can handle UnicodeDecodeErrors.");
2699
2700#define BYTES_DECODE_METHODDEF \
2701 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
2702
2703static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002704bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002705
2706static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002707bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002708{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002709 PyObject *return_value = NULL;
2710 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 const char *encoding = NULL;
2712 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002713
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002714 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2715 "|ss:decode", _keywords,
2716 &encoding, &errors))
2717 goto exit;
2718 return_value = bytes_decode_impl(self, encoding, errors);
2719
2720exit:
2721 return return_value;
2722}
2723
2724static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002725bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
2726/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002727{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002728 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002729}
2730
Guido van Rossum20188312006-05-05 15:15:40 +00002731
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002732/*[clinic input]
2733bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002734
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002735 keepends: int(py_default="False") = 0
2736
2737Return a list of the lines in the bytes, breaking at line boundaries.
2738
2739Line breaks are not included in the resulting list unless keepends is given and
2740true.
2741[clinic start generated code]*/
2742
2743PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002744"splitlines($self, /, keepends=False)\n"
2745"--\n"
2746"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002747"Return a list of the lines in the bytes, breaking at line boundaries.\n"
2748"\n"
2749"Line breaks are not included in the resulting list unless keepends is given and\n"
2750"true.");
2751
2752#define BYTES_SPLITLINES_METHODDEF \
2753 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
2754
2755static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002756bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002757
2758static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002759bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002760{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002761 PyObject *return_value = NULL;
2762 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002763 int keepends = 0;
2764
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002765 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2766 "|i:splitlines", _keywords,
2767 &keepends))
2768 goto exit;
2769 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002770
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002771exit:
2772 return return_value;
2773}
2774
2775static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002776bytes_splitlines_impl(PyBytesObject*self, int keepends)
2777/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002778{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002779 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002780 (PyObject*) self, PyBytes_AS_STRING(self),
2781 PyBytes_GET_SIZE(self), keepends
2782 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002783}
2784
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002785static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002786hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002787{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 if (c >= 128)
2789 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002790 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 return c - '0';
2792 else {
David Malcolm96960882010-11-05 17:23:41 +00002793 if (Py_ISUPPER(c))
2794 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002795 if (c >= 'a' && c <= 'f')
2796 return c - 'a' + 10;
2797 }
2798 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002799}
2800
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002801/*[clinic input]
2802@classmethod
2803bytes.fromhex
2804
2805 string: unicode
2806 /
2807
2808Create a bytes object from a string of hexadecimal numbers.
2809
2810Spaces between two numbers are accepted.
2811Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2812[clinic start generated code]*/
2813
2814PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002815"fromhex($type, string, /)\n"
2816"--\n"
2817"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002818"Create a bytes object from a string of hexadecimal numbers.\n"
2819"\n"
2820"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02002821"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002822
2823#define BYTES_FROMHEX_METHODDEF \
2824 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
2825
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002826static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002827bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002828
2829static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002830bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002831{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002832 PyObject *return_value = NULL;
2833 PyObject *string;
2834
2835 if (!PyArg_ParseTuple(args,
2836 "U:fromhex",
2837 &string))
2838 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002839 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002840
2841exit:
2842 return return_value;
2843}
2844
2845static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002846bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2847/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002848{
2849 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002850 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002851 Py_ssize_t hexlen, byteslen, i, j;
2852 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002853 void *data;
2854 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002855
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002856 assert(PyUnicode_Check(string));
2857 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002859 kind = PyUnicode_KIND(string);
2860 data = PyUnicode_DATA(string);
2861 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002863 byteslen = hexlen/2; /* This overestimates if there are spaces */
2864 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2865 if (!newstring)
2866 return NULL;
2867 buf = PyBytes_AS_STRING(newstring);
2868 for (i = j = 0; i < hexlen; i += 2) {
2869 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002870 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002871 i++;
2872 if (i >= hexlen)
2873 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002874 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2875 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 if (top == -1 || bot == -1) {
2877 PyErr_Format(PyExc_ValueError,
2878 "non-hexadecimal number found in "
2879 "fromhex() arg at position %zd", i);
2880 goto error;
2881 }
2882 buf[j++] = (top << 4) + bot;
2883 }
2884 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2885 goto error;
2886 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002887
2888 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002889 Py_XDECREF(newstring);
2890 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002891}
2892
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002893/*[clinic input]
2894bytes.__sizeof__ as bytes_sizeof
2895
2896 self: self(type="PyBytesObject *")
2897
2898Returns the size of the bytes object in memory, in bytes.
2899[clinic start generated code]*/
2900
2901PyDoc_STRVAR(bytes_sizeof__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002902"__sizeof__($self, /)\n"
2903"--\n"
2904"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002905"Returns the size of the bytes object in memory, in bytes.");
2906
2907#define BYTES_SIZEOF_METHODDEF \
2908 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, bytes_sizeof__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002909
2910static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002911bytes_sizeof_impl(PyBytesObject *self);
2912
2913static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002914bytes_sizeof(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002915{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002916 return bytes_sizeof_impl(self);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002917}
2918
2919static PyObject *
2920bytes_sizeof_impl(PyBytesObject *self)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002921/*[clinic end generated code: output=44933279343f24ae input=bee4c64bb42078ed]*/
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002922{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 Py_ssize_t res;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002924 res = PyBytesObject_SIZE + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002926}
2927
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002928
2929static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002930bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002931{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002932 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002933}
2934
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002935
2936static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002937bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002938 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2939 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2940 _Py_capitalize__doc__},
2941 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2942 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002943 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002944 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2945 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002946 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002947 expandtabs__doc__},
2948 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002949 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2951 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2952 _Py_isalnum__doc__},
2953 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2954 _Py_isalpha__doc__},
2955 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2956 _Py_isdigit__doc__},
2957 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2958 _Py_islower__doc__},
2959 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2960 _Py_isspace__doc__},
2961 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2962 _Py_istitle__doc__},
2963 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2964 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002965 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002966 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2967 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002968 BYTES_LSTRIP_METHODDEF
2969 BYTES_MAKETRANS_METHODDEF
2970 BYTES_PARTITION_METHODDEF
2971 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002972 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2973 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2974 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002975 BYTES_RPARTITION_METHODDEF
2976 BYTES_RSPLIT_METHODDEF
2977 BYTES_RSTRIP_METHODDEF
2978 BYTES_SPLIT_METHODDEF
2979 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002980 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2981 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002982 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002983 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2984 _Py_swapcase__doc__},
2985 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002986 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2988 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002989 BYTES_SIZEOF_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002991};
2992
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002993static PyObject *
2994str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2995
2996static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002997bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002999 PyObject *x = NULL;
3000 const char *encoding = NULL;
3001 const char *errors = NULL;
3002 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003003 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 Py_ssize_t size;
3005 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003006 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 if (type != &PyBytes_Type)
3009 return str_subtype_new(type, args, kwds);
3010 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3011 &encoding, &errors))
3012 return NULL;
3013 if (x == NULL) {
3014 if (encoding != NULL || errors != NULL) {
3015 PyErr_SetString(PyExc_TypeError,
3016 "encoding or errors without sequence "
3017 "argument");
3018 return NULL;
3019 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003020 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 if (PyUnicode_Check(x)) {
3024 /* Encode via the codec registry */
3025 if (encoding == NULL) {
3026 PyErr_SetString(PyExc_TypeError,
3027 "string argument without an encoding");
3028 return NULL;
3029 }
3030 new = PyUnicode_AsEncodedString(x, encoding, errors);
3031 if (new == NULL)
3032 return NULL;
3033 assert(PyBytes_Check(new));
3034 return new;
3035 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003036
3037 /* We'd like to call PyObject_Bytes here, but we need to check for an
3038 integer argument before deferring to PyBytes_FromObject, something
3039 PyObject_Bytes doesn't do. */
3040 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3041 if (func != NULL) {
3042 new = PyObject_CallFunctionObjArgs(func, NULL);
3043 Py_DECREF(func);
3044 if (new == NULL)
3045 return NULL;
3046 if (!PyBytes_Check(new)) {
3047 PyErr_Format(PyExc_TypeError,
3048 "__bytes__ returned non-bytes (type %.200s)",
3049 Py_TYPE(new)->tp_name);
3050 Py_DECREF(new);
3051 return NULL;
3052 }
3053 return new;
3054 }
3055 else if (PyErr_Occurred())
3056 return NULL;
3057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 /* Is it an integer? */
3059 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3060 if (size == -1 && PyErr_Occurred()) {
3061 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3062 return NULL;
3063 PyErr_Clear();
3064 }
3065 else if (size < 0) {
3066 PyErr_SetString(PyExc_ValueError, "negative count");
3067 return NULL;
3068 }
3069 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003070 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003071 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003072 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 return new;
3074 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003076 /* If it's not unicode, there can't be encoding or errors */
3077 if (encoding != NULL || errors != NULL) {
3078 PyErr_SetString(PyExc_TypeError,
3079 "encoding or errors without a string argument");
3080 return NULL;
3081 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003082
3083 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003084}
3085
3086PyObject *
3087PyBytes_FromObject(PyObject *x)
3088{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 PyObject *new, *it;
3090 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 if (x == NULL) {
3093 PyErr_BadInternalCall();
3094 return NULL;
3095 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003096
3097 if (PyBytes_CheckExact(x)) {
3098 Py_INCREF(x);
3099 return x;
3100 }
3101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003102 /* Use the modern buffer interface */
3103 if (PyObject_CheckBuffer(x)) {
3104 Py_buffer view;
3105 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3106 return NULL;
3107 new = PyBytes_FromStringAndSize(NULL, view.len);
3108 if (!new)
3109 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003110 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3111 &view, view.len, 'C') < 0)
3112 goto fail;
3113 PyBuffer_Release(&view);
3114 return new;
3115 fail:
3116 Py_XDECREF(new);
3117 PyBuffer_Release(&view);
3118 return NULL;
3119 }
3120 if (PyUnicode_Check(x)) {
3121 PyErr_SetString(PyExc_TypeError,
3122 "cannot convert unicode object to bytes");
3123 return NULL;
3124 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 if (PyList_CheckExact(x)) {
3127 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3128 if (new == NULL)
3129 return NULL;
3130 for (i = 0; i < Py_SIZE(x); i++) {
3131 Py_ssize_t value = PyNumber_AsSsize_t(
3132 PyList_GET_ITEM(x, i), PyExc_ValueError);
3133 if (value == -1 && PyErr_Occurred()) {
3134 Py_DECREF(new);
3135 return NULL;
3136 }
3137 if (value < 0 || value >= 256) {
3138 PyErr_SetString(PyExc_ValueError,
3139 "bytes must be in range(0, 256)");
3140 Py_DECREF(new);
3141 return NULL;
3142 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003143 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003144 }
3145 return new;
3146 }
3147 if (PyTuple_CheckExact(x)) {
3148 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3149 if (new == NULL)
3150 return NULL;
3151 for (i = 0; i < Py_SIZE(x); i++) {
3152 Py_ssize_t value = PyNumber_AsSsize_t(
3153 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3154 if (value == -1 && PyErr_Occurred()) {
3155 Py_DECREF(new);
3156 return NULL;
3157 }
3158 if (value < 0 || value >= 256) {
3159 PyErr_SetString(PyExc_ValueError,
3160 "bytes must be in range(0, 256)");
3161 Py_DECREF(new);
3162 return NULL;
3163 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003164 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003165 }
3166 return new;
3167 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003169 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003170 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003171 if (size == -1 && PyErr_Occurred())
3172 return NULL;
3173 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3174 returning a shared empty bytes string. This required because we
3175 want to call _PyBytes_Resize() the returned object, which we can
3176 only do on bytes objects with refcount == 1. */
3177 size += 1;
3178 new = PyBytes_FromStringAndSize(NULL, size);
3179 if (new == NULL)
3180 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003182 /* Get the iterator */
3183 it = PyObject_GetIter(x);
3184 if (it == NULL)
3185 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003187 /* Run the iterator to exhaustion */
3188 for (i = 0; ; i++) {
3189 PyObject *item;
3190 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003192 /* Get the next item */
3193 item = PyIter_Next(it);
3194 if (item == NULL) {
3195 if (PyErr_Occurred())
3196 goto error;
3197 break;
3198 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003200 /* Interpret it as an int (__index__) */
3201 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3202 Py_DECREF(item);
3203 if (value == -1 && PyErr_Occurred())
3204 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003206 /* Range check */
3207 if (value < 0 || value >= 256) {
3208 PyErr_SetString(PyExc_ValueError,
3209 "bytes must be in range(0, 256)");
3210 goto error;
3211 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003213 /* Append the byte */
3214 if (i >= size) {
3215 size = 2 * size + 1;
3216 if (_PyBytes_Resize(&new, size) < 0)
3217 goto error;
3218 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003219 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003220 }
3221 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003223 /* Clean up and return success */
3224 Py_DECREF(it);
3225 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003226
3227 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003228 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003229 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003230 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003231}
3232
3233static PyObject *
3234str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003236 PyObject *tmp, *pnew;
3237 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003239 assert(PyType_IsSubtype(type, &PyBytes_Type));
3240 tmp = bytes_new(&PyBytes_Type, args, kwds);
3241 if (tmp == NULL)
3242 return NULL;
3243 assert(PyBytes_CheckExact(tmp));
3244 n = PyBytes_GET_SIZE(tmp);
3245 pnew = type->tp_alloc(type, n);
3246 if (pnew != NULL) {
3247 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3248 PyBytes_AS_STRING(tmp), n+1);
3249 ((PyBytesObject *)pnew)->ob_shash =
3250 ((PyBytesObject *)tmp)->ob_shash;
3251 }
3252 Py_DECREF(tmp);
3253 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003254}
3255
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003256PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003257"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003258bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003259bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003260bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3261bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003262\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003263Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003264 - an iterable yielding integers in range(256)\n\
3265 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003266 - any object implementing the buffer API.\n\
3267 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003268
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003269static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003270
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003271PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003272 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3273 "bytes",
3274 PyBytesObject_SIZE,
3275 sizeof(char),
3276 bytes_dealloc, /* tp_dealloc */
3277 0, /* tp_print */
3278 0, /* tp_getattr */
3279 0, /* tp_setattr */
3280 0, /* tp_reserved */
3281 (reprfunc)bytes_repr, /* tp_repr */
3282 0, /* tp_as_number */
3283 &bytes_as_sequence, /* tp_as_sequence */
3284 &bytes_as_mapping, /* tp_as_mapping */
3285 (hashfunc)bytes_hash, /* tp_hash */
3286 0, /* tp_call */
3287 bytes_str, /* tp_str */
3288 PyObject_GenericGetAttr, /* tp_getattro */
3289 0, /* tp_setattro */
3290 &bytes_as_buffer, /* tp_as_buffer */
3291 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3292 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3293 bytes_doc, /* tp_doc */
3294 0, /* tp_traverse */
3295 0, /* tp_clear */
3296 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3297 0, /* tp_weaklistoffset */
3298 bytes_iter, /* tp_iter */
3299 0, /* tp_iternext */
3300 bytes_methods, /* tp_methods */
3301 0, /* tp_members */
3302 0, /* tp_getset */
3303 &PyBaseObject_Type, /* tp_base */
3304 0, /* tp_dict */
3305 0, /* tp_descr_get */
3306 0, /* tp_descr_set */
3307 0, /* tp_dictoffset */
3308 0, /* tp_init */
3309 0, /* tp_alloc */
3310 bytes_new, /* tp_new */
3311 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003312};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003313
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003314void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003315PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003317 assert(pv != NULL);
3318 if (*pv == NULL)
3319 return;
3320 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003321 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003322 return;
3323 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003324
3325 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3326 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003327 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003328 Py_buffer wb;
3329
3330 wb.len = -1;
3331 if (_getbuffer(w, &wb) < 0) {
3332 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3333 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3334 Py_CLEAR(*pv);
3335 return;
3336 }
3337
3338 oldsize = PyBytes_GET_SIZE(*pv);
3339 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3340 PyErr_NoMemory();
3341 goto error;
3342 }
3343 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3344 goto error;
3345
3346 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3347 PyBuffer_Release(&wb);
3348 return;
3349
3350 error:
3351 PyBuffer_Release(&wb);
3352 Py_CLEAR(*pv);
3353 return;
3354 }
3355
3356 else {
3357 /* Multiple references, need to create new object */
3358 PyObject *v;
3359 v = bytes_concat(*pv, w);
3360 Py_DECREF(*pv);
3361 *pv = v;
3362 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003363}
3364
3365void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003366PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003367{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003368 PyBytes_Concat(pv, w);
3369 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003370}
3371
3372
3373/* The following function breaks the notion that strings are immutable:
3374 it changes the size of a string. We get away with this only if there
3375 is only one module referencing the object. You can also think of it
3376 as creating a new string object and destroying the old one, only
3377 more efficiently. In any case, don't use this if the string may
3378 already be known to some other part of the code...
3379 Note that if there's not enough memory to resize the string, the original
3380 string object at *pv is deallocated, *pv is set to NULL, an "out of
3381 memory" exception is set, and -1 is returned. Else (on success) 0 is
3382 returned, and the value in *pv may or may not be the same as on input.
3383 As always, an extra byte is allocated for a trailing \0 byte (newsize
3384 does *not* include that), and a trailing \0 byte is stored.
3385*/
3386
3387int
3388_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3389{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003390 PyObject *v;
3391 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003392 v = *pv;
3393 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3394 *pv = 0;
3395 Py_DECREF(v);
3396 PyErr_BadInternalCall();
3397 return -1;
3398 }
3399 /* XXX UNREF/NEWREF interface should be more symmetrical */
3400 _Py_DEC_REFTOTAL;
3401 _Py_ForgetReference(v);
3402 *pv = (PyObject *)
3403 PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
3404 if (*pv == NULL) {
3405 PyObject_Del(v);
3406 PyErr_NoMemory();
3407 return -1;
3408 }
3409 _Py_NewReference(*pv);
3410 sv = (PyBytesObject *) *pv;
3411 Py_SIZE(sv) = newsize;
3412 sv->ob_sval[newsize] = '\0';
3413 sv->ob_shash = -1; /* invalidate cached hash value */
3414 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003415}
3416
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003417void
3418PyBytes_Fini(void)
3419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003420 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003421 for (i = 0; i < UCHAR_MAX + 1; i++)
3422 Py_CLEAR(characters[i]);
3423 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003424}
3425
Benjamin Peterson4116f362008-05-27 00:36:20 +00003426/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003427
3428typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003429 PyObject_HEAD
3430 Py_ssize_t it_index;
3431 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003432} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003433
3434static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003435striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003437 _PyObject_GC_UNTRACK(it);
3438 Py_XDECREF(it->it_seq);
3439 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003440}
3441
3442static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003443striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003444{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003445 Py_VISIT(it->it_seq);
3446 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003447}
3448
3449static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003450striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003451{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003452 PyBytesObject *seq;
3453 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003455 assert(it != NULL);
3456 seq = it->it_seq;
3457 if (seq == NULL)
3458 return NULL;
3459 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003461 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3462 item = PyLong_FromLong(
3463 (unsigned char)seq->ob_sval[it->it_index]);
3464 if (item != NULL)
3465 ++it->it_index;
3466 return item;
3467 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003469 Py_DECREF(seq);
3470 it->it_seq = NULL;
3471 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003472}
3473
3474static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003475striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003476{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003477 Py_ssize_t len = 0;
3478 if (it->it_seq)
3479 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3480 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003481}
3482
3483PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003484 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003485
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003486static PyObject *
3487striter_reduce(striterobject *it)
3488{
3489 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003490 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003491 it->it_seq, it->it_index);
3492 } else {
3493 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3494 if (u == NULL)
3495 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003496 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003497 }
3498}
3499
3500PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3501
3502static PyObject *
3503striter_setstate(striterobject *it, PyObject *state)
3504{
3505 Py_ssize_t index = PyLong_AsSsize_t(state);
3506 if (index == -1 && PyErr_Occurred())
3507 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003508 if (it->it_seq != NULL) {
3509 if (index < 0)
3510 index = 0;
3511 else if (index > PyBytes_GET_SIZE(it->it_seq))
3512 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3513 it->it_index = index;
3514 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003515 Py_RETURN_NONE;
3516}
3517
3518PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3519
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003520static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003521 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3522 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003523 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3524 reduce_doc},
3525 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3526 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003527 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003528};
3529
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003530PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003531 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3532 "bytes_iterator", /* tp_name */
3533 sizeof(striterobject), /* tp_basicsize */
3534 0, /* tp_itemsize */
3535 /* methods */
3536 (destructor)striter_dealloc, /* tp_dealloc */
3537 0, /* tp_print */
3538 0, /* tp_getattr */
3539 0, /* tp_setattr */
3540 0, /* tp_reserved */
3541 0, /* tp_repr */
3542 0, /* tp_as_number */
3543 0, /* tp_as_sequence */
3544 0, /* tp_as_mapping */
3545 0, /* tp_hash */
3546 0, /* tp_call */
3547 0, /* tp_str */
3548 PyObject_GenericGetAttr, /* tp_getattro */
3549 0, /* tp_setattro */
3550 0, /* tp_as_buffer */
3551 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3552 0, /* tp_doc */
3553 (traverseproc)striter_traverse, /* tp_traverse */
3554 0, /* tp_clear */
3555 0, /* tp_richcompare */
3556 0, /* tp_weaklistoffset */
3557 PyObject_SelfIter, /* tp_iter */
3558 (iternextfunc)striter_next, /* tp_iternext */
3559 striter_methods, /* tp_methods */
3560 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003561};
3562
3563static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003564bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003566 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003568 if (!PyBytes_Check(seq)) {
3569 PyErr_BadInternalCall();
3570 return NULL;
3571 }
3572 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3573 if (it == NULL)
3574 return NULL;
3575 it->it_index = 0;
3576 Py_INCREF(seq);
3577 it->it_seq = (PyBytesObject *)seq;
3578 _PyObject_GC_TRACK(it);
3579 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003580}