blob: 63d5a5c6c3c4241e1e3a5a01229020514c471d4d [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Neal Norwitz2bad9702007-08-27 06:19:22 +000015static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000016_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000017{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020018 PyBufferProcs *bufferprocs;
19 if (PyBytes_CheckExact(obj)) {
20 /* Fast path, e.g. for .join() of many bytes objects */
21 Py_INCREF(obj);
22 view->obj = obj;
23 view->buf = PyBytes_AS_STRING(obj);
24 view->len = PyBytes_GET_SIZE(obj);
25 return view->len;
26 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000027
Antoine Pitroucfc22b42012-10-16 21:07:23 +020028 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
29 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 {
Antoine Pitroud1188562010-06-09 16:38:55 +000031 PyErr_Format(PyExc_TypeError,
32 "Type %.100s doesn't support the buffer API",
33 Py_TYPE(obj)->tp_name);
34 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000035 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000036
Antoine Pitroucfc22b42012-10-16 21:07:23 +020037 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000039 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000040}
41
Christian Heimes2c9c7a52008-05-26 13:42:13 +000042#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000043Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000044#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000045
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046static PyBytesObject *characters[UCHAR_MAX + 1];
47static PyBytesObject *nullstring;
48
Mark Dickinsonfd24b322008-12-06 15:33:31 +000049/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
50 for a string of length n should request PyBytesObject_SIZE + n bytes.
51
52 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
53 3 bytes per string allocation on a typical system.
54*/
55#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
56
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020079static PyObject *
80_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000081{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020082 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 Py_INCREF(op);
90 return (PyObject *)op;
91 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000092
Victor Stinner049e5092014-08-17 22:20:00 +020093 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 PyErr_SetString(PyExc_OverflowError,
95 "byte string is too large");
96 return NULL;
97 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +0200100 if (use_calloc)
101 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102 else
103 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 if (op == NULL)
105 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100106 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 if (!use_calloc)
109 op->ob_sval[size] = '\0';
110 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200114 }
115 return (PyObject *) op;
116}
117
118PyObject *
119PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120{
121 PyBytesObject *op;
122 if (size < 0) {
123 PyErr_SetString(PyExc_SystemError,
124 "Negative size passed to PyBytes_FromStringAndSize");
125 return NULL;
126 }
127 if (size == 1 && str != NULL &&
128 (op = characters[*str & UCHAR_MAX]) != NULL)
129 {
130#ifdef COUNT_ALLOCS
131 one_strings++;
132#endif
133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
136
137 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
138 if (op == NULL)
139 return NULL;
140 if (str == NULL)
141 return (PyObject *) op;
142
143 Py_MEMCPY(op->ob_sval, str, size);
144 /* share short strings */
145 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 characters[*str & UCHAR_MAX] = op;
147 Py_INCREF(op);
148 }
149 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000150}
151
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000152PyObject *
153PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000154{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200155 size_t size;
156 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 assert(str != NULL);
159 size = strlen(str);
160 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
161 PyErr_SetString(PyExc_OverflowError,
162 "byte string is too long");
163 return NULL;
164 }
165 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000166#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000168#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 Py_INCREF(op);
170 return (PyObject *)op;
171 }
172 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000173#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000175#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 Py_INCREF(op);
177 return (PyObject *)op;
178 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* Inline PyObject_NewVar */
181 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
182 if (op == NULL)
183 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100184 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 op->ob_shash = -1;
186 Py_MEMCPY(op->ob_sval, str, size+1);
187 /* share short strings */
188 if (size == 0) {
189 nullstring = op;
190 Py_INCREF(op);
191 } else if (size == 1) {
192 characters[*str & UCHAR_MAX] = op;
193 Py_INCREF(op);
194 }
195 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000196}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000197
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000198PyObject *
199PyBytes_FromFormatV(const char *format, va_list vargs)
200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 va_list count;
202 Py_ssize_t n = 0;
203 const char* f;
204 char *s;
205 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000206
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000207 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 /* step 1: figure out how large a buffer we need */
209 for (f = format; *f; f++) {
210 if (*f == '%') {
211 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000212 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
216 * they don't affect the amount of space we reserve.
217 */
218 if ((*f == 'l' || *f == 'z') &&
219 (f[1] == 'd' || f[1] == 'u'))
220 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 switch (*f) {
223 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100224 {
225 int c = va_arg(count, int);
226 if (c < 0 || c > 255) {
227 PyErr_SetString(PyExc_OverflowError,
228 "PyBytes_FromFormatV(): %c format "
229 "expects an integer in range [0; 255]");
230 return NULL;
231 }
232 n++;
233 break;
234 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 case '%':
236 n++;
237 break;
238 case 'd': case 'u': case 'i': case 'x':
239 (void) va_arg(count, int);
240 /* 20 bytes is enough to hold a 64-bit
241 integer. Decimal takes the most space.
242 This isn't enough for octal. */
243 n += 20;
244 break;
245 case 's':
246 s = va_arg(count, char*);
247 n += strlen(s);
248 break;
249 case 'p':
250 (void) va_arg(count, int);
251 /* maximum 64-bit pointer representation:
252 * 0xffffffffffffffff
253 * so 19 characters is enough.
254 * XXX I count 18 -- what's the extra for?
255 */
256 n += 19;
257 break;
258 default:
259 /* if we stumble upon an unknown
260 formatting code, copy the rest of
261 the format string to the output
262 string. (we cannot just skip the
263 code, since there's no way to know
264 what's in the argument list) */
265 n += strlen(p);
266 goto expand;
267 }
268 } else
269 n++;
270 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000271 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 /* step 2: fill the buffer */
273 /* Since we've analyzed how much space we need for the worst case,
274 use sprintf directly instead of the slower PyOS_snprintf. */
275 string = PyBytes_FromStringAndSize(NULL, n);
276 if (!string)
277 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 for (f = format; *f; f++) {
282 if (*f == '%') {
283 const char* p = f++;
284 Py_ssize_t i;
285 int longflag = 0;
286 int size_tflag = 0;
287 /* parse the width.precision part (we're only
288 interested in the precision value, if any) */
289 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000290 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 n = (n*10) + *f++ - '0';
292 if (*f == '.') {
293 f++;
294 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000295 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 n = (n*10) + *f++ - '0';
297 }
David Malcolm96960882010-11-05 17:23:41 +0000298 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 f++;
300 /* handle the long flag, but only for %ld and %lu.
301 others can be added when necessary. */
302 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
303 longflag = 1;
304 ++f;
305 }
306 /* handle the size_t flag. */
307 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
308 size_tflag = 1;
309 ++f;
310 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 switch (*f) {
313 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100314 {
315 int c = va_arg(vargs, int);
316 /* c has been checked for overflow in the first step */
317 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100319 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 case 'd':
321 if (longflag)
322 sprintf(s, "%ld", va_arg(vargs, long));
323 else if (size_tflag)
324 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
325 va_arg(vargs, Py_ssize_t));
326 else
327 sprintf(s, "%d", va_arg(vargs, int));
328 s += strlen(s);
329 break;
330 case 'u':
331 if (longflag)
332 sprintf(s, "%lu",
333 va_arg(vargs, unsigned long));
334 else if (size_tflag)
335 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
336 va_arg(vargs, size_t));
337 else
338 sprintf(s, "%u",
339 va_arg(vargs, unsigned int));
340 s += strlen(s);
341 break;
342 case 'i':
343 sprintf(s, "%i", va_arg(vargs, int));
344 s += strlen(s);
345 break;
346 case 'x':
347 sprintf(s, "%x", va_arg(vargs, int));
348 s += strlen(s);
349 break;
350 case 's':
351 p = va_arg(vargs, char*);
352 i = strlen(p);
353 if (n > 0 && i > n)
354 i = n;
355 Py_MEMCPY(s, p, i);
356 s += i;
357 break;
358 case 'p':
359 sprintf(s, "%p", va_arg(vargs, void*));
360 /* %p is ill-defined: ensure leading 0x. */
361 if (s[1] == 'X')
362 s[1] = 'x';
363 else if (s[1] != 'x') {
364 memmove(s+2, s, strlen(s)+1);
365 s[0] = '0';
366 s[1] = 'x';
367 }
368 s += strlen(s);
369 break;
370 case '%':
371 *s++ = '%';
372 break;
373 default:
374 strcpy(s, p);
375 s += strlen(s);
376 goto end;
377 }
378 } else
379 *s++ = *f;
380 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000381
382 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
384 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000385}
386
387PyObject *
388PyBytes_FromFormat(const char *format, ...)
389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 PyObject* ret;
391 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000392
393#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000397#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 ret = PyBytes_FromFormatV(format, vargs);
399 va_end(vargs);
400 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000401}
402
403static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000404bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000405{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000407}
408
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000409/* Unescape a backslash-escaped string. If unicode is non-zero,
410 the string is a u-literal. If recode_encoding is non-zero,
411 the string is UTF-8 encoded and should be re-encoded in the
412 specified encoding. */
413
414PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 Py_ssize_t len,
416 const char *errors,
417 Py_ssize_t unicode,
418 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 int c;
421 char *p, *buf;
422 const char *end;
423 PyObject *v;
424 Py_ssize_t newlen = recode_encoding ? 4*len:len;
425 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
426 if (v == NULL)
427 return NULL;
428 p = buf = PyBytes_AsString(v);
429 end = s + len;
430 while (s < end) {
431 if (*s != '\\') {
432 non_esc:
433 if (recode_encoding && (*s & 0x80)) {
434 PyObject *u, *w;
435 char *r;
436 const char* t;
437 Py_ssize_t rn;
438 t = s;
439 /* Decode non-ASCII bytes as UTF-8. */
440 while (t < end && (*t & 0x80)) t++;
441 u = PyUnicode_DecodeUTF8(s, t - s, errors);
442 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 /* Recode them in target encoding. */
445 w = PyUnicode_AsEncodedString(
446 u, recode_encoding, errors);
447 Py_DECREF(u);
448 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 /* Append bytes to output buffer. */
451 assert(PyBytes_Check(w));
452 r = PyBytes_AS_STRING(w);
453 rn = PyBytes_GET_SIZE(w);
454 Py_MEMCPY(p, r, rn);
455 p += rn;
456 Py_DECREF(w);
457 s = t;
458 } else {
459 *p++ = *s++;
460 }
461 continue;
462 }
463 s++;
464 if (s==end) {
465 PyErr_SetString(PyExc_ValueError,
466 "Trailing \\ in string");
467 goto failed;
468 }
469 switch (*s++) {
470 /* XXX This assumes ASCII! */
471 case '\n': break;
472 case '\\': *p++ = '\\'; break;
473 case '\'': *p++ = '\''; break;
474 case '\"': *p++ = '\"'; break;
475 case 'b': *p++ = '\b'; break;
476 case 'f': *p++ = '\014'; break; /* FF */
477 case 't': *p++ = '\t'; break;
478 case 'n': *p++ = '\n'; break;
479 case 'r': *p++ = '\r'; break;
480 case 'v': *p++ = '\013'; break; /* VT */
481 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
482 case '0': case '1': case '2': case '3':
483 case '4': case '5': case '6': case '7':
484 c = s[-1] - '0';
485 if (s < end && '0' <= *s && *s <= '7') {
486 c = (c<<3) + *s++ - '0';
487 if (s < end && '0' <= *s && *s <= '7')
488 c = (c<<3) + *s++ - '0';
489 }
490 *p++ = c;
491 break;
492 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000493 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 unsigned int x = 0;
495 c = Py_CHARMASK(*s);
496 s++;
David Malcolm96960882010-11-05 17:23:41 +0000497 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000499 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 x = 10 + c - 'a';
501 else
502 x = 10 + c - 'A';
503 x = x << 4;
504 c = Py_CHARMASK(*s);
505 s++;
David Malcolm96960882010-11-05 17:23:41 +0000506 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000508 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 x += 10 + c - 'a';
510 else
511 x += 10 + c - 'A';
512 *p++ = x;
513 break;
514 }
515 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200516 PyErr_Format(PyExc_ValueError,
517 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200518 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 goto failed;
520 }
521 if (strcmp(errors, "replace") == 0) {
522 *p++ = '?';
523 } else if (strcmp(errors, "ignore") == 0)
524 /* do nothing */;
525 else {
526 PyErr_Format(PyExc_ValueError,
527 "decoding error; unknown "
528 "error handling code: %.400s",
529 errors);
530 goto failed;
531 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200532 /* skip \x */
533 if (s < end && Py_ISXDIGIT(s[0]))
534 s++; /* and a hexdigit */
535 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 default:
537 *p++ = '\\';
538 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200539 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 UTF-8 bytes may follow. */
541 }
542 }
543 if (p-buf < newlen)
544 _PyBytes_Resize(&v, p - buf);
545 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000546 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 Py_DECREF(v);
548 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000549}
550
551/* -------------------------------------------------------------------- */
552/* object api */
553
554Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200555PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 if (!PyBytes_Check(op)) {
558 PyErr_Format(PyExc_TypeError,
559 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
560 return -1;
561 }
562 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000563}
564
565char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200566PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000567{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 if (!PyBytes_Check(op)) {
569 PyErr_Format(PyExc_TypeError,
570 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
571 return NULL;
572 }
573 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000574}
575
576int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200577PyBytes_AsStringAndSize(PyObject *obj,
578 char **s,
579 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000580{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 if (s == NULL) {
582 PyErr_BadInternalCall();
583 return -1;
584 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 if (!PyBytes_Check(obj)) {
587 PyErr_Format(PyExc_TypeError,
588 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
589 return -1;
590 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 *s = PyBytes_AS_STRING(obj);
593 if (len != NULL)
594 *len = PyBytes_GET_SIZE(obj);
595 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +0300596 PyErr_SetString(PyExc_ValueError,
597 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 return -1;
599 }
600 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000601}
Neal Norwitz6968b052007-02-27 19:02:19 +0000602
603/* -------------------------------------------------------------------- */
604/* Methods */
605
Eric Smith0923d1d2009-04-16 20:16:10 +0000606#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000607
608#include "stringlib/fastsearch.h"
609#include "stringlib/count.h"
610#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200611#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000612#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000613#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000614#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000615
Eric Smith0f78bff2009-11-30 01:01:42 +0000616#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000617
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000618PyObject *
619PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000620{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200621 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200622 Py_ssize_t i, length = Py_SIZE(op);
623 size_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 unsigned char quote, *s, *p;
626
627 /* Compute size of output string */
628 squotes = dquotes = 0;
629 newsize = 3; /* b'' */
630 s = (unsigned char*)op->ob_sval;
631 for (i = 0; i < length; i++) {
632 switch(s[i]) {
633 case '\'': squotes++; newsize++; break;
634 case '"': dquotes++; newsize++; break;
635 case '\\': case '\t': case '\n': case '\r':
636 newsize += 2; break; /* \C */
637 default:
638 if (s[i] < ' ' || s[i] >= 0x7f)
639 newsize += 4; /* \xHH */
640 else
641 newsize++;
642 }
643 }
644 quote = '\'';
645 if (smartquotes && squotes && !dquotes)
646 quote = '"';
647 if (squotes && quote == '\'')
648 newsize += squotes;
Victor Stinner6430fd52011-09-29 04:02:13 +0200649
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200650 if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 PyErr_SetString(PyExc_OverflowError,
652 "bytes object is too large to make repr");
653 return NULL;
654 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655
656 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 if (v == NULL) {
658 return NULL;
659 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200660 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000661
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200662 *p++ = 'b', *p++ = quote;
663 for (i = 0; i < length; i++) {
664 unsigned char c = op->ob_sval[i];
665 if (c == quote || c == '\\')
666 *p++ = '\\', *p++ = c;
667 else if (c == '\t')
668 *p++ = '\\', *p++ = 't';
669 else if (c == '\n')
670 *p++ = '\\', *p++ = 'n';
671 else if (c == '\r')
672 *p++ = '\\', *p++ = 'r';
673 else if (c < ' ' || c >= 0x7f) {
674 *p++ = '\\';
675 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200676 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
677 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200679 else
680 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200682 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200683 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200684 return v;
Neal Norwitz6968b052007-02-27 19:02:19 +0000685}
686
Neal Norwitz6968b052007-02-27 19:02:19 +0000687static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000688bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000689{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000691}
692
Neal Norwitz6968b052007-02-27 19:02:19 +0000693static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000694bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000695{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 if (Py_BytesWarningFlag) {
697 if (PyErr_WarnEx(PyExc_BytesWarning,
698 "str() on a bytes instance", 1))
699 return NULL;
700 }
701 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000702}
703
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000704static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000705bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000706{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000708}
Neal Norwitz6968b052007-02-27 19:02:19 +0000709
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000710/* This is also used by PyBytes_Concat() */
711static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000712bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000713{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 Py_ssize_t size;
715 Py_buffer va, vb;
716 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 va.len = -1;
719 vb.len = -1;
720 if (_getbuffer(a, &va) < 0 ||
721 _getbuffer(b, &vb) < 0) {
722 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
723 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
724 goto done;
725 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 /* Optimize end cases */
728 if (va.len == 0 && PyBytes_CheckExact(b)) {
729 result = b;
730 Py_INCREF(result);
731 goto done;
732 }
733 if (vb.len == 0 && PyBytes_CheckExact(a)) {
734 result = a;
735 Py_INCREF(result);
736 goto done;
737 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000738
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 size = va.len + vb.len;
740 if (size < 0) {
741 PyErr_NoMemory();
742 goto done;
743 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000745 result = PyBytes_FromStringAndSize(NULL, size);
746 if (result != NULL) {
747 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
748 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
749 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000750
751 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 if (va.len != -1)
753 PyBuffer_Release(&va);
754 if (vb.len != -1)
755 PyBuffer_Release(&vb);
756 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000757}
Neal Norwitz6968b052007-02-27 19:02:19 +0000758
759static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200760bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000761{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200762 Py_ssize_t i;
763 Py_ssize_t j;
764 Py_ssize_t size;
765 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 size_t nbytes;
767 if (n < 0)
768 n = 0;
769 /* watch out for overflows: the size can overflow int,
770 * and the # of bytes needed can overflow size_t
771 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000772 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 PyErr_SetString(PyExc_OverflowError,
774 "repeated bytes are too long");
775 return NULL;
776 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000777 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
779 Py_INCREF(a);
780 return (PyObject *)a;
781 }
782 nbytes = (size_t)size;
783 if (nbytes + PyBytesObject_SIZE <= nbytes) {
784 PyErr_SetString(PyExc_OverflowError,
785 "repeated bytes are too long");
786 return NULL;
787 }
788 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
789 if (op == NULL)
790 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100791 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 op->ob_shash = -1;
793 op->ob_sval[size] = '\0';
794 if (Py_SIZE(a) == 1 && n > 0) {
795 memset(op->ob_sval, a->ob_sval[0] , n);
796 return (PyObject *) op;
797 }
798 i = 0;
799 if (i < size) {
800 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
801 i = Py_SIZE(a);
802 }
803 while (i < size) {
804 j = (i <= size-i) ? i : size-i;
805 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
806 i += j;
807 }
808 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000809}
810
Guido van Rossum98297ee2007-11-06 21:34:58 +0000811static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000812bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000813{
814 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
815 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000816 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000817 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000818 PyErr_Clear();
819 if (_getbuffer(arg, &varg) < 0)
820 return -1;
821 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
822 varg.buf, varg.len, 0);
823 PyBuffer_Release(&varg);
824 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000825 }
826 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000827 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
828 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000829 }
830
Antoine Pitrou0010d372010-08-15 17:12:55 +0000831 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000832}
833
Neal Norwitz6968b052007-02-27 19:02:19 +0000834static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200835bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000836{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 if (i < 0 || i >= Py_SIZE(a)) {
838 PyErr_SetString(PyExc_IndexError, "index out of range");
839 return NULL;
840 }
841 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000842}
843
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100844Py_LOCAL(int)
845bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
846{
847 int cmp;
848 Py_ssize_t len;
849
850 len = Py_SIZE(a);
851 if (Py_SIZE(b) != len)
852 return 0;
853
854 if (a->ob_sval[0] != b->ob_sval[0])
855 return 0;
856
857 cmp = memcmp(a->ob_sval, b->ob_sval, len);
858 return (cmp == 0);
859}
860
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000861static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000862bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000863{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 int c;
865 Py_ssize_t len_a, len_b;
866 Py_ssize_t min_len;
867 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 /* Make sure both arguments are strings. */
870 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
871 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
872 (PyObject_IsInstance((PyObject*)a,
873 (PyObject*)&PyUnicode_Type) ||
874 PyObject_IsInstance((PyObject*)b,
875 (PyObject*)&PyUnicode_Type))) {
876 if (PyErr_WarnEx(PyExc_BytesWarning,
877 "Comparison between bytes and string", 1))
878 return NULL;
879 }
880 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100882 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100884 case Py_EQ:
885 case Py_LE:
886 case Py_GE:
887 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100889 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100890 case Py_NE:
891 case Py_LT:
892 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100894 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100895 default:
896 PyErr_BadArgument();
897 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 }
899 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100900 else if (op == Py_EQ || op == Py_NE) {
901 int eq = bytes_compare_eq(a, b);
902 eq ^= (op == Py_NE);
903 result = eq ? Py_True : Py_False;
904 }
905 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100906 len_a = Py_SIZE(a);
907 len_b = Py_SIZE(b);
908 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100909 if (min_len > 0) {
910 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100911 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100912 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100914 else
915 c = 0;
916 if (c == 0)
917 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
918 switch (op) {
919 case Py_LT: c = c < 0; break;
920 case Py_LE: c = c <= 0; break;
921 case Py_GT: c = c > 0; break;
922 case Py_GE: c = c >= 0; break;
923 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100924 PyErr_BadArgument();
925 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100926 }
927 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100929
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000930 Py_INCREF(result);
931 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000932}
933
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000934static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000935bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000936{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100937 if (a->ob_shash == -1) {
938 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100939 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100940 }
941 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000942}
943
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000944static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000945bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000946{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000947 if (PyIndex_Check(item)) {
948 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
949 if (i == -1 && PyErr_Occurred())
950 return NULL;
951 if (i < 0)
952 i += PyBytes_GET_SIZE(self);
953 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
954 PyErr_SetString(PyExc_IndexError,
955 "index out of range");
956 return NULL;
957 }
958 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
959 }
960 else if (PySlice_Check(item)) {
961 Py_ssize_t start, stop, step, slicelength, cur, i;
962 char* source_buf;
963 char* result_buf;
964 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000965
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000966 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000967 PyBytes_GET_SIZE(self),
968 &start, &stop, &step, &slicelength) < 0) {
969 return NULL;
970 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 if (slicelength <= 0) {
973 return PyBytes_FromStringAndSize("", 0);
974 }
975 else if (start == 0 && step == 1 &&
976 slicelength == PyBytes_GET_SIZE(self) &&
977 PyBytes_CheckExact(self)) {
978 Py_INCREF(self);
979 return (PyObject *)self;
980 }
981 else if (step == 1) {
982 return PyBytes_FromStringAndSize(
983 PyBytes_AS_STRING(self) + start,
984 slicelength);
985 }
986 else {
987 source_buf = PyBytes_AS_STRING(self);
988 result = PyBytes_FromStringAndSize(NULL, slicelength);
989 if (result == NULL)
990 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 result_buf = PyBytes_AS_STRING(result);
993 for (cur = start, i = 0; i < slicelength;
994 cur += step, i++) {
995 result_buf[i] = source_buf[cur];
996 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000997
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000998 return result;
999 }
1000 }
1001 else {
1002 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001003 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001004 Py_TYPE(item)->tp_name);
1005 return NULL;
1006 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001007}
1008
1009static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001010bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001011{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1013 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001014}
1015
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001016static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 (lenfunc)bytes_length, /*sq_length*/
1018 (binaryfunc)bytes_concat, /*sq_concat*/
1019 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1020 (ssizeargfunc)bytes_item, /*sq_item*/
1021 0, /*sq_slice*/
1022 0, /*sq_ass_item*/
1023 0, /*sq_ass_slice*/
1024 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001025};
1026
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001027static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 (lenfunc)bytes_length,
1029 (binaryfunc)bytes_subscript,
1030 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001031};
1032
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001033static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 (getbufferproc)bytes_buffer_getbuffer,
1035 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001036};
1037
1038
/* Values for the striptype argument of do_strip(), do_xstrip() and
   do_argstrip(): which end(s) of the bytes to strip. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
1042
/*[clinic input]
bytes.split

    sep: object = None
        The delimiter according to which to split the bytes.
        None (the default value) means split on ASCII whitespace characters
        (space, tab, return, newline, formfeed, vertical tab).
    maxsplit: Py_ssize_t = -1
        Maximum number of splits to do.
        -1 (the default value) means no limit.

Return a list of the sections in the bytes, using sep as the delimiter.
[clinic start generated code]*/
1056
1057PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001058"split($self, /, sep=None, maxsplit=-1)\n"
1059"--\n"
1060"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001061"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1062"\n"
1063" sep\n"
1064" The delimiter according which to split the bytes.\n"
1065" None (the default value) means split on ASCII whitespace characters\n"
1066" (space, tab, return, newline, formfeed, vertical tab).\n"
1067" maxsplit\n"
1068" Maximum number of splits to do.\n"
1069" -1 (the default value) means no limit.");
1070
1071#define BYTES_SPLIT_METHODDEF \
1072 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001073
1074static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001075bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001076
1077static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001078bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001079{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001080 PyObject *return_value = NULL;
1081 static char *_keywords[] = {"sep", "maxsplit", NULL};
1082 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001084
1085 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1086 "|On:split", _keywords,
1087 &sep, &maxsplit))
1088 goto exit;
1089 return_value = bytes_split_impl(self, sep, maxsplit);
1090
1091exit:
1092 return return_value;
1093}
1094
1095static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001096bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1097/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001098{
1099 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 const char *s = PyBytes_AS_STRING(self), *sub;
1101 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001102 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 if (maxsplit < 0)
1105 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001106 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001108 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 return NULL;
1110 sub = vsub.buf;
1111 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1114 PyBuffer_Release(&vsub);
1115 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001116}
1117
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001118/*[clinic input]
1119bytes.partition
1120
1121 self: self(type="PyBytesObject *")
1122 sep: object
1123 /
1124
1125Partition the bytes into three parts using the given separator.
1126
1127This will search for the separator sep in the bytes. If the separator is found,
1128returns a 3-tuple containing the part before the separator, the separator
1129itself, and the part after it.
1130
1131If the separator is not found, returns a 3-tuple containing the original bytes
1132object and two empty bytes objects.
1133[clinic start generated code]*/
1134
1135PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001136"partition($self, sep, /)\n"
1137"--\n"
1138"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001139"Partition the bytes into three parts using the given separator.\n"
1140"\n"
1141"This will search for the separator sep in the bytes. If the separator is found,\n"
1142"returns a 3-tuple containing the part before the separator, the separator\n"
1143"itself, and the part after it.\n"
1144"\n"
1145"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1146"object and two empty bytes objects.");
1147
1148#define BYTES_PARTITION_METHODDEF \
1149 {"partition", (PyCFunction)bytes_partition, METH_O, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001150
1151static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001152bytes_partition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001153/*[clinic end generated code: output=b41e119c873c08bc input=6c5b9dcc5a9fd62e]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001154{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001155 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001157
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001158 if (PyBytes_Check(sep)) {
1159 sep_chars = PyBytes_AS_STRING(sep);
1160 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001162 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 return stringlib_partition(
1166 (PyObject*) self,
1167 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001168 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001170}
1171
/*[clinic input]
bytes.rpartition

    self: self(type="PyBytesObject *")
    sep: object
    /

Partition the bytes into three parts using the given separator.

This will search for the separator sep in the bytes, starting at the end. If
the separator is found, returns a 3-tuple containing the part before the
separator, the separator itself, and the part after it.

If the separator is not found, returns a 3-tuple containing two empty bytes
objects and the original bytes object.
[clinic start generated code]*/
1188
1189PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001190"rpartition($self, sep, /)\n"
1191"--\n"
1192"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001193"Partition the bytes into three parts using the given separator.\n"
1194"\n"
1195"This will search for the separator sep in the bytes, starting and the end. If\n"
1196"the separator is found, returns a 3-tuple containing the part before the\n"
1197"separator, the separator itself, and the part after it.\n"
1198"\n"
1199"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1200"objects and the original bytes object.");
1201
1202#define BYTES_RPARTITION_METHODDEF \
1203 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001204
1205static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001206bytes_rpartition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001207/*[clinic end generated code: output=3a620803657196ee input=79bc2932e78e5ce0]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001208{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001209 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001211
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001212 if (PyBytes_Check(sep)) {
1213 sep_chars = PyBytes_AS_STRING(sep);
1214 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001216 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 return stringlib_rpartition(
1220 (PyObject*) self,
1221 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001222 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001224}
1225
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001226/*[clinic input]
1227bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001228
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001229Return a list of the sections in the bytes, using sep as the delimiter.
1230
1231Splitting is done starting at the end of the bytes and working to the front.
1232[clinic start generated code]*/
1233
1234PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001235"rsplit($self, /, sep=None, maxsplit=-1)\n"
1236"--\n"
1237"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001238"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1239"\n"
1240" sep\n"
1241" The delimiter according which to split the bytes.\n"
1242" None (the default value) means split on ASCII whitespace characters\n"
1243" (space, tab, return, newline, formfeed, vertical tab).\n"
1244" maxsplit\n"
1245" Maximum number of splits to do.\n"
1246" -1 (the default value) means no limit.\n"
1247"\n"
1248"Splitting is done starting at the end of the bytes and working to the front.");
1249
1250#define BYTES_RSPLIT_METHODDEF \
1251 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252
Neal Norwitz6968b052007-02-27 19:02:19 +00001253static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001254bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001255
1256static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001257bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001258{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001259 PyObject *return_value = NULL;
1260 static char *_keywords[] = {"sep", "maxsplit", NULL};
1261 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001263
1264 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1265 "|On:rsplit", _keywords,
1266 &sep, &maxsplit))
1267 goto exit;
1268 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1269
1270exit:
1271 return return_value;
1272}
1273
1274static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001275bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1276/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001277{
1278 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 const char *s = PyBytes_AS_STRING(self), *sub;
1280 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001281 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 if (maxsplit < 0)
1284 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001285 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001287 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 return NULL;
1289 sub = vsub.buf;
1290 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1293 PyBuffer_Release(&vsub);
1294 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001295}
1296
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001298/*[clinic input]
1299bytes.join
1300
1301 iterable_of_bytes: object
1302 /
1303
1304Concatenate any number of bytes objects.
1305
1306The bytes whose method is called is inserted in between each pair.
1307
1308The result is returned as a new bytes object.
1309
1310Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1311[clinic start generated code]*/
1312
1313PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001314"join($self, iterable_of_bytes, /)\n"
1315"--\n"
1316"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001317"Concatenate any number of bytes objects.\n"
1318"\n"
1319"The bytes whose method is called is inserted in between each pair.\n"
1320"\n"
1321"The result is returned as a new bytes object.\n"
1322"\n"
1323"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1324
1325#define BYTES_JOIN_METHODDEF \
1326 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327
Neal Norwitz6968b052007-02-27 19:02:19 +00001328static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001329bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1330/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001331{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001332 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001333}
1334
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001335PyObject *
1336_PyBytes_Join(PyObject *sep, PyObject *x)
1337{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 assert(sep != NULL && PyBytes_Check(sep));
1339 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001340 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001341}
1342
/* helper macro to fixup start/end slice values
 *
 * Clamps end to at most len, maps negative start/end to offsets counted
 * from the end of the sequence, and floors both at 0.  start and end must
 * be modifiable lvalues and are evaluated more than once; len may be any
 * expression (every argument is parenthesized in the expansion).
 *
 * NOTE: this expands to two consecutive if statements rather than a single
 * statement, so it must not be used as the unbraced body of an if/else or
 * of a loop.
 */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001357
1358Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001359bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001362 char byte;
1363 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 const char *sub;
1365 Py_ssize_t sub_len;
1366 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001367 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001368
Antoine Pitrouac65d962011-10-20 23:54:17 +02001369 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1370 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372
Antoine Pitrouac65d962011-10-20 23:54:17 +02001373 if (subobj) {
1374 if (_getbuffer(subobj, &subbuf) < 0)
1375 return -2;
1376
1377 sub = subbuf.buf;
1378 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001380 else {
1381 sub = &byte;
1382 sub_len = 1;
1383 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001386 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1388 sub, sub_len, start, end);
1389 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001390 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1392 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001393
1394 if (subobj)
1395 PyBuffer_Release(&subbuf);
1396
1397 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001398}
1399
1400
1401PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001402"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001403\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001404Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001405such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001407\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001408Return -1 on failure.");
1409
Neal Norwitz6968b052007-02-27 19:02:19 +00001410static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001411bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 Py_ssize_t result = bytes_find_internal(self, args, +1);
1414 if (result == -2)
1415 return NULL;
1416 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001417}
1418
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419
1420PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001421"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001422\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423Like B.find() but raise ValueError when the substring is not found.");
1424
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001425static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001426bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001427{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 Py_ssize_t result = bytes_find_internal(self, args, +1);
1429 if (result == -2)
1430 return NULL;
1431 if (result == -1) {
1432 PyErr_SetString(PyExc_ValueError,
1433 "substring not found");
1434 return NULL;
1435 }
1436 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001437}
1438
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001439
1440PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001441"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001442\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001444such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001445arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001446\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447Return -1 on failure.");
1448
Neal Norwitz6968b052007-02-27 19:02:19 +00001449static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001450bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001451{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 Py_ssize_t result = bytes_find_internal(self, args, -1);
1453 if (result == -2)
1454 return NULL;
1455 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001456}
1457
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001458
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001459PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001460"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461\n\
1462Like B.rfind() but raise ValueError when the substring is not found.");
1463
1464static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001465bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 Py_ssize_t result = bytes_find_internal(self, args, -1);
1468 if (result == -2)
1469 return NULL;
1470 if (result == -1) {
1471 PyErr_SetString(PyExc_ValueError,
1472 "substring not found");
1473 return NULL;
1474 }
1475 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001476}
1477
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001478
1479Py_LOCAL_INLINE(PyObject *)
1480do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001481{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 Py_buffer vsep;
1483 char *s = PyBytes_AS_STRING(self);
1484 Py_ssize_t len = PyBytes_GET_SIZE(self);
1485 char *sep;
1486 Py_ssize_t seplen;
1487 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 if (_getbuffer(sepobj, &vsep) < 0)
1490 return NULL;
1491 sep = vsep.buf;
1492 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 i = 0;
1495 if (striptype != RIGHTSTRIP) {
1496 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1497 i++;
1498 }
1499 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 j = len;
1502 if (striptype != LEFTSTRIP) {
1503 do {
1504 j--;
1505 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1506 j++;
1507 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1512 Py_INCREF(self);
1513 return (PyObject*)self;
1514 }
1515 else
1516 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001517}
1518
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001519
1520Py_LOCAL_INLINE(PyObject *)
1521do_strip(PyBytesObject *self, int striptype)
1522{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001523 char *s = PyBytes_AS_STRING(self);
1524 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 i = 0;
1527 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001528 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 i++;
1530 }
1531 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 j = len;
1534 if (striptype != LEFTSTRIP) {
1535 do {
1536 j--;
David Malcolm96960882010-11-05 17:23:41 +00001537 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 j++;
1539 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1542 Py_INCREF(self);
1543 return (PyObject*)self;
1544 }
1545 else
1546 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001547}
1548
1549
1550Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001551do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001552{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001553 if (bytes != NULL && bytes != Py_None) {
1554 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 }
1556 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001557}
1558
/* bytes.strip([bytes]).
   The text from "[clinic start generated code]" down to the
   "[clinic end generated code ...]" marker is Argument Clinic output
   (docstring, method-table macro, forward declaration and the
   argument-unpacking wrapper); only the body of bytes_strip_impl() is
   hand-written. */
/*[clinic input]
bytes.strip

    self: self(type="PyBytesObject *")
    bytes: object = None
    /

Strip leading and trailing bytes contained in the argument.

If the argument is omitted or None, strip leading and trailing ASCII whitespace.
[clinic start generated code]*/

PyDoc_STRVAR(bytes_strip__doc__,
"strip($self, bytes=None, /)\n"
"--\n"
"\n"
"Strip leading and trailing bytes contained in the argument.\n"
"\n"
"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");

#define BYTES_STRIP_METHODDEF \
 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},

static PyObject *
bytes_strip_impl(PyBytesObject *self, PyObject *bytes);

static PyObject *
bytes_strip(PyBytesObject *self, PyObject *args)
{
    PyObject *return_value = NULL;
    PyObject *bytes = Py_None;

    if (!PyArg_UnpackTuple(args, "strip",
        0, 1,
        &bytes))
        goto exit;
    return_value = bytes_strip_impl(self, bytes);

exit:
    return return_value;
}

static PyObject *
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
{
    /* `bytes` is Py_None when the caller passed no argument (the wrapper's
       default); do_argstrip() then strips whitespace instead of a
       caller-supplied byte set.  BOTHSTRIP requests trimming at both
       ends, per the docstring above. */
    return do_argstrip(self, BOTHSTRIP, bytes);
}
1607
/* bytes.lstrip([bytes]).
   The text from "[clinic start generated code]" down to the
   "[clinic end generated code ...]" marker is Argument Clinic output
   (docstring, method-table macro, forward declaration and the
   argument-unpacking wrapper); only the body of bytes_lstrip_impl() is
   hand-written. */
/*[clinic input]
bytes.lstrip

    self: self(type="PyBytesObject *")
    bytes: object = None
    /

Strip leading bytes contained in the argument.

If the argument is omitted or None, strip leading ASCII whitespace.
[clinic start generated code]*/

PyDoc_STRVAR(bytes_lstrip__doc__,
"lstrip($self, bytes=None, /)\n"
"--\n"
"\n"
"Strip leading bytes contained in the argument.\n"
"\n"
"If the argument is omitted or None, strip leading ASCII whitespace.");

#define BYTES_LSTRIP_METHODDEF \
 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},

static PyObject *
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);

static PyObject *
bytes_lstrip(PyBytesObject *self, PyObject *args)
{
    PyObject *return_value = NULL;
    PyObject *bytes = Py_None;

    if (!PyArg_UnpackTuple(args, "lstrip",
        0, 1,
        &bytes))
        goto exit;
    return_value = bytes_lstrip_impl(self, bytes);

exit:
    return return_value;
}

static PyObject *
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
{
    /* `bytes` is Py_None when the caller passed no argument (the wrapper's
       default); do_argstrip() then strips whitespace instead of a
       caller-supplied byte set.  With LEFTSTRIP the strip helpers skip
       their trailing-end pass, so only the front is trimmed. */
    return do_argstrip(self, LEFTSTRIP, bytes);
}
1656
/* bytes.rstrip([bytes]).
   The text from "[clinic start generated code]" down to the
   "[clinic end generated code ...]" marker is Argument Clinic output
   (docstring, method-table macro, forward declaration and the
   argument-unpacking wrapper); only the body of bytes_rstrip_impl() is
   hand-written. */
/*[clinic input]
bytes.rstrip

    self: self(type="PyBytesObject *")
    bytes: object = None
    /

Strip trailing bytes contained in the argument.

If the argument is omitted or None, strip trailing ASCII whitespace.
[clinic start generated code]*/

PyDoc_STRVAR(bytes_rstrip__doc__,
"rstrip($self, bytes=None, /)\n"
"--\n"
"\n"
"Strip trailing bytes contained in the argument.\n"
"\n"
"If the argument is omitted or None, strip trailing ASCII whitespace.");

#define BYTES_RSTRIP_METHODDEF \
 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},

static PyObject *
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);

static PyObject *
bytes_rstrip(PyBytesObject *self, PyObject *args)
{
    PyObject *return_value = NULL;
    PyObject *bytes = Py_None;

    if (!PyArg_UnpackTuple(args, "rstrip",
        0, 1,
        &bytes))
        goto exit;
    return_value = bytes_rstrip_impl(self, bytes);

exit:
    return return_value;
}

static PyObject *
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
{
    /* `bytes` is Py_None when the caller passed no argument (the wrapper's
       default); do_argstrip() then strips whitespace instead of a
       caller-supplied byte set.  With RIGHTSTRIP the strip helpers skip
       their leading-end pass, so only the tail is trimmed. */
    return do_argstrip(self, RIGHTSTRIP, bytes);
}
Neal Norwitz6968b052007-02-27 19:02:19 +00001705
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001706
1707PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001708"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001709\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001711string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001712as in slice notation.");
1713
1714static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001715bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 PyObject *sub_obj;
1718 const char *str = PyBytes_AS_STRING(self), *sub;
1719 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001720 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
Antoine Pitrouac65d962011-10-20 23:54:17 +02001723 Py_buffer vsub;
1724 PyObject *count_obj;
1725
1726 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1727 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Antoine Pitrouac65d962011-10-20 23:54:17 +02001730 if (sub_obj) {
1731 if (_getbuffer(sub_obj, &vsub) < 0)
1732 return NULL;
1733
1734 sub = vsub.buf;
1735 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001737 else {
1738 sub = &byte;
1739 sub_len = 1;
1740 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001742 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001743
Antoine Pitrouac65d962011-10-20 23:54:17 +02001744 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1746 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001747
1748 if (sub_obj)
1749 PyBuffer_Release(&vsub);
1750
1751 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752}
1753
1754
/* bytes.translate(table[, deletechars]).
   Everything after "[clinic start generated code]" below is Argument
   Clinic output; the generated region continues past this wrapper and
   ends at the marker that follows the bytes_translate_impl() signature.

   The wrapper dispatches on the number of positional arguments:
     1 argument  -> only `table` is parsed; `deletechars` stays NULL and
                    group_right_1 stays 0;
     2 arguments -> `table` and `deletechars` are parsed and
                    group_right_1 is set to 1;
     otherwise   -> TypeError.
   It returns whatever bytes_translate_impl() returns, or NULL on a
   parsing error. */
/*[clinic input]
bytes.translate

    self: self(type="PyBytesObject *")
    table: object
        Translation table, which must be a bytes object of length 256.
    [
    deletechars: object
    ]
    /

Return a copy with each character mapped by the given translation table.

All characters occurring in the optional argument deletechars are removed.
The remaining characters are mapped through the given translation table.
[clinic start generated code]*/

PyDoc_STRVAR(bytes_translate__doc__,
"translate(table, [deletechars])\n"
"Return a copy with each character mapped by the given translation table.\n"
"\n"
" table\n"
" Translation table, which must be a bytes object of length 256.\n"
"\n"
"All characters occurring in the optional argument deletechars are removed.\n"
"The remaining characters are mapped through the given translation table.");

#define BYTES_TRANSLATE_METHODDEF \
 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},

static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);

static PyObject *
bytes_translate(PyBytesObject *self, PyObject *args)
{
    PyObject *return_value = NULL;
    PyObject *table;
    int group_right_1 = 0;
    PyObject *deletechars = NULL;

    switch (PyTuple_GET_SIZE(args)) {
        case 1:
            if (!PyArg_ParseTuple(args, "O:translate", &table))
                goto exit;
            break;
        case 2:
            if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
                goto exit;
            group_right_1 = 1;
            break;
        default:
            PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
            goto exit;
    }
    return_value = bytes_translate_impl(self, table, group_right_1, deletechars);

exit:
    return return_value;
}
1815
1816static PyObject *
1817bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001818/*[clinic end generated code: output=f0f29a57f41df5d8 input=a90fad893c3c88d7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001819{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001820 char *input, *output;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001821 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001822 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001823 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001824 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 Py_ssize_t inlen, tablen, dellen = 0;
1826 PyObject *result;
1827 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001828
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001829 if (PyBytes_Check(table)) {
1830 table_chars = PyBytes_AS_STRING(table);
1831 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 else if (table == Py_None) {
1834 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 tablen = 256;
1836 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001837 else if (PyObject_AsCharBuffer(table, &table_chars, &tablen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 if (tablen != 256) {
1841 PyErr_SetString(PyExc_ValueError,
1842 "translation table must be 256 characters long");
1843 return NULL;
1844 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001845
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001846 if (deletechars != NULL) {
1847 if (PyBytes_Check(deletechars)) {
1848 del_table_chars = PyBytes_AS_STRING(deletechars);
1849 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001851 else if (PyObject_AsCharBuffer(deletechars, &del_table_chars, &dellen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 return NULL;
1853 }
1854 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001855 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 dellen = 0;
1857 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 inlen = PyBytes_GET_SIZE(input_obj);
1860 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1861 if (result == NULL)
1862 return NULL;
1863 output_start = output = PyBytes_AsString(result);
1864 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001865
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001866 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 /* If no deletions are required, use faster code */
1868 for (i = inlen; --i >= 0; ) {
1869 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001870 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 changed = 1;
1872 }
1873 if (changed || !PyBytes_CheckExact(input_obj))
1874 return result;
1875 Py_DECREF(result);
1876 Py_INCREF(input_obj);
1877 return input_obj;
1878 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001879
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001880 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001881 for (i = 0; i < 256; i++)
1882 trans_table[i] = Py_CHARMASK(i);
1883 } else {
1884 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001885 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001886 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001888 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001889 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 for (i = inlen; --i >= 0; ) {
1892 c = Py_CHARMASK(*input++);
1893 if (trans_table[c] != -1)
1894 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1895 continue;
1896 changed = 1;
1897 }
1898 if (!changed && PyBytes_CheckExact(input_obj)) {
1899 Py_DECREF(result);
1900 Py_INCREF(input_obj);
1901 return input_obj;
1902 }
1903 /* Fix the size of the resulting string */
1904 if (inlen > 0)
1905 _PyBytes_Resize(&result, output - output_start);
1906 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001907}
1908
1909
/* bytes.maketrans(frm, to) -- static method.
   The text from "[clinic start generated code]" down to the
   "[clinic end generated code ...]" marker is Argument Clinic output
   (docstring, method-table macro, forward declaration and the
   argument-unpacking wrapper, which requires exactly two positional
   arguments); only the body of bytes_maketrans_impl() is hand-written. */
/*[clinic input]

@staticmethod
bytes.maketrans

    frm: object
    to: object
    /

Return a translation table useable for the bytes or bytearray translate method.

The returned table will be one where each byte in frm is mapped to the byte at
the same position in to.

The bytes objects frm and to must be of the same length.
[clinic start generated code]*/

PyDoc_STRVAR(bytes_maketrans__doc__,
"maketrans(frm, to, /)\n"
"--\n"
"\n"
"Return a translation table useable for the bytes or bytearray translate method.\n"
"\n"
"The returned table will be one where each byte in frm is mapped to the byte at\n"
"the same position in to.\n"
"\n"
"The bytes objects frm and to must be of the same length.");

#define BYTES_MAKETRANS_METHODDEF \
 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},

static PyObject *
bytes_maketrans_impl(PyObject *frm, PyObject *to);

static PyObject *
bytes_maketrans(void *null, PyObject *args)
{
    PyObject *return_value = NULL;
    PyObject *frm;
    PyObject *to;

    if (!PyArg_UnpackTuple(args, "maketrans",
        2, 2,
        &frm, &to))
        goto exit;
    return_value = bytes_maketrans_impl(frm, to);

exit:
    return return_value;
}

static PyObject *
bytes_maketrans_impl(PyObject *frm, PyObject *to)
/*[clinic end generated code: output=89a3c3556975e466 input=d204f680f85da382]*/
{
    /* Thin forwarder: this function adds nothing of its own, the work is
       done by _Py_bytes_maketrans(), which is defined outside this part
       of the file. */
    return _Py_bytes_maketrans(frm, to);
}
1967
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001968/* find and count characters and substrings */
1969
/* findchar(target, target_len, c): pointer to the first byte equal to `c`
   within the first `target_len` bytes at `target`, or NULL when there is
   none.  A thin wrapper over memchr() that yields a char pointer.  Every
   macro argument is parenthesized so that any expression can be passed
   safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1972
1973/* String ops must return a string. */
1974/* If the object is subclass of string, create a copy */
1975Py_LOCAL(PyBytesObject *)
1976return_self(PyBytesObject *self)
1977{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 if (PyBytes_CheckExact(self)) {
1979 Py_INCREF(self);
1980 return self;
1981 }
1982 return (PyBytesObject *)PyBytes_FromStringAndSize(
1983 PyBytes_AS_STRING(self),
1984 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985}
1986
1987Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001988countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001990 Py_ssize_t count=0;
1991 const char *start=target;
1992 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001994 while ( (start=findchar(start, end-start, c)) != NULL ) {
1995 count++;
1996 if (count >= maxcount)
1997 break;
1998 start += 1;
1999 }
2000 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001}
2002
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
2004/* Algorithms for different cases of string replacement */
2005
2006/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2007Py_LOCAL(PyBytesObject *)
2008replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002009 const char *to_s, Py_ssize_t to_len,
2010 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002012 char *self_s, *result_s;
2013 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002014 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002015 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002018
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002019 /* 1 at the end plus 1 after every character;
2020 count = min(maxcount, self_len + 1) */
2021 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002023 else
2024 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2025 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002027 /* Check for overflow */
2028 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002029 assert(count > 0);
2030 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 PyErr_SetString(PyExc_OverflowError,
2032 "replacement bytes are too long");
2033 return NULL;
2034 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002035 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 if (! (result = (PyBytesObject *)
2038 PyBytes_FromStringAndSize(NULL, result_len)) )
2039 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 self_s = PyBytes_AS_STRING(self);
2042 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002044 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 /* Lay the first one down (guaranteed this will occur) */
2047 Py_MEMCPY(result_s, to_s, to_len);
2048 result_s += to_len;
2049 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 for (i=0; i<count; i++) {
2052 *result_s++ = *self_s++;
2053 Py_MEMCPY(result_s, to_s, to_len);
2054 result_s += to_len;
2055 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002057 /* Copy the rest of the original string */
2058 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061}
2062
2063/* Special case for deleting a single character */
2064/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2065Py_LOCAL(PyBytesObject *)
2066replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 char *self_s, *result_s;
2070 char *start, *next, *end;
2071 Py_ssize_t self_len, result_len;
2072 Py_ssize_t count;
2073 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 self_len = PyBytes_GET_SIZE(self);
2076 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 count = countchar(self_s, self_len, from_c, maxcount);
2079 if (count == 0) {
2080 return return_self(self);
2081 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 result_len = self_len - count; /* from_len == 1 */
2084 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002086 if ( (result = (PyBytesObject *)
2087 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2088 return NULL;
2089 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 start = self_s;
2092 end = self_s + self_len;
2093 while (count-- > 0) {
2094 next = findchar(start, end-start, from_c);
2095 if (next == NULL)
2096 break;
2097 Py_MEMCPY(result_s, start, next-start);
2098 result_s += (next-start);
2099 start = next+1;
2100 }
2101 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002104}
2105
2106/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2107
2108Py_LOCAL(PyBytesObject *)
2109replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 const char *from_s, Py_ssize_t from_len,
2111 Py_ssize_t maxcount) {
2112 char *self_s, *result_s;
2113 char *start, *next, *end;
2114 Py_ssize_t self_len, result_len;
2115 Py_ssize_t count, offset;
2116 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002118 self_len = PyBytes_GET_SIZE(self);
2119 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 count = stringlib_count(self_s, self_len,
2122 from_s, from_len,
2123 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 if (count == 0) {
2126 /* no matches */
2127 return return_self(self);
2128 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 result_len = self_len - (count * from_len);
2131 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002133 if ( (result = (PyBytesObject *)
2134 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2135 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 start = self_s;
2140 end = self_s + self_len;
2141 while (count-- > 0) {
2142 offset = stringlib_find(start, end-start,
2143 from_s, from_len,
2144 0);
2145 if (offset == -1)
2146 break;
2147 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002151 result_s += (next-start);
2152 start = next+from_len;
2153 }
2154 Py_MEMCPY(result_s, start, end-start);
2155 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156}
2157
2158/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2159Py_LOCAL(PyBytesObject *)
2160replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 char from_c, char to_c,
2162 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 char *self_s, *result_s, *start, *end, *next;
2165 Py_ssize_t self_len;
2166 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 /* The result string will be the same size */
2169 self_s = PyBytes_AS_STRING(self);
2170 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 if (next == NULL) {
2175 /* No matches; return the original string */
2176 return return_self(self);
2177 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 /* Need to make a new string */
2180 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2181 if (result == NULL)
2182 return NULL;
2183 result_s = PyBytes_AS_STRING(result);
2184 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 /* change everything in-place, starting with this one */
2187 start = result_s + (next-self_s);
2188 *start = to_c;
2189 start++;
2190 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 while (--maxcount > 0) {
2193 next = findchar(start, end-start, from_c);
2194 if (next == NULL)
2195 break;
2196 *next = to_c;
2197 start = next+1;
2198 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201}
2202
2203/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2204Py_LOCAL(PyBytesObject *)
2205replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002206 const char *from_s, Py_ssize_t from_len,
2207 const char *to_s, Py_ssize_t to_len,
2208 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002210 char *result_s, *start, *end;
2211 char *self_s;
2212 Py_ssize_t self_len, offset;
2213 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002217 self_s = PyBytes_AS_STRING(self);
2218 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002220 offset = stringlib_find(self_s, self_len,
2221 from_s, from_len,
2222 0);
2223 if (offset == -1) {
2224 /* No matches; return the original string */
2225 return return_self(self);
2226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002228 /* Need to make a new string */
2229 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2230 if (result == NULL)
2231 return NULL;
2232 result_s = PyBytes_AS_STRING(result);
2233 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002235 /* change everything in-place, starting with this one */
2236 start = result_s + offset;
2237 Py_MEMCPY(start, to_s, from_len);
2238 start += from_len;
2239 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002241 while ( --maxcount > 0) {
2242 offset = stringlib_find(start, end-start,
2243 from_s, from_len,
2244 0);
2245 if (offset==-1)
2246 break;
2247 Py_MEMCPY(start+offset, to_s, from_len);
2248 start += offset+from_len;
2249 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002251 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002252}
2253
2254/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2255Py_LOCAL(PyBytesObject *)
2256replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002257 char from_c,
2258 const char *to_s, Py_ssize_t to_len,
2259 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 char *self_s, *result_s;
2262 char *start, *next, *end;
2263 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002264 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002265 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002267 self_s = PyBytes_AS_STRING(self);
2268 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 count = countchar(self_s, self_len, from_c, maxcount);
2271 if (count == 0) {
2272 /* no matches, return unchanged */
2273 return return_self(self);
2274 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002276 /* use the difference between current and new, hence the "-1" */
2277 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002278 assert(count > 0);
2279 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002280 PyErr_SetString(PyExc_OverflowError,
2281 "replacement bytes are too long");
2282 return NULL;
2283 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002284 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 if ( (result = (PyBytesObject *)
2287 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2288 return NULL;
2289 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002291 start = self_s;
2292 end = self_s + self_len;
2293 while (count-- > 0) {
2294 next = findchar(start, end-start, from_c);
2295 if (next == NULL)
2296 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002298 if (next == start) {
2299 /* replace with the 'to' */
2300 Py_MEMCPY(result_s, to_s, to_len);
2301 result_s += to_len;
2302 start += 1;
2303 } else {
2304 /* copy the unchanged old then the 'to' */
2305 Py_MEMCPY(result_s, start, next-start);
2306 result_s += (next-start);
2307 Py_MEMCPY(result_s, to_s, to_len);
2308 result_s += to_len;
2309 start = next+1;
2310 }
2311 }
2312 /* Copy the remainder of the remaining string */
2313 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002314
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002316}
2317
2318/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2319Py_LOCAL(PyBytesObject *)
2320replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 const char *from_s, Py_ssize_t from_len,
2322 const char *to_s, Py_ssize_t to_len,
2323 Py_ssize_t maxcount) {
2324 char *self_s, *result_s;
2325 char *start, *next, *end;
2326 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002327 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002328 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 self_s = PyBytes_AS_STRING(self);
2331 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002332
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 count = stringlib_count(self_s, self_len,
2334 from_s, from_len,
2335 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 if (count == 0) {
2338 /* no matches, return unchanged */
2339 return return_self(self);
2340 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 /* Check for overflow */
2343 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002344 assert(count > 0);
2345 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 PyErr_SetString(PyExc_OverflowError,
2347 "replacement bytes are too long");
2348 return NULL;
2349 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002350 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002352 if ( (result = (PyBytesObject *)
2353 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2354 return NULL;
2355 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 start = self_s;
2358 end = self_s + self_len;
2359 while (count-- > 0) {
2360 offset = stringlib_find(start, end-start,
2361 from_s, from_len,
2362 0);
2363 if (offset == -1)
2364 break;
2365 next = start+offset;
2366 if (next == start) {
2367 /* replace with the 'to' */
2368 Py_MEMCPY(result_s, to_s, to_len);
2369 result_s += to_len;
2370 start += from_len;
2371 } else {
2372 /* copy the unchanged old then the 'to' */
2373 Py_MEMCPY(result_s, start, next-start);
2374 result_s += (next-start);
2375 Py_MEMCPY(result_s, to_s, to_len);
2376 result_s += to_len;
2377 start = next+from_len;
2378 }
2379 }
2380 /* Copy the remainder of the remaining string */
2381 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002384}
2385
2386
2387Py_LOCAL(PyBytesObject *)
2388replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 const char *from_s, Py_ssize_t from_len,
2390 const char *to_s, Py_ssize_t to_len,
2391 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002392{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 if (maxcount < 0) {
2394 maxcount = PY_SSIZE_T_MAX;
2395 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2396 /* nothing to do; return the original string */
2397 return return_self(self);
2398 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002400 if (maxcount == 0 ||
2401 (from_len == 0 && to_len == 0)) {
2402 /* nothing to do; return the original string */
2403 return return_self(self);
2404 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 if (from_len == 0) {
2409 /* insert the 'to' string everywhere. */
2410 /* >>> "Python".replace("", ".") */
2411 /* '.P.y.t.h.o.n.' */
2412 return replace_interleave(self, to_s, to_len, maxcount);
2413 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2416 /* point for an empty self string to generate a non-empty string */
2417 /* Special case so the remaining code always gets a non-empty string */
2418 if (PyBytes_GET_SIZE(self) == 0) {
2419 return return_self(self);
2420 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 if (to_len == 0) {
2423 /* delete all occurrences of 'from' string */
2424 if (from_len == 1) {
2425 return replace_delete_single_character(
2426 self, from_s[0], maxcount);
2427 } else {
2428 return replace_delete_substring(self, from_s,
2429 from_len, maxcount);
2430 }
2431 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002432
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002433 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 if (from_len == to_len) {
2436 if (from_len == 1) {
2437 return replace_single_character_in_place(
2438 self,
2439 from_s[0],
2440 to_s[0],
2441 maxcount);
2442 } else {
2443 return replace_substring_in_place(
2444 self, from_s, from_len, to_s, to_len,
2445 maxcount);
2446 }
2447 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 /* Otherwise use the more generic algorithms */
2450 if (from_len == 1) {
2451 return replace_single_character(self, from_s[0],
2452 to_s, to_len, maxcount);
2453 } else {
2454 /* len('from')>=2, len('to')>=1 */
2455 return replace_substring(self, from_s, from_len, to_s, to_len,
2456 maxcount);
2457 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002458}
2459
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002460
2461/*[clinic input]
2462bytes.replace
2463
2464 old: object
2465 new: object
2466 count: Py_ssize_t = -1
2467 Maximum number of occurrences to replace.
2468 -1 (the default value) means replace all occurrences.
2469 /
2470
2471Return a copy with all occurrences of substring old replaced by new.
2472
2473If the optional argument count is given, only the first count occurrences are
2474replaced.
2475[clinic start generated code]*/
2476
2477PyDoc_STRVAR(bytes_replace__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002478"replace($self, old, new, count=-1, /)\n"
2479"--\n"
2480"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002481"Return a copy with all occurrences of substring old replaced by new.\n"
2482"\n"
2483" count\n"
2484" Maximum number of occurrences to replace.\n"
2485" -1 (the default value) means replace all occurrences.\n"
2486"\n"
2487"If the optional argument count is given, only the first count occurrences are\n"
2488"replaced.");
2489
2490#define BYTES_REPLACE_METHODDEF \
2491 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
2493static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002494bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002495
2496static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002497bytes_replace(PyBytesObject*self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002499 PyObject *return_value = NULL;
2500 PyObject *old;
2501 PyObject *new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002504 if (!PyArg_ParseTuple(args,
2505 "OO|n:replace",
2506 &old, &new, &count))
2507 goto exit;
2508 return_value = bytes_replace_impl(self, old, new, count);
2509
2510exit:
2511 return return_value;
2512}
2513
2514static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002515bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count)
2516/*[clinic end generated code: output=14ce72f4f9cb91cf input=d3ac254ea50f4ac1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002517{
2518 const char *old_s, *new_s;
2519 Py_ssize_t old_len, new_len;
2520
2521 if (PyBytes_Check(old)) {
2522 old_s = PyBytes_AS_STRING(old);
2523 old_len = PyBytes_GET_SIZE(old);
2524 }
2525 else if (PyObject_AsCharBuffer(old, &old_s, &old_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002528 if (PyBytes_Check(new)) {
2529 new_s = PyBytes_AS_STRING(new);
2530 new_len = PyBytes_GET_SIZE(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002531 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002532 else if (PyObject_AsCharBuffer(new, &new_s, &new_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 return (PyObject *)replace((PyBytesObject *) self,
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002536 old_s, old_len,
2537 new_s, new_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002538}
2539
2540/** End DALKE **/
2541
2542/* Matches the end (direction >= 0) or start (direction < 0) of self
2543 * against substr, using the start and end arguments. Returns
2544 * -1 on error, 0 if not found and 1 if found.
2545 */
2546Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002547_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002549{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002550 Py_ssize_t len = PyBytes_GET_SIZE(self);
2551 Py_ssize_t slen;
2552 const char* sub;
2553 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002555 if (PyBytes_Check(substr)) {
2556 sub = PyBytes_AS_STRING(substr);
2557 slen = PyBytes_GET_SIZE(substr);
2558 }
2559 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2560 return -1;
2561 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002565 if (direction < 0) {
2566 /* startswith */
2567 if (start+slen > len)
2568 return 0;
2569 } else {
2570 /* endswith */
2571 if (end-start < slen || start > len)
2572 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 if (end-slen > start)
2575 start = end - slen;
2576 }
2577 if (end-start >= slen)
2578 return ! memcmp(str+start, sub, slen);
2579 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002580}
2581
2582
2583PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002584"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585\n\
2586Return True if B starts with the specified prefix, False otherwise.\n\
2587With optional start, test B beginning at that position.\n\
2588With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002589prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590
2591static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002592bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002593{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002594 Py_ssize_t start = 0;
2595 Py_ssize_t end = PY_SSIZE_T_MAX;
2596 PyObject *subobj;
2597 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002598
Jesus Ceaac451502011-04-20 17:09:23 +02002599 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 return NULL;
2601 if (PyTuple_Check(subobj)) {
2602 Py_ssize_t i;
2603 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2604 result = _bytes_tailmatch(self,
2605 PyTuple_GET_ITEM(subobj, i),
2606 start, end, -1);
2607 if (result == -1)
2608 return NULL;
2609 else if (result) {
2610 Py_RETURN_TRUE;
2611 }
2612 }
2613 Py_RETURN_FALSE;
2614 }
2615 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002616 if (result == -1) {
2617 if (PyErr_ExceptionMatches(PyExc_TypeError))
2618 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2619 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002621 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 else
2623 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624}
2625
2626
2627PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002628"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002629\n\
2630Return True if B ends with the specified suffix, False otherwise.\n\
2631With optional start, test B beginning at that position.\n\
2632With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002633suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634
2635static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002636bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 Py_ssize_t start = 0;
2639 Py_ssize_t end = PY_SSIZE_T_MAX;
2640 PyObject *subobj;
2641 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642
Jesus Ceaac451502011-04-20 17:09:23 +02002643 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 return NULL;
2645 if (PyTuple_Check(subobj)) {
2646 Py_ssize_t i;
2647 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2648 result = _bytes_tailmatch(self,
2649 PyTuple_GET_ITEM(subobj, i),
2650 start, end, +1);
2651 if (result == -1)
2652 return NULL;
2653 else if (result) {
2654 Py_RETURN_TRUE;
2655 }
2656 }
2657 Py_RETURN_FALSE;
2658 }
2659 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002660 if (result == -1) {
2661 if (PyErr_ExceptionMatches(PyExc_TypeError))
2662 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2663 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002664 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002665 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002666 else
2667 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668}
2669
2670
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002671/*[clinic input]
2672bytes.decode
2673
2674 encoding: str(c_default="NULL") = 'utf-8'
2675 The encoding with which to decode the bytes.
2676 errors: str(c_default="NULL") = 'strict'
2677 The error handling scheme to use for the handling of decoding errors.
2678 The default is 'strict' meaning that decoding errors raise a
2679 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2680 as well as any other name registered with codecs.register_error that
2681 can handle UnicodeDecodeErrors.
2682
2683Decode the bytes using the codec registered for encoding.
2684[clinic start generated code]*/
2685
2686PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002687"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
2688"--\n"
2689"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002690"Decode the bytes using the codec registered for encoding.\n"
2691"\n"
2692" encoding\n"
2693" The encoding with which to decode the bytes.\n"
2694" errors\n"
2695" The error handling scheme to use for the handling of decoding errors.\n"
2696" The default is \'strict\' meaning that decoding errors raise a\n"
2697" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
2698" as well as any other name registered with codecs.register_error that\n"
2699" can handle UnicodeDecodeErrors.");
2700
2701#define BYTES_DECODE_METHODDEF \
2702 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
2703
2704static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002705bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002706
2707static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002708bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002709{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002710 PyObject *return_value = NULL;
2711 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002712 const char *encoding = NULL;
2713 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002714
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002715 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2716 "|ss:decode", _keywords,
2717 &encoding, &errors))
2718 goto exit;
2719 return_value = bytes_decode_impl(self, encoding, errors);
2720
2721exit:
2722 return return_value;
2723}
2724
2725static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002726bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
2727/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002728{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002729 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002730}
2731
Guido van Rossum20188312006-05-05 15:15:40 +00002732
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002733/*[clinic input]
2734bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002735
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002736 keepends: int(py_default="False") = 0
2737
2738Return a list of the lines in the bytes, breaking at line boundaries.
2739
2740Line breaks are not included in the resulting list unless keepends is given and
2741true.
2742[clinic start generated code]*/
2743
2744PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002745"splitlines($self, /, keepends=False)\n"
2746"--\n"
2747"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002748"Return a list of the lines in the bytes, breaking at line boundaries.\n"
2749"\n"
2750"Line breaks are not included in the resulting list unless keepends is given and\n"
2751"true.");
2752
2753#define BYTES_SPLITLINES_METHODDEF \
2754 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
2755
2756static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002757bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002758
2759static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002760bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002761{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002762 PyObject *return_value = NULL;
2763 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002764 int keepends = 0;
2765
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002766 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2767 "|i:splitlines", _keywords,
2768 &keepends))
2769 goto exit;
2770 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002771
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002772exit:
2773 return return_value;
2774}
2775
2776static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002777bytes_splitlines_impl(PyBytesObject*self, int keepends)
2778/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002779{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002780 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002781 (PyObject*) self, PyBytes_AS_STRING(self),
2782 PyBytes_GET_SIZE(self), keepends
2783 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002784}
2785
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002786static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002787hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 if (c >= 128)
2790 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002791 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002792 return c - '0';
2793 else {
David Malcolm96960882010-11-05 17:23:41 +00002794 if (Py_ISUPPER(c))
2795 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 if (c >= 'a' && c <= 'f')
2797 return c - 'a' + 10;
2798 }
2799 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002800}
2801
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002802/*[clinic input]
2803@classmethod
2804bytes.fromhex
2805
2806 string: unicode
2807 /
2808
2809Create a bytes object from a string of hexadecimal numbers.
2810
2811Spaces between two numbers are accepted.
2812Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2813[clinic start generated code]*/
2814
2815PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002816"fromhex($type, string, /)\n"
2817"--\n"
2818"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002819"Create a bytes object from a string of hexadecimal numbers.\n"
2820"\n"
2821"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02002822"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002823
2824#define BYTES_FROMHEX_METHODDEF \
2825 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
2826
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002827static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002828bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002829
2830static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002831bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002832{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002833 PyObject *return_value = NULL;
2834 PyObject *string;
2835
2836 if (!PyArg_ParseTuple(args,
2837 "U:fromhex",
2838 &string))
2839 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002840 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002841
2842exit:
2843 return return_value;
2844}
2845
2846static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002847bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2848/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002849{
2850 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002851 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 Py_ssize_t hexlen, byteslen, i, j;
2853 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002854 void *data;
2855 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002856
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002857 assert(PyUnicode_Check(string));
2858 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002860 kind = PyUnicode_KIND(string);
2861 data = PyUnicode_DATA(string);
2862 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002864 byteslen = hexlen/2; /* This overestimates if there are spaces */
2865 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2866 if (!newstring)
2867 return NULL;
2868 buf = PyBytes_AS_STRING(newstring);
2869 for (i = j = 0; i < hexlen; i += 2) {
2870 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002871 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 i++;
2873 if (i >= hexlen)
2874 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002875 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2876 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002877 if (top == -1 || bot == -1) {
2878 PyErr_Format(PyExc_ValueError,
2879 "non-hexadecimal number found in "
2880 "fromhex() arg at position %zd", i);
2881 goto error;
2882 }
2883 buf[j++] = (top << 4) + bot;
2884 }
2885 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2886 goto error;
2887 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002888
2889 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 Py_XDECREF(newstring);
2891 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002892}
2893
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002894/*[clinic input]
2895bytes.__sizeof__ as bytes_sizeof
2896
2897 self: self(type="PyBytesObject *")
2898
2899Returns the size of the bytes object in memory, in bytes.
2900[clinic start generated code]*/
2901
2902PyDoc_STRVAR(bytes_sizeof__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002903"__sizeof__($self, /)\n"
2904"--\n"
2905"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002906"Returns the size of the bytes object in memory, in bytes.");
2907
2908#define BYTES_SIZEOF_METHODDEF \
2909 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, bytes_sizeof__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002910
2911static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002912bytes_sizeof_impl(PyBytesObject *self);
2913
2914static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002915bytes_sizeof(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002916{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002917 return bytes_sizeof_impl(self);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002918}
2919
2920static PyObject *
2921bytes_sizeof_impl(PyBytesObject *self)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002922/*[clinic end generated code: output=44933279343f24ae input=bee4c64bb42078ed]*/
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002923{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 Py_ssize_t res;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002925 res = PyBytesObject_SIZE + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002927}
2928
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002929
2930static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002931bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002934}
2935
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002936
/* Method table installed as tp_methods of PyBytes_Type.
   Each braced entry is {name, C function, calling-convention flags, doc}.
   The BYTES_*_METHODDEF names are macros; BYTES_SIZEOF_METHODDEF, defined
   earlier in this file, expands to a complete braced entry followed by a
   comma, and the others are presumably generated the same way (confirm
   against their definitions).  The table ends with a {NULL, NULL}
   sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
     expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    BYTES_FROMHEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    BYTES_SIZEOF_METHODDEF
    {NULL,     NULL}                         /* sentinel */
};
2993
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002994static PyObject *
2995str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2996
2997static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002998bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003000 PyObject *x = NULL;
3001 const char *encoding = NULL;
3002 const char *errors = NULL;
3003 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003004 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 Py_ssize_t size;
3006 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003007 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 if (type != &PyBytes_Type)
3010 return str_subtype_new(type, args, kwds);
3011 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3012 &encoding, &errors))
3013 return NULL;
3014 if (x == NULL) {
3015 if (encoding != NULL || errors != NULL) {
3016 PyErr_SetString(PyExc_TypeError,
3017 "encoding or errors without sequence "
3018 "argument");
3019 return NULL;
3020 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003021 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003024 if (PyUnicode_Check(x)) {
3025 /* Encode via the codec registry */
3026 if (encoding == NULL) {
3027 PyErr_SetString(PyExc_TypeError,
3028 "string argument without an encoding");
3029 return NULL;
3030 }
3031 new = PyUnicode_AsEncodedString(x, encoding, errors);
3032 if (new == NULL)
3033 return NULL;
3034 assert(PyBytes_Check(new));
3035 return new;
3036 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003037
3038 /* We'd like to call PyObject_Bytes here, but we need to check for an
3039 integer argument before deferring to PyBytes_FromObject, something
3040 PyObject_Bytes doesn't do. */
3041 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3042 if (func != NULL) {
3043 new = PyObject_CallFunctionObjArgs(func, NULL);
3044 Py_DECREF(func);
3045 if (new == NULL)
3046 return NULL;
3047 if (!PyBytes_Check(new)) {
3048 PyErr_Format(PyExc_TypeError,
3049 "__bytes__ returned non-bytes (type %.200s)",
3050 Py_TYPE(new)->tp_name);
3051 Py_DECREF(new);
3052 return NULL;
3053 }
3054 return new;
3055 }
3056 else if (PyErr_Occurred())
3057 return NULL;
3058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 /* Is it an integer? */
3060 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3061 if (size == -1 && PyErr_Occurred()) {
3062 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3063 return NULL;
3064 PyErr_Clear();
3065 }
3066 else if (size < 0) {
3067 PyErr_SetString(PyExc_ValueError, "negative count");
3068 return NULL;
3069 }
3070 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003071 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003072 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003073 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 return new;
3075 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 /* If it's not unicode, there can't be encoding or errors */
3078 if (encoding != NULL || errors != NULL) {
3079 PyErr_SetString(PyExc_TypeError,
3080 "encoding or errors without a string argument");
3081 return NULL;
3082 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003083
3084 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003085}
3086
3087PyObject *
3088PyBytes_FromObject(PyObject *x)
3089{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003090 PyObject *new, *it;
3091 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003093 if (x == NULL) {
3094 PyErr_BadInternalCall();
3095 return NULL;
3096 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003097
3098 if (PyBytes_CheckExact(x)) {
3099 Py_INCREF(x);
3100 return x;
3101 }
3102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003103 /* Use the modern buffer interface */
3104 if (PyObject_CheckBuffer(x)) {
3105 Py_buffer view;
3106 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3107 return NULL;
3108 new = PyBytes_FromStringAndSize(NULL, view.len);
3109 if (!new)
3110 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003111 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3112 &view, view.len, 'C') < 0)
3113 goto fail;
3114 PyBuffer_Release(&view);
3115 return new;
3116 fail:
3117 Py_XDECREF(new);
3118 PyBuffer_Release(&view);
3119 return NULL;
3120 }
3121 if (PyUnicode_Check(x)) {
3122 PyErr_SetString(PyExc_TypeError,
3123 "cannot convert unicode object to bytes");
3124 return NULL;
3125 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 if (PyList_CheckExact(x)) {
3128 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3129 if (new == NULL)
3130 return NULL;
3131 for (i = 0; i < Py_SIZE(x); i++) {
3132 Py_ssize_t value = PyNumber_AsSsize_t(
3133 PyList_GET_ITEM(x, i), PyExc_ValueError);
3134 if (value == -1 && PyErr_Occurred()) {
3135 Py_DECREF(new);
3136 return NULL;
3137 }
3138 if (value < 0 || value >= 256) {
3139 PyErr_SetString(PyExc_ValueError,
3140 "bytes must be in range(0, 256)");
3141 Py_DECREF(new);
3142 return NULL;
3143 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003144 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003145 }
3146 return new;
3147 }
3148 if (PyTuple_CheckExact(x)) {
3149 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3150 if (new == NULL)
3151 return NULL;
3152 for (i = 0; i < Py_SIZE(x); i++) {
3153 Py_ssize_t value = PyNumber_AsSsize_t(
3154 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3155 if (value == -1 && PyErr_Occurred()) {
3156 Py_DECREF(new);
3157 return NULL;
3158 }
3159 if (value < 0 || value >= 256) {
3160 PyErr_SetString(PyExc_ValueError,
3161 "bytes must be in range(0, 256)");
3162 Py_DECREF(new);
3163 return NULL;
3164 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003165 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003166 }
3167 return new;
3168 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003170 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003171 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003172 if (size == -1 && PyErr_Occurred())
3173 return NULL;
3174 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3175 returning a shared empty bytes string. This required because we
3176 want to call _PyBytes_Resize() the returned object, which we can
3177 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003178 if (size == 0)
3179 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003180 new = PyBytes_FromStringAndSize(NULL, size);
3181 if (new == NULL)
3182 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003183 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003185 /* Get the iterator */
3186 it = PyObject_GetIter(x);
3187 if (it == NULL)
3188 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003190 /* Run the iterator to exhaustion */
3191 for (i = 0; ; i++) {
3192 PyObject *item;
3193 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003195 /* Get the next item */
3196 item = PyIter_Next(it);
3197 if (item == NULL) {
3198 if (PyErr_Occurred())
3199 goto error;
3200 break;
3201 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003203 /* Interpret it as an int (__index__) */
3204 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3205 Py_DECREF(item);
3206 if (value == -1 && PyErr_Occurred())
3207 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003209 /* Range check */
3210 if (value < 0 || value >= 256) {
3211 PyErr_SetString(PyExc_ValueError,
3212 "bytes must be in range(0, 256)");
3213 goto error;
3214 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003216 /* Append the byte */
3217 if (i >= size) {
3218 size = 2 * size + 1;
3219 if (_PyBytes_Resize(&new, size) < 0)
3220 goto error;
3221 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003222 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003223 }
3224 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003226 /* Clean up and return success */
3227 Py_DECREF(it);
3228 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003229
3230 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003231 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003232 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003233 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003234}
3235
3236static PyObject *
3237str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3238{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003239 PyObject *tmp, *pnew;
3240 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003242 assert(PyType_IsSubtype(type, &PyBytes_Type));
3243 tmp = bytes_new(&PyBytes_Type, args, kwds);
3244 if (tmp == NULL)
3245 return NULL;
3246 assert(PyBytes_CheckExact(tmp));
3247 n = PyBytes_GET_SIZE(tmp);
3248 pnew = type->tp_alloc(type, n);
3249 if (pnew != NULL) {
3250 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3251 PyBytes_AS_STRING(tmp), n+1);
3252 ((PyBytesObject *)pnew)->ob_shash =
3253 ((PyBytesObject *)tmp)->ob_shash;
3254 }
3255 Py_DECREF(tmp);
3256 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003257}
3258
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003259PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003260"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003261bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003262bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003263bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3264bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003265\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003266Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003267 - an iterable yielding integers in range(256)\n\
3268 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003269 - any object implementing the buffer API.\n\
3270 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003271
/* Forward declaration: PyBytes_Type's tp_iter slot refers to bytes_iter(),
   which is defined after the iterator type further down. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  The initializer is positional, so entries must
   stay in slot order. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003316
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003317void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003318PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003319{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003320 assert(pv != NULL);
3321 if (*pv == NULL)
3322 return;
3323 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003324 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003325 return;
3326 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003327
3328 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3329 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003330 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003331 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003332
Antoine Pitrou161d6952014-05-01 14:36:20 +02003333 wb.len = -1;
3334 if (_getbuffer(w, &wb) < 0) {
3335 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3336 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3337 Py_CLEAR(*pv);
3338 return;
3339 }
3340
3341 oldsize = PyBytes_GET_SIZE(*pv);
3342 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3343 PyErr_NoMemory();
3344 goto error;
3345 }
3346 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3347 goto error;
3348
3349 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3350 PyBuffer_Release(&wb);
3351 return;
3352
3353 error:
3354 PyBuffer_Release(&wb);
3355 Py_CLEAR(*pv);
3356 return;
3357 }
3358
3359 else {
3360 /* Multiple references, need to create new object */
3361 PyObject *v;
3362 v = bytes_concat(*pv, w);
3363 Py_DECREF(*pv);
3364 *pv = v;
3365 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003366}
3367
/* Same as PyBytes_Concat(pv, w), but afterwards also releases one
   reference to w.  Py_XDECREF is used, so w may be NULL. */
void
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
3374
3375
3376/* The following function breaks the notion that strings are immutable:
3377 it changes the size of a string. We get away with this only if there
3378 is only one module referencing the object. You can also think of it
3379 as creating a new string object and destroying the old one, only
3380 more efficiently. In any case, don't use this if the string may
3381 already be known to some other part of the code...
3382 Note that if there's not enough memory to resize the string, the original
3383 string object at *pv is deallocated, *pv is set to NULL, an "out of
3384 memory" exception is set, and -1 is returned. Else (on success) 0 is
3385 returned, and the value in *pv may or may not be the same as on input.
3386 As always, an extra byte is allocated for a trailing \0 byte (newsize
3387 does *not* include that), and a trailing \0 byte is stored.
3388*/
3389
int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    PyObject *v;
    PyBytesObject *sv;
    v = *pv;
    /* Only a bytes object with exactly one reference may be resized, and
       only to a non-negative size; anything else releases the caller's
       reference, NULLs *pv and reports an internal-call error. */
    if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* NOTE(review): these two calls presumably undo the debug-build
       reference bookkeeping for v before the block may move; it is
       re-established by _Py_NewReference() below -- confirm. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: the old block v is released here and *pv is
           left NULL. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
3419
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003420void
3421PyBytes_Fini(void)
3422{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003423 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003424 for (i = 0; i < UCHAR_MAX + 1; i++)
3425 Py_CLEAR(characters[i]);
3426 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003427}
3428
Benjamin Peterson4116f362008-05-27 00:36:20 +00003429/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003430
/* State of a bytes iterator. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield from it_seq */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003436
/* tp_dealloc for the bytes iterator: untrack it from the GC, drop the
   reference to the iterated bytes object (it_seq may already be NULL),
   then free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3444
/* tp_traverse for the bytes iterator: the only object reference it holds
   is it_seq. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3451
3452static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003453striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003454{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003455 PyBytesObject *seq;
3456 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003458 assert(it != NULL);
3459 seq = it->it_seq;
3460 if (seq == NULL)
3461 return NULL;
3462 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003464 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3465 item = PyLong_FromLong(
3466 (unsigned char)seq->ob_sval[it->it_index]);
3467 if (item != NULL)
3468 ++it->it_index;
3469 return item;
3470 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003472 Py_DECREF(seq);
3473 it->it_seq = NULL;
3474 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003475}
3476
3477static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003478striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003479{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003480 Py_ssize_t len = 0;
3481 if (it->it_seq)
3482 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3483 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003484}
3485
3486PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003487 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003488
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003489static PyObject *
3490striter_reduce(striterobject *it)
3491{
3492 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003493 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003494 it->it_seq, it->it_index);
3495 } else {
3496 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3497 if (u == NULL)
3498 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003499 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003500 }
3501}
3502
3503PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3504
3505static PyObject *
3506striter_setstate(striterobject *it, PyObject *state)
3507{
3508 Py_ssize_t index = PyLong_AsSsize_t(state);
3509 if (index == -1 && PyErr_Occurred())
3510 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003511 if (it->it_seq != NULL) {
3512 if (index < 0)
3513 index = 0;
3514 else if (index > PyBytes_GET_SIZE(it->it_seq))
3515 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3516 it->it_index = index;
3517 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003518 Py_RETURN_NONE;
3519}
3520
3521PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3522
/* Method table for the bytes iterator (tp_methods of PyBytesIter_Type):
   length hint plus pickling support.  Ends with a {NULL, NULL} sentinel. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3532
/* Type object for the bytes iterator created by bytes_iter().  The
   initializer is positional, so entries must stay in slot order; slots
   after the last one listed are left zero-initialized. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3565
3566static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003567bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003568{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003569 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003571 if (!PyBytes_Check(seq)) {
3572 PyErr_BadInternalCall();
3573 return NULL;
3574 }
3575 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3576 if (it == NULL)
3577 return NULL;
3578 it->it_index = 0;
3579 Py_INCREF(seq);
3580 it->it_seq = (PyBytesObject *)seq;
3581 _PyObject_GC_TRACK(it);
3582 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003583}