blob: ff99f936aa72bb9c0e347ab0f903db3a08b69ebb [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Neal Norwitz2bad9702007-08-27 06:19:22 +000015static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000016_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000017{
Antoine Pitroucfc22b42012-10-16 21:07:23 +020018 PyBufferProcs *bufferprocs;
19 if (PyBytes_CheckExact(obj)) {
20 /* Fast path, e.g. for .join() of many bytes objects */
21 Py_INCREF(obj);
22 view->obj = obj;
23 view->buf = PyBytes_AS_STRING(obj);
24 view->len = PyBytes_GET_SIZE(obj);
25 return view->len;
26 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000027
Antoine Pitroucfc22b42012-10-16 21:07:23 +020028 bufferprocs = Py_TYPE(obj)->tp_as_buffer;
29 if (bufferprocs == NULL || bufferprocs->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000030 {
Antoine Pitroud1188562010-06-09 16:38:55 +000031 PyErr_Format(PyExc_TypeError,
R David Murray861470c2014-10-05 11:47:01 -040032 "a bytes-like object is required, not '%.100s'",
Antoine Pitroud1188562010-06-09 16:38:55 +000033 Py_TYPE(obj)->tp_name);
34 return -1;
Guido van Rossuma74184e2007-08-29 04:05:57 +000035 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000036
Antoine Pitroucfc22b42012-10-16 21:07:23 +020037 if (bufferprocs->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000038 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000039 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000040}
41
#ifdef COUNT_ALLOCS
/* Counters bumped each time the cached empty string (null_strings) or a
   cached one-byte string (one_strings) is handed out instead of allocating. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value; entries are filled
   in lazily by the constructors below and each holds its own reference. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object, created on first request for a size-0 string. */
static PyBytesObject *nullstring;

/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
56
Christian Heimes2c9c7a52008-05-26 13:42:13 +000057/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000058 For PyBytes_FromString(), the parameter `str' points to a null-terminated
59 string containing exactly `size' bytes.
60
   For PyBytes_FromStringAndSize(), the parameter `str' is
62 either NULL or else points to a string containing at least `size' bytes.
63 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
64 not have to be null-terminated. (Therefore it is safe to construct a
65 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
66 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
67 bytes (setting the last byte to the null terminating character) and you can
68 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000069 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000070 alter the data yourself, since the strings may be shared.
71
72 The PyObject member `op->ob_size', which denotes the number of "extra
73 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020074 allocated for string data, not counting the null terminating character.
75 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000076 PyBytes_FromStringAndSize()) or the length of the string in the `str'
77 parameter (for PyBytes_FromString()).
78*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020079static PyObject *
80_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000081{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020082 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000086#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000088#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 Py_INCREF(op);
90 return (PyObject *)op;
91 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000092
Victor Stinner049e5092014-08-17 22:20:00 +020093 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 PyErr_SetString(PyExc_OverflowError,
95 "byte string is too large");
96 return NULL;
97 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +0200100 if (use_calloc)
101 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102 else
103 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 if (op == NULL)
105 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100106 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +0200108 if (!use_calloc)
109 op->ob_sval[size] = '\0';
110 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 if (size == 0) {
112 nullstring = op;
113 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +0200114 }
115 return (PyObject *) op;
116}
117
118PyObject *
119PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120{
121 PyBytesObject *op;
122 if (size < 0) {
123 PyErr_SetString(PyExc_SystemError,
124 "Negative size passed to PyBytes_FromStringAndSize");
125 return NULL;
126 }
127 if (size == 1 && str != NULL &&
128 (op = characters[*str & UCHAR_MAX]) != NULL)
129 {
130#ifdef COUNT_ALLOCS
131 one_strings++;
132#endif
133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
136
137 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
138 if (op == NULL)
139 return NULL;
140 if (str == NULL)
141 return (PyObject *) op;
142
143 Py_MEMCPY(op->ob_sval, str, size);
144 /* share short strings */
145 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 characters[*str & UCHAR_MAX] = op;
147 Py_INCREF(op);
148 }
149 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000150}
151
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000152PyObject *
153PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000154{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200155 size_t size;
156 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 assert(str != NULL);
159 size = strlen(str);
160 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
161 PyErr_SetString(PyExc_OverflowError,
162 "byte string is too long");
163 return NULL;
164 }
165 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000166#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000168#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 Py_INCREF(op);
170 return (PyObject *)op;
171 }
172 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000173#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000175#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 Py_INCREF(op);
177 return (PyObject *)op;
178 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 /* Inline PyObject_NewVar */
181 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
182 if (op == NULL)
183 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100184 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 op->ob_shash = -1;
186 Py_MEMCPY(op->ob_sval, str, size+1);
187 /* share short strings */
188 if (size == 0) {
189 nullstring = op;
190 Py_INCREF(op);
191 } else if (size == 1) {
192 characters[*str & UCHAR_MAX] = op;
193 Py_INCREF(op);
194 }
195 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000196}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000197
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000198PyObject *
199PyBytes_FromFormatV(const char *format, va_list vargs)
200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 va_list count;
202 Py_ssize_t n = 0;
203 const char* f;
204 char *s;
205 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000206
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000207 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 /* step 1: figure out how large a buffer we need */
209 for (f = format; *f; f++) {
210 if (*f == '%') {
211 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000212 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
216 * they don't affect the amount of space we reserve.
217 */
218 if ((*f == 'l' || *f == 'z') &&
219 (f[1] == 'd' || f[1] == 'u'))
220 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 switch (*f) {
223 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100224 {
225 int c = va_arg(count, int);
226 if (c < 0 || c > 255) {
227 PyErr_SetString(PyExc_OverflowError,
228 "PyBytes_FromFormatV(): %c format "
229 "expects an integer in range [0; 255]");
230 return NULL;
231 }
232 n++;
233 break;
234 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 case '%':
236 n++;
237 break;
238 case 'd': case 'u': case 'i': case 'x':
239 (void) va_arg(count, int);
240 /* 20 bytes is enough to hold a 64-bit
241 integer. Decimal takes the most space.
242 This isn't enough for octal. */
243 n += 20;
244 break;
245 case 's':
246 s = va_arg(count, char*);
247 n += strlen(s);
248 break;
249 case 'p':
250 (void) va_arg(count, int);
251 /* maximum 64-bit pointer representation:
252 * 0xffffffffffffffff
253 * so 19 characters is enough.
254 * XXX I count 18 -- what's the extra for?
255 */
256 n += 19;
257 break;
258 default:
259 /* if we stumble upon an unknown
260 formatting code, copy the rest of
261 the format string to the output
262 string. (we cannot just skip the
263 code, since there's no way to know
264 what's in the argument list) */
265 n += strlen(p);
266 goto expand;
267 }
268 } else
269 n++;
270 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000271 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 /* step 2: fill the buffer */
273 /* Since we've analyzed how much space we need for the worst case,
274 use sprintf directly instead of the slower PyOS_snprintf. */
275 string = PyBytes_FromStringAndSize(NULL, n);
276 if (!string)
277 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 for (f = format; *f; f++) {
282 if (*f == '%') {
283 const char* p = f++;
284 Py_ssize_t i;
285 int longflag = 0;
286 int size_tflag = 0;
287 /* parse the width.precision part (we're only
288 interested in the precision value, if any) */
289 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000290 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 n = (n*10) + *f++ - '0';
292 if (*f == '.') {
293 f++;
294 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000295 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 n = (n*10) + *f++ - '0';
297 }
David Malcolm96960882010-11-05 17:23:41 +0000298 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 f++;
300 /* handle the long flag, but only for %ld and %lu.
301 others can be added when necessary. */
302 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
303 longflag = 1;
304 ++f;
305 }
306 /* handle the size_t flag. */
307 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
308 size_tflag = 1;
309 ++f;
310 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 switch (*f) {
313 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100314 {
315 int c = va_arg(vargs, int);
316 /* c has been checked for overflow in the first step */
317 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100319 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 case 'd':
321 if (longflag)
322 sprintf(s, "%ld", va_arg(vargs, long));
323 else if (size_tflag)
324 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
325 va_arg(vargs, Py_ssize_t));
326 else
327 sprintf(s, "%d", va_arg(vargs, int));
328 s += strlen(s);
329 break;
330 case 'u':
331 if (longflag)
332 sprintf(s, "%lu",
333 va_arg(vargs, unsigned long));
334 else if (size_tflag)
335 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
336 va_arg(vargs, size_t));
337 else
338 sprintf(s, "%u",
339 va_arg(vargs, unsigned int));
340 s += strlen(s);
341 break;
342 case 'i':
343 sprintf(s, "%i", va_arg(vargs, int));
344 s += strlen(s);
345 break;
346 case 'x':
347 sprintf(s, "%x", va_arg(vargs, int));
348 s += strlen(s);
349 break;
350 case 's':
351 p = va_arg(vargs, char*);
352 i = strlen(p);
353 if (n > 0 && i > n)
354 i = n;
355 Py_MEMCPY(s, p, i);
356 s += i;
357 break;
358 case 'p':
359 sprintf(s, "%p", va_arg(vargs, void*));
360 /* %p is ill-defined: ensure leading 0x. */
361 if (s[1] == 'X')
362 s[1] = 'x';
363 else if (s[1] != 'x') {
364 memmove(s+2, s, strlen(s)+1);
365 s[0] = '0';
366 s[1] = 'x';
367 }
368 s += strlen(s);
369 break;
370 case '%':
371 *s++ = '%';
372 break;
373 default:
374 strcpy(s, p);
375 s += strlen(s);
376 goto end;
377 }
378 } else
379 *s++ = *f;
380 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000381
382 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
384 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000385}
386
387PyObject *
388PyBytes_FromFormat(const char *format, ...)
389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 PyObject* ret;
391 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000392
393#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000395#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000397#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 ret = PyBytes_FromFormatV(format, vargs);
399 va_end(vargs);
400 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000401}
402
403static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000404bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000405{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000407}
408
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000409/* Unescape a backslash-escaped string. If unicode is non-zero,
410 the string is a u-literal. If recode_encoding is non-zero,
411 the string is UTF-8 encoded and should be re-encoded in the
412 specified encoding. */
413
414PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 Py_ssize_t len,
416 const char *errors,
417 Py_ssize_t unicode,
418 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 int c;
421 char *p, *buf;
422 const char *end;
423 PyObject *v;
424 Py_ssize_t newlen = recode_encoding ? 4*len:len;
425 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
426 if (v == NULL)
427 return NULL;
428 p = buf = PyBytes_AsString(v);
429 end = s + len;
430 while (s < end) {
431 if (*s != '\\') {
432 non_esc:
433 if (recode_encoding && (*s & 0x80)) {
434 PyObject *u, *w;
435 char *r;
436 const char* t;
437 Py_ssize_t rn;
438 t = s;
439 /* Decode non-ASCII bytes as UTF-8. */
440 while (t < end && (*t & 0x80)) t++;
441 u = PyUnicode_DecodeUTF8(s, t - s, errors);
442 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 /* Recode them in target encoding. */
445 w = PyUnicode_AsEncodedString(
446 u, recode_encoding, errors);
447 Py_DECREF(u);
448 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 /* Append bytes to output buffer. */
451 assert(PyBytes_Check(w));
452 r = PyBytes_AS_STRING(w);
453 rn = PyBytes_GET_SIZE(w);
454 Py_MEMCPY(p, r, rn);
455 p += rn;
456 Py_DECREF(w);
457 s = t;
458 } else {
459 *p++ = *s++;
460 }
461 continue;
462 }
463 s++;
464 if (s==end) {
465 PyErr_SetString(PyExc_ValueError,
466 "Trailing \\ in string");
467 goto failed;
468 }
469 switch (*s++) {
470 /* XXX This assumes ASCII! */
471 case '\n': break;
472 case '\\': *p++ = '\\'; break;
473 case '\'': *p++ = '\''; break;
474 case '\"': *p++ = '\"'; break;
475 case 'b': *p++ = '\b'; break;
476 case 'f': *p++ = '\014'; break; /* FF */
477 case 't': *p++ = '\t'; break;
478 case 'n': *p++ = '\n'; break;
479 case 'r': *p++ = '\r'; break;
480 case 'v': *p++ = '\013'; break; /* VT */
481 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
482 case '0': case '1': case '2': case '3':
483 case '4': case '5': case '6': case '7':
484 c = s[-1] - '0';
485 if (s < end && '0' <= *s && *s <= '7') {
486 c = (c<<3) + *s++ - '0';
487 if (s < end && '0' <= *s && *s <= '7')
488 c = (c<<3) + *s++ - '0';
489 }
490 *p++ = c;
491 break;
492 case 'x':
David Malcolm96960882010-11-05 17:23:41 +0000493 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000494 unsigned int x = 0;
495 c = Py_CHARMASK(*s);
496 s++;
David Malcolm96960882010-11-05 17:23:41 +0000497 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000499 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 x = 10 + c - 'a';
501 else
502 x = 10 + c - 'A';
503 x = x << 4;
504 c = Py_CHARMASK(*s);
505 s++;
David Malcolm96960882010-11-05 17:23:41 +0000506 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +0000508 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 x += 10 + c - 'a';
510 else
511 x += 10 + c - 'A';
512 *p++ = x;
513 break;
514 }
515 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +0200516 PyErr_Format(PyExc_ValueError,
517 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +0200518 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 goto failed;
520 }
521 if (strcmp(errors, "replace") == 0) {
522 *p++ = '?';
523 } else if (strcmp(errors, "ignore") == 0)
524 /* do nothing */;
525 else {
526 PyErr_Format(PyExc_ValueError,
527 "decoding error; unknown "
528 "error handling code: %.400s",
529 errors);
530 goto failed;
531 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +0200532 /* skip \x */
533 if (s < end && Py_ISXDIGIT(s[0]))
534 s++; /* and a hexdigit */
535 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 default:
537 *p++ = '\\';
538 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +0200539 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 UTF-8 bytes may follow. */
541 }
542 }
543 if (p-buf < newlen)
544 _PyBytes_Resize(&v, p - buf);
545 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000546 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 Py_DECREF(v);
548 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000549}
550
551/* -------------------------------------------------------------------- */
552/* object api */
553
554Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200555PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 if (!PyBytes_Check(op)) {
558 PyErr_Format(PyExc_TypeError,
559 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
560 return -1;
561 }
562 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000563}
564
565char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200566PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000567{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 if (!PyBytes_Check(op)) {
569 PyErr_Format(PyExc_TypeError,
570 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
571 return NULL;
572 }
573 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000574}
575
576int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200577PyBytes_AsStringAndSize(PyObject *obj,
578 char **s,
579 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000580{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 if (s == NULL) {
582 PyErr_BadInternalCall();
583 return -1;
584 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 if (!PyBytes_Check(obj)) {
587 PyErr_Format(PyExc_TypeError,
588 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
589 return -1;
590 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 *s = PyBytes_AS_STRING(obj);
593 if (len != NULL)
594 *len = PyBytes_GET_SIZE(obj);
595 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +0300596 PyErr_SetString(PyExc_ValueError,
597 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 return -1;
599 }
600 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000601}
Neal Norwitz6968b052007-02-27 19:02:19 +0000602
603/* -------------------------------------------------------------------- */
604/* Methods */
605
Eric Smith0923d1d2009-04-16 20:16:10 +0000606#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000607
608#include "stringlib/fastsearch.h"
609#include "stringlib/count.h"
610#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +0200611#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000612#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +0000613#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +0000614#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000615
Eric Smith0f78bff2009-11-30 01:01:42 +0000616#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +0000617
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000618PyObject *
619PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +0000620{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200621 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200622 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -0400623 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200625 unsigned char quote, *s, *p;
626
627 /* Compute size of output string */
628 squotes = dquotes = 0;
629 newsize = 3; /* b'' */
630 s = (unsigned char*)op->ob_sval;
631 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400632 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400634 case '\'': squotes++; break;
635 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200636 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400637 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200638 default:
639 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400640 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200641 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400642 if (newsize > PY_SSIZE_T_MAX - incr)
643 goto overflow;
644 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200645 }
646 quote = '\'';
647 if (smartquotes && squotes && !dquotes)
648 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400649 if (squotes && quote == '\'') {
650 if (newsize > PY_SSIZE_T_MAX - squotes)
651 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200652 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200654
655 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 if (v == NULL) {
657 return NULL;
658 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200659 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000660
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200661 *p++ = 'b', *p++ = quote;
662 for (i = 0; i < length; i++) {
663 unsigned char c = op->ob_sval[i];
664 if (c == quote || c == '\\')
665 *p++ = '\\', *p++ = c;
666 else if (c == '\t')
667 *p++ = '\\', *p++ = 't';
668 else if (c == '\n')
669 *p++ = '\\', *p++ = 'n';
670 else if (c == '\r')
671 *p++ = '\\', *p++ = 'r';
672 else if (c < ' ' || c >= 0x7f) {
673 *p++ = '\\';
674 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200675 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
676 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 else
679 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200681 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +0200682 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200683 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -0400684
685 overflow:
686 PyErr_SetString(PyExc_OverflowError,
687 "bytes object is too large to make repr");
688 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000689}
690
Neal Norwitz6968b052007-02-27 19:02:19 +0000691static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000692bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000693{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000694 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +0000695}
696
Neal Norwitz6968b052007-02-27 19:02:19 +0000697static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000698bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000699{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 if (Py_BytesWarningFlag) {
701 if (PyErr_WarnEx(PyExc_BytesWarning,
702 "str() on a bytes instance", 1))
703 return NULL;
704 }
705 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +0000706}
707
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000708static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000709bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000710{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000712}
Neal Norwitz6968b052007-02-27 19:02:19 +0000713
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000714/* This is also used by PyBytes_Concat() */
715static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000716bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000717{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 Py_ssize_t size;
719 Py_buffer va, vb;
720 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 va.len = -1;
723 vb.len = -1;
724 if (_getbuffer(a, &va) < 0 ||
725 _getbuffer(b, &vb) < 0) {
726 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
727 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
728 goto done;
729 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 /* Optimize end cases */
732 if (va.len == 0 && PyBytes_CheckExact(b)) {
733 result = b;
734 Py_INCREF(result);
735 goto done;
736 }
737 if (vb.len == 0 && PyBytes_CheckExact(a)) {
738 result = a;
739 Py_INCREF(result);
740 goto done;
741 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 size = va.len + vb.len;
744 if (size < 0) {
745 PyErr_NoMemory();
746 goto done;
747 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 result = PyBytes_FromStringAndSize(NULL, size);
750 if (result != NULL) {
751 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
752 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
753 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000754
755 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 if (va.len != -1)
757 PyBuffer_Release(&va);
758 if (vb.len != -1)
759 PyBuffer_Release(&vb);
760 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000761}
Neal Norwitz6968b052007-02-27 19:02:19 +0000762
763static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200764bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +0000765{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200766 Py_ssize_t i;
767 Py_ssize_t j;
768 Py_ssize_t size;
769 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 size_t nbytes;
771 if (n < 0)
772 n = 0;
773 /* watch out for overflows: the size can overflow int,
774 * and the # of bytes needed can overflow size_t
775 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000776 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 PyErr_SetString(PyExc_OverflowError,
778 "repeated bytes are too long");
779 return NULL;
780 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +0000781 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
783 Py_INCREF(a);
784 return (PyObject *)a;
785 }
786 nbytes = (size_t)size;
787 if (nbytes + PyBytesObject_SIZE <= nbytes) {
788 PyErr_SetString(PyExc_OverflowError,
789 "repeated bytes are too long");
790 return NULL;
791 }
792 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
793 if (op == NULL)
794 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100795 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 op->ob_shash = -1;
797 op->ob_sval[size] = '\0';
798 if (Py_SIZE(a) == 1 && n > 0) {
799 memset(op->ob_sval, a->ob_sval[0] , n);
800 return (PyObject *) op;
801 }
802 i = 0;
803 if (i < size) {
804 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
805 i = Py_SIZE(a);
806 }
807 while (i < size) {
808 j = (i <= size-i) ? i : size-i;
809 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
810 i += j;
811 }
812 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +0000813}
814
Guido van Rossum98297ee2007-11-06 21:34:58 +0000815static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000816bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +0000817{
818 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
819 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000820 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +0000821 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +0000822 PyErr_Clear();
823 if (_getbuffer(arg, &varg) < 0)
824 return -1;
825 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
826 varg.buf, varg.len, 0);
827 PyBuffer_Release(&varg);
828 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000829 }
830 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +0000831 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
832 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000833 }
834
Antoine Pitrou0010d372010-08-15 17:12:55 +0000835 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000836}
837
Neal Norwitz6968b052007-02-27 19:02:19 +0000838static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200839bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +0000840{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 if (i < 0 || i >= Py_SIZE(a)) {
842 PyErr_SetString(PyExc_IndexError, "index out of range");
843 return NULL;
844 }
845 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +0000846}
847
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100848Py_LOCAL(int)
849bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
850{
851 int cmp;
852 Py_ssize_t len;
853
854 len = Py_SIZE(a);
855 if (Py_SIZE(b) != len)
856 return 0;
857
858 if (a->ob_sval[0] != b->ob_sval[0])
859 return 0;
860
861 cmp = memcmp(a->ob_sval, b->ob_sval, len);
862 return (cmp == 0);
863}
864
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000865static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000866bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +0000867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 int c;
869 Py_ssize_t len_a, len_b;
870 Py_ssize_t min_len;
871 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 /* Make sure both arguments are strings. */
874 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
875 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
876 (PyObject_IsInstance((PyObject*)a,
877 (PyObject*)&PyUnicode_Type) ||
878 PyObject_IsInstance((PyObject*)b,
879 (PyObject*)&PyUnicode_Type))) {
880 if (PyErr_WarnEx(PyExc_BytesWarning,
881 "Comparison between bytes and string", 1))
882 return NULL;
883 }
884 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100886 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100888 case Py_EQ:
889 case Py_LE:
890 case Py_GE:
891 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100893 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100894 case Py_NE:
895 case Py_LT:
896 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100898 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100899 default:
900 PyErr_BadArgument();
901 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 }
903 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100904 else if (op == Py_EQ || op == Py_NE) {
905 int eq = bytes_compare_eq(a, b);
906 eq ^= (op == Py_NE);
907 result = eq ? Py_True : Py_False;
908 }
909 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100910 len_a = Py_SIZE(a);
911 len_b = Py_SIZE(b);
912 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100913 if (min_len > 0) {
914 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100915 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100916 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100918 else
919 c = 0;
920 if (c == 0)
921 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
922 switch (op) {
923 case Py_LT: c = c < 0; break;
924 case Py_LE: c = c <= 0; break;
925 case Py_GT: c = c > 0; break;
926 case Py_GE: c = c >= 0; break;
927 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +0100928 PyErr_BadArgument();
929 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100930 }
931 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +0100933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000934 Py_INCREF(result);
935 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000936}
937
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000938static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000939bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +0000940{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100941 if (a->ob_shash == -1) {
942 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +0100943 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +0100944 }
945 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +0000946}
947
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000948static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000949bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000950{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 if (PyIndex_Check(item)) {
952 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
953 if (i == -1 && PyErr_Occurred())
954 return NULL;
955 if (i < 0)
956 i += PyBytes_GET_SIZE(self);
957 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
958 PyErr_SetString(PyExc_IndexError,
959 "index out of range");
960 return NULL;
961 }
962 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
963 }
964 else if (PySlice_Check(item)) {
965 Py_ssize_t start, stop, step, slicelength, cur, i;
966 char* source_buf;
967 char* result_buf;
968 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +0000969
Martin v. Löwis4d0d4712010-12-03 20:14:31 +0000970 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 PyBytes_GET_SIZE(self),
972 &start, &stop, &step, &slicelength) < 0) {
973 return NULL;
974 }
Neal Norwitz6968b052007-02-27 19:02:19 +0000975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 if (slicelength <= 0) {
977 return PyBytes_FromStringAndSize("", 0);
978 }
979 else if (start == 0 && step == 1 &&
980 slicelength == PyBytes_GET_SIZE(self) &&
981 PyBytes_CheckExact(self)) {
982 Py_INCREF(self);
983 return (PyObject *)self;
984 }
985 else if (step == 1) {
986 return PyBytes_FromStringAndSize(
987 PyBytes_AS_STRING(self) + start,
988 slicelength);
989 }
990 else {
991 source_buf = PyBytes_AS_STRING(self);
992 result = PyBytes_FromStringAndSize(NULL, slicelength);
993 if (result == NULL)
994 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +0000995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 result_buf = PyBytes_AS_STRING(result);
997 for (cur = start, i = 0; i < slicelength;
998 cur += step, i++) {
999 result_buf[i] = source_buf[cur];
1000 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 return result;
1003 }
1004 }
1005 else {
1006 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001007 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 Py_TYPE(item)->tp_name);
1009 return NULL;
1010 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001011}
1012
1013static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001014bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001015{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1017 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001018}
1019
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001020static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 (lenfunc)bytes_length, /*sq_length*/
1022 (binaryfunc)bytes_concat, /*sq_concat*/
1023 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1024 (ssizeargfunc)bytes_item, /*sq_item*/
1025 0, /*sq_slice*/
1026 0, /*sq_ass_item*/
1027 0, /*sq_ass_slice*/
1028 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001029};
1030
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001031static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 (lenfunc)bytes_length,
1033 (binaryfunc)bytes_subscript,
1034 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001035};
1036
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001037static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 (getbufferproc)bytes_buffer_getbuffer,
1039 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001040};
1041
1042
/* Strip modes passed as the striptype argument of do_xstrip()/do_strip(). */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
1046
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001047/*[clinic input]
1048bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001049
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001050 sep: object = None
1051 The delimiter according which to split the bytes.
1052 None (the default value) means split on ASCII whitespace characters
1053 (space, tab, return, newline, formfeed, vertical tab).
1054 maxsplit: Py_ssize_t = -1
1055 Maximum number of splits to do.
1056 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001057
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001058Return a list of the sections in the bytes, using sep as the delimiter.
1059[clinic start generated code]*/
1060
1061PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001062"split($self, /, sep=None, maxsplit=-1)\n"
1063"--\n"
1064"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001065"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1066"\n"
1067" sep\n"
1068" The delimiter according which to split the bytes.\n"
1069" None (the default value) means split on ASCII whitespace characters\n"
1070" (space, tab, return, newline, formfeed, vertical tab).\n"
1071" maxsplit\n"
1072" Maximum number of splits to do.\n"
1073" -1 (the default value) means no limit.");
1074
1075#define BYTES_SPLIT_METHODDEF \
1076 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001077
1078static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001079bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001080
1081static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001082bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001083{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001084 PyObject *return_value = NULL;
1085 static char *_keywords[] = {"sep", "maxsplit", NULL};
1086 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001088
1089 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1090 "|On:split", _keywords,
1091 &sep, &maxsplit))
1092 goto exit;
1093 return_value = bytes_split_impl(self, sep, maxsplit);
1094
1095exit:
1096 return return_value;
1097}
1098
1099static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001100bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1101/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001102{
1103 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 const char *s = PyBytes_AS_STRING(self), *sub;
1105 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001106 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 if (maxsplit < 0)
1109 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001110 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001112 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 return NULL;
1114 sub = vsub.buf;
1115 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1118 PyBuffer_Release(&vsub);
1119 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001120}
1121
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001122/*[clinic input]
1123bytes.partition
1124
1125 self: self(type="PyBytesObject *")
1126 sep: object
1127 /
1128
1129Partition the bytes into three parts using the given separator.
1130
1131This will search for the separator sep in the bytes. If the separator is found,
1132returns a 3-tuple containing the part before the separator, the separator
1133itself, and the part after it.
1134
1135If the separator is not found, returns a 3-tuple containing the original bytes
1136object and two empty bytes objects.
1137[clinic start generated code]*/
1138
1139PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001140"partition($self, sep, /)\n"
1141"--\n"
1142"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001143"Partition the bytes into three parts using the given separator.\n"
1144"\n"
1145"This will search for the separator sep in the bytes. If the separator is found,\n"
1146"returns a 3-tuple containing the part before the separator, the separator\n"
1147"itself, and the part after it.\n"
1148"\n"
1149"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1150"object and two empty bytes objects.");
1151
1152#define BYTES_PARTITION_METHODDEF \
1153 {"partition", (PyCFunction)bytes_partition, METH_O, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001154
1155static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001156bytes_partition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001157/*[clinic end generated code: output=b41e119c873c08bc input=6c5b9dcc5a9fd62e]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001158{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001159 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001161
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001162 if (PyBytes_Check(sep)) {
1163 sep_chars = PyBytes_AS_STRING(sep);
1164 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001166 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 return stringlib_partition(
1170 (PyObject*) self,
1171 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001172 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001174}
1175
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001176/*[clinic input]
1177bytes.rpartition
1178
1179 self: self(type="PyBytesObject *")
1180 sep: object
1181 /
1182
1183Partition the bytes into three parts using the given separator.
1184
1185This will search for the separator sep in the bytes, starting and the end. If
1186the separator is found, returns a 3-tuple containing the part before the
1187separator, the separator itself, and the part after it.
1188
1189If the separator is not found, returns a 3-tuple containing two empty bytes
1190objects and the original bytes object.
1191[clinic start generated code]*/
1192
1193PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001194"rpartition($self, sep, /)\n"
1195"--\n"
1196"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001197"Partition the bytes into three parts using the given separator.\n"
1198"\n"
1199"This will search for the separator sep in the bytes, starting and the end. If\n"
1200"the separator is found, returns a 3-tuple containing the part before the\n"
1201"separator, the separator itself, and the part after it.\n"
1202"\n"
1203"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1204"objects and the original bytes object.");
1205
1206#define BYTES_RPARTITION_METHODDEF \
1207 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001208
1209static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001210bytes_rpartition(PyBytesObject *self, PyObject *sep)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001211/*[clinic end generated code: output=3a620803657196ee input=79bc2932e78e5ce0]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001212{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001213 const char *sep_chars;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 Py_ssize_t sep_len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001215
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001216 if (PyBytes_Check(sep)) {
1217 sep_chars = PyBytes_AS_STRING(sep);
1218 sep_len = PyBytes_GET_SIZE(sep);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001220 else if (PyObject_AsCharBuffer(sep, &sep_chars, &sep_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 return stringlib_rpartition(
1224 (PyObject*) self,
1225 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001226 sep, sep_chars, sep_len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001228}
1229
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001230/*[clinic input]
1231bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001232
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001233Return a list of the sections in the bytes, using sep as the delimiter.
1234
1235Splitting is done starting at the end of the bytes and working to the front.
1236[clinic start generated code]*/
1237
1238PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001239"rsplit($self, /, sep=None, maxsplit=-1)\n"
1240"--\n"
1241"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001242"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1243"\n"
1244" sep\n"
1245" The delimiter according which to split the bytes.\n"
1246" None (the default value) means split on ASCII whitespace characters\n"
1247" (space, tab, return, newline, formfeed, vertical tab).\n"
1248" maxsplit\n"
1249" Maximum number of splits to do.\n"
1250" -1 (the default value) means no limit.\n"
1251"\n"
1252"Splitting is done starting at the end of the bytes and working to the front.");
1253
1254#define BYTES_RSPLIT_METHODDEF \
1255 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256
Neal Norwitz6968b052007-02-27 19:02:19 +00001257static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001258bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001259
1260static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001261bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001262{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001263 PyObject *return_value = NULL;
1264 static char *_keywords[] = {"sep", "maxsplit", NULL};
1265 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001267
1268 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1269 "|On:rsplit", _keywords,
1270 &sep, &maxsplit))
1271 goto exit;
1272 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1273
1274exit:
1275 return return_value;
1276}
1277
1278static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001279bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1280/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001281{
1282 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 const char *s = PyBytes_AS_STRING(self), *sub;
1284 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001285 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 if (maxsplit < 0)
1288 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001289 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001291 if (_getbuffer(sep, &vsub) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 return NULL;
1293 sub = vsub.buf;
1294 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1297 PyBuffer_Release(&vsub);
1298 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001299}
1300
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001301
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001302/*[clinic input]
1303bytes.join
1304
1305 iterable_of_bytes: object
1306 /
1307
1308Concatenate any number of bytes objects.
1309
1310The bytes whose method is called is inserted in between each pair.
1311
1312The result is returned as a new bytes object.
1313
1314Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1315[clinic start generated code]*/
1316
1317PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001318"join($self, iterable_of_bytes, /)\n"
1319"--\n"
1320"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001321"Concatenate any number of bytes objects.\n"
1322"\n"
1323"The bytes whose method is called is inserted in between each pair.\n"
1324"\n"
1325"The result is returned as a new bytes object.\n"
1326"\n"
1327"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1328
1329#define BYTES_JOIN_METHODDEF \
1330 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331
Neal Norwitz6968b052007-02-27 19:02:19 +00001332static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001333bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1334/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001335{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001336 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001337}
1338
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001339PyObject *
1340_PyBytes_Join(PyObject *sep, PyObject *x)
1341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 assert(sep != NULL && PyBytes_Check(sep));
1343 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001344 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001345}
1346
/* Helper macro to fix up start/end slice values against len.
 *
 * end:   values above len become len; negative values are offset by len
 *        and then floored at 0.
 * start: negative values are offset by len and then floored at 0; start is
 *        not capped from above.
 *
 * start and end are assigned to and every argument is evaluated more than
 * once, so pass plain variables.  The expansion is a bare statement
 * sequence (not wrapped in do/while).
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len) {                            \
        end = len;                              \
    }                                           \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0) {                          \
            end = 0;                            \
        }                                       \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0) {                        \
            start = 0;                          \
        }                                       \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361
1362Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001363bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001366 char byte;
1367 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 const char *sub;
1369 Py_ssize_t sub_len;
1370 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001371 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372
Antoine Pitrouac65d962011-10-20 23:54:17 +02001373 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1374 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376
Antoine Pitrouac65d962011-10-20 23:54:17 +02001377 if (subobj) {
1378 if (_getbuffer(subobj, &subbuf) < 0)
1379 return -2;
1380
1381 sub = subbuf.buf;
1382 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001384 else {
1385 sub = &byte;
1386 sub_len = 1;
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001390 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1392 sub, sub_len, start, end);
1393 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001394 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1396 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001397
1398 if (subobj)
1399 PyBuffer_Release(&subbuf);
1400
1401 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402}
1403
1404
1405PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001406"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001407\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001408Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001409such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001410arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001411\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412Return -1 on failure.");
1413
Neal Norwitz6968b052007-02-27 19:02:19 +00001414static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 Py_ssize_t result = bytes_find_internal(self, args, +1);
1418 if (result == -2)
1419 return NULL;
1420 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001421}
1422
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001423
1424PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001425"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001426\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001427Like B.find() but raise ValueError when the substring is not found.");
1428
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001429static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001430bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001431{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 Py_ssize_t result = bytes_find_internal(self, args, +1);
1433 if (result == -2)
1434 return NULL;
1435 if (result == -1) {
1436 PyErr_SetString(PyExc_ValueError,
1437 "substring not found");
1438 return NULL;
1439 }
1440 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001441}
1442
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001443
1444PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001445"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001446\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001447Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001448such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001449arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001450\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001451Return -1 on failure.");
1452
Neal Norwitz6968b052007-02-27 19:02:19 +00001453static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001454bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001455{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 Py_ssize_t result = bytes_find_internal(self, args, -1);
1457 if (result == -2)
1458 return NULL;
1459 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001460}
1461
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001462
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001463PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001464"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001465\n\
1466Like B.rfind() but raise ValueError when the substring is not found.");
1467
1468static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001469bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001470{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 Py_ssize_t result = bytes_find_internal(self, args, -1);
1472 if (result == -2)
1473 return NULL;
1474 if (result == -1) {
1475 PyErr_SetString(PyExc_ValueError,
1476 "substring not found");
1477 return NULL;
1478 }
1479 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001480}
1481
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482
1483Py_LOCAL_INLINE(PyObject *)
1484do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001485{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 Py_buffer vsep;
1487 char *s = PyBytes_AS_STRING(self);
1488 Py_ssize_t len = PyBytes_GET_SIZE(self);
1489 char *sep;
1490 Py_ssize_t seplen;
1491 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 if (_getbuffer(sepobj, &vsep) < 0)
1494 return NULL;
1495 sep = vsep.buf;
1496 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 i = 0;
1499 if (striptype != RIGHTSTRIP) {
1500 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1501 i++;
1502 }
1503 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 j = len;
1506 if (striptype != LEFTSTRIP) {
1507 do {
1508 j--;
1509 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1510 j++;
1511 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1516 Py_INCREF(self);
1517 return (PyObject*)self;
1518 }
1519 else
1520 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001521}
1522
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523
1524Py_LOCAL_INLINE(PyObject *)
1525do_strip(PyBytesObject *self, int striptype)
1526{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 char *s = PyBytes_AS_STRING(self);
1528 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 i = 0;
1531 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001532 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 i++;
1534 }
1535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 j = len;
1538 if (striptype != LEFTSTRIP) {
1539 do {
1540 j--;
David Malcolm96960882010-11-05 17:23:41 +00001541 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 j++;
1543 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1546 Py_INCREF(self);
1547 return (PyObject*)self;
1548 }
1549 else
1550 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551}
1552
1553
1554Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001555do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001556{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001557 if (bytes != NULL && bytes != Py_None) {
1558 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 }
1560 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001561}
1562
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001563/*[clinic input]
1564bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001565
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001566 self: self(type="PyBytesObject *")
1567 bytes: object = None
1568 /
1569
1570Strip leading and trailing bytes contained in the argument.
1571
1572If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1573[clinic start generated code]*/
1574
1575PyDoc_STRVAR(bytes_strip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001576"strip($self, bytes=None, /)\n"
1577"--\n"
1578"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001579"Strip leading and trailing bytes contained in the argument.\n"
1580"\n"
1581"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
1582
1583#define BYTES_STRIP_METHODDEF \
1584 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
1585
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001586static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001587bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
1588
1589static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001590bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001591{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001592 PyObject *return_value = NULL;
1593 PyObject *bytes = Py_None;
1594
1595 if (!PyArg_UnpackTuple(args, "strip",
1596 0, 1,
1597 &bytes))
1598 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001599 return_value = bytes_strip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001600
1601exit:
1602 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001603}
1604
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001605static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001606bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001607/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001608{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001609 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001610}
1611
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001612/*[clinic input]
1613bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001614
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001615 self: self(type="PyBytesObject *")
1616 bytes: object = None
1617 /
1618
1619Strip leading bytes contained in the argument.
1620
1621If the argument is omitted or None, strip leading ASCII whitespace.
1622[clinic start generated code]*/
1623
1624PyDoc_STRVAR(bytes_lstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001625"lstrip($self, bytes=None, /)\n"
1626"--\n"
1627"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001628"Strip leading bytes contained in the argument.\n"
1629"\n"
1630"If the argument is omitted or None, strip leading ASCII whitespace.");
1631
1632#define BYTES_LSTRIP_METHODDEF \
1633 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
1634
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001635static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001636bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
1637
1638static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001639bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001640{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001641 PyObject *return_value = NULL;
1642 PyObject *bytes = Py_None;
1643
1644 if (!PyArg_UnpackTuple(args, "lstrip",
1645 0, 1,
1646 &bytes))
1647 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001648 return_value = bytes_lstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001649
1650exit:
1651 return return_value;
1652}
1653
1654static PyObject *
1655bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001656/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001657{
1658 return do_argstrip(self, LEFTSTRIP, bytes);
1659}
1660
1661/*[clinic input]
1662bytes.rstrip
1663
1664 self: self(type="PyBytesObject *")
1665 bytes: object = None
1666 /
1667
1668Strip trailing bytes contained in the argument.
1669
1670If the argument is omitted or None, strip trailing ASCII whitespace.
1671[clinic start generated code]*/
1672
1673PyDoc_STRVAR(bytes_rstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001674"rstrip($self, bytes=None, /)\n"
1675"--\n"
1676"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001677"Strip trailing bytes contained in the argument.\n"
1678"\n"
1679"If the argument is omitted or None, strip trailing ASCII whitespace.");
1680
1681#define BYTES_RSTRIP_METHODDEF \
1682 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
1683
1684static PyObject *
1685bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
1686
1687static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001688bytes_rstrip(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001689{
1690 PyObject *return_value = NULL;
1691 PyObject *bytes = Py_None;
1692
1693 if (!PyArg_UnpackTuple(args, "rstrip",
1694 0, 1,
1695 &bytes))
1696 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001697 return_value = bytes_rstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001698
1699exit:
1700 return return_value;
1701}
1702
1703static PyObject *
1704bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001705/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001706{
1707 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001708}
Neal Norwitz6968b052007-02-27 19:02:19 +00001709
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710
1711PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001712"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00001713\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001714Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001715string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001716as in slice notation.");
1717
1718static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001719bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001720{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 PyObject *sub_obj;
1722 const char *str = PyBytes_AS_STRING(self), *sub;
1723 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001724 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001726
Antoine Pitrouac65d962011-10-20 23:54:17 +02001727 Py_buffer vsub;
1728 PyObject *count_obj;
1729
1730 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1731 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001733
Antoine Pitrouac65d962011-10-20 23:54:17 +02001734 if (sub_obj) {
1735 if (_getbuffer(sub_obj, &vsub) < 0)
1736 return NULL;
1737
1738 sub = vsub.buf;
1739 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001741 else {
1742 sub = &byte;
1743 sub_len = 1;
1744 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001747
Antoine Pitrouac65d962011-10-20 23:54:17 +02001748 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001749 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1750 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02001751
1752 if (sub_obj)
1753 PyBuffer_Release(&vsub);
1754
1755 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001756}
1757
1758
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001759/*[clinic input]
1760bytes.translate
1761
1762 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02001763 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764 Translation table, which must be a bytes object of length 256.
1765 [
1766 deletechars: object
1767 ]
1768 /
1769
1770Return a copy with each character mapped by the given translation table.
1771
1772All characters occurring in the optional argument deletechars are removed.
1773The remaining characters are mapped through the given translation table.
1774[clinic start generated code]*/
1775
1776PyDoc_STRVAR(bytes_translate__doc__,
1777"translate(table, [deletechars])\n"
1778"Return a copy with each character mapped by the given translation table.\n"
1779"\n"
1780" table\n"
1781" Translation table, which must be a bytes object of length 256.\n"
1782"\n"
1783"All characters occurring in the optional argument deletechars are removed.\n"
1784"The remaining characters are mapped through the given translation table.");
1785
1786#define BYTES_TRANSLATE_METHODDEF \
1787 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001788
1789static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
1791
1792static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001793bytes_translate(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001794{
1795 PyObject *return_value = NULL;
1796 PyObject *table;
1797 int group_right_1 = 0;
1798 PyObject *deletechars = NULL;
1799
1800 switch (PyTuple_GET_SIZE(args)) {
1801 case 1:
1802 if (!PyArg_ParseTuple(args, "O:translate", &table))
Martin v. Löwis0efea322014-07-27 17:29:17 +02001803 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804 break;
1805 case 2:
1806 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
Martin v. Löwis0efea322014-07-27 17:29:17 +02001807 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001808 group_right_1 = 1;
1809 break;
1810 default:
1811 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
Martin v. Löwis0efea322014-07-27 17:29:17 +02001812 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001813 }
Martin v. Löwis0efea322014-07-27 17:29:17 +02001814 return_value = bytes_translate_impl(self, table, group_right_1, deletechars);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001815
Martin v. Löwis0efea322014-07-27 17:29:17 +02001816exit:
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001817 return return_value;
1818}
1819
1820static PyObject *
1821bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Martin v. Löwis0efea322014-07-27 17:29:17 +02001822/*[clinic end generated code: output=f0f29a57f41df5d8 input=a90fad893c3c88d7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001823{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001824 char *input, *output;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001825 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001826 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001828 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 Py_ssize_t inlen, tablen, dellen = 0;
1830 PyObject *result;
1831 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001832
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 if (PyBytes_Check(table)) {
1834 table_chars = PyBytes_AS_STRING(table);
1835 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001837 else if (table == Py_None) {
1838 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 tablen = 256;
1840 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001841 else if (PyObject_AsCharBuffer(table, &table_chars, &tablen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 if (tablen != 256) {
1845 PyErr_SetString(PyExc_ValueError,
1846 "translation table must be 256 characters long");
1847 return NULL;
1848 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001850 if (deletechars != NULL) {
1851 if (PyBytes_Check(deletechars)) {
1852 del_table_chars = PyBytes_AS_STRING(deletechars);
1853 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001855 else if (PyObject_AsCharBuffer(deletechars, &del_table_chars, &dellen))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 return NULL;
1857 }
1858 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001859 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 dellen = 0;
1861 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 inlen = PyBytes_GET_SIZE(input_obj);
1864 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1865 if (result == NULL)
1866 return NULL;
1867 output_start = output = PyBytes_AsString(result);
1868 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001870 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 /* If no deletions are required, use faster code */
1872 for (i = inlen; --i >= 0; ) {
1873 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001874 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 changed = 1;
1876 }
1877 if (changed || !PyBytes_CheckExact(input_obj))
1878 return result;
1879 Py_DECREF(result);
1880 Py_INCREF(input_obj);
1881 return input_obj;
1882 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001884 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 for (i = 0; i < 256; i++)
1886 trans_table[i] = Py_CHARMASK(i);
1887 } else {
1888 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001889 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001893 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 for (i = inlen; --i >= 0; ) {
1896 c = Py_CHARMASK(*input++);
1897 if (trans_table[c] != -1)
1898 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1899 continue;
1900 changed = 1;
1901 }
1902 if (!changed && PyBytes_CheckExact(input_obj)) {
1903 Py_DECREF(result);
1904 Py_INCREF(input_obj);
1905 return input_obj;
1906 }
1907 /* Fix the size of the resulting string */
1908 if (inlen > 0)
1909 _PyBytes_Resize(&result, output - output_start);
1910 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911}
1912
1913
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001914/*[clinic input]
1915
1916@staticmethod
1917bytes.maketrans
1918
1919 frm: object
1920 to: object
1921 /
1922
1923Return a translation table useable for the bytes or bytearray translate method.
1924
1925The returned table will be one where each byte in frm is mapped to the byte at
1926the same position in to.
1927
1928The bytes objects frm and to must be of the same length.
1929[clinic start generated code]*/
1930
1931PyDoc_STRVAR(bytes_maketrans__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001932"maketrans(frm, to, /)\n"
1933"--\n"
1934"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001935"Return a translation table useable for the bytes or bytearray translate method.\n"
1936"\n"
1937"The returned table will be one where each byte in frm is mapped to the byte at\n"
1938"the same position in to.\n"
1939"\n"
1940"The bytes objects frm and to must be of the same length.");
1941
1942#define BYTES_MAKETRANS_METHODDEF \
1943 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},
1944
Georg Brandlabc38772009-04-12 15:51:51 +00001945static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001946bytes_maketrans_impl(PyObject *frm, PyObject *to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001947
1948static PyObject *
1949bytes_maketrans(void *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00001950{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001951 PyObject *return_value = NULL;
1952 PyObject *frm;
1953 PyObject *to;
1954
1955 if (!PyArg_UnpackTuple(args, "maketrans",
1956 2, 2,
1957 &frm, &to))
1958 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02001959 return_value = bytes_maketrans_impl(frm, to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001960
1961exit:
1962 return return_value;
1963}
1964
1965static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001966bytes_maketrans_impl(PyObject *frm, PyObject *to)
1967/*[clinic end generated code: output=89a3c3556975e466 input=d204f680f85da382]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001968{
1969 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00001970}
1971
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972/* find and count characters and substrings */
1973
/* Evaluates to a char * pointing at the first byte equal to c within the
   first target_len bytes of target, or NULL if there is no such byte. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
1976
1977/* String ops must return a string. */
1978/* If the object is subclass of string, create a copy */
1979Py_LOCAL(PyBytesObject *)
1980return_self(PyBytesObject *self)
1981{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 if (PyBytes_CheckExact(self)) {
1983 Py_INCREF(self);
1984 return self;
1985 }
1986 return (PyBytesObject *)PyBytes_FromStringAndSize(
1987 PyBytes_AS_STRING(self),
1988 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989}
1990
1991Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00001992countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001994 Py_ssize_t count=0;
1995 const char *start=target;
1996 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001997
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001998 while ( (start=findchar(start, end-start, c)) != NULL ) {
1999 count++;
2000 if (count >= maxcount)
2001 break;
2002 start += 1;
2003 }
2004 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005}
2006
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002007
2008/* Algorithms for different cases of string replacement */
2009
2010/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2011Py_LOCAL(PyBytesObject *)
2012replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 const char *to_s, Py_ssize_t to_len,
2014 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002016 char *self_s, *result_s;
2017 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002018 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002022
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002023 /* 1 at the end plus 1 after every character;
2024 count = min(maxcount, self_len + 1) */
2025 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002027 else
2028 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2029 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 /* Check for overflow */
2032 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002033 assert(count > 0);
2034 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 PyErr_SetString(PyExc_OverflowError,
2036 "replacement bytes are too long");
2037 return NULL;
2038 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002039 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 if (! (result = (PyBytesObject *)
2042 PyBytes_FromStringAndSize(NULL, result_len)) )
2043 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 self_s = PyBytes_AS_STRING(self);
2046 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002048 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 /* Lay the first one down (guaranteed this will occur) */
2051 Py_MEMCPY(result_s, to_s, to_len);
2052 result_s += to_len;
2053 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 for (i=0; i<count; i++) {
2056 *result_s++ = *self_s++;
2057 Py_MEMCPY(result_s, to_s, to_len);
2058 result_s += to_len;
2059 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002061 /* Copy the rest of the original string */
2062 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065}
2066
2067/* Special case for deleting a single character */
2068/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2069Py_LOCAL(PyBytesObject *)
2070replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 char *self_s, *result_s;
2074 char *start, *next, *end;
2075 Py_ssize_t self_len, result_len;
2076 Py_ssize_t count;
2077 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 self_len = PyBytes_GET_SIZE(self);
2080 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 count = countchar(self_s, self_len, from_c, maxcount);
2083 if (count == 0) {
2084 return return_self(self);
2085 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 result_len = self_len - count; /* from_len == 1 */
2088 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 if ( (result = (PyBytesObject *)
2091 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2092 return NULL;
2093 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 start = self_s;
2096 end = self_s + self_len;
2097 while (count-- > 0) {
2098 next = findchar(start, end-start, from_c);
2099 if (next == NULL)
2100 break;
2101 Py_MEMCPY(result_s, start, next-start);
2102 result_s += (next-start);
2103 start = next+1;
2104 }
2105 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108}
2109
2110/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2111
2112Py_LOCAL(PyBytesObject *)
2113replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002114 const char *from_s, Py_ssize_t from_len,
2115 Py_ssize_t maxcount) {
2116 char *self_s, *result_s;
2117 char *start, *next, *end;
2118 Py_ssize_t self_len, result_len;
2119 Py_ssize_t count, offset;
2120 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002122 self_len = PyBytes_GET_SIZE(self);
2123 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 count = stringlib_count(self_s, self_len,
2126 from_s, from_len,
2127 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002129 if (count == 0) {
2130 /* no matches */
2131 return return_self(self);
2132 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 result_len = self_len - (count * from_len);
2135 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 if ( (result = (PyBytesObject *)
2138 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2139 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002141 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 start = self_s;
2144 end = self_s + self_len;
2145 while (count-- > 0) {
2146 offset = stringlib_find(start, end-start,
2147 from_s, from_len,
2148 0);
2149 if (offset == -1)
2150 break;
2151 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 result_s += (next-start);
2156 start = next+from_len;
2157 }
2158 Py_MEMCPY(result_s, start, end-start);
2159 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160}
2161
2162/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2163Py_LOCAL(PyBytesObject *)
2164replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002165 char from_c, char to_c,
2166 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 char *self_s, *result_s, *start, *end, *next;
2169 Py_ssize_t self_len;
2170 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 /* The result string will be the same size */
2173 self_s = PyBytes_AS_STRING(self);
2174 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 if (next == NULL) {
2179 /* No matches; return the original string */
2180 return return_self(self);
2181 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 /* Need to make a new string */
2184 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2185 if (result == NULL)
2186 return NULL;
2187 result_s = PyBytes_AS_STRING(result);
2188 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 /* change everything in-place, starting with this one */
2191 start = result_s + (next-self_s);
2192 *start = to_c;
2193 start++;
2194 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 while (--maxcount > 0) {
2197 next = findchar(start, end-start, from_c);
2198 if (next == NULL)
2199 break;
2200 *next = to_c;
2201 start = next+1;
2202 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002204 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002205}
2206
2207/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2208Py_LOCAL(PyBytesObject *)
2209replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002210 const char *from_s, Py_ssize_t from_len,
2211 const char *to_s, Py_ssize_t to_len,
2212 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002214 char *result_s, *start, *end;
2215 char *self_s;
2216 Py_ssize_t self_len, offset;
2217 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002219 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 self_s = PyBytes_AS_STRING(self);
2222 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 offset = stringlib_find(self_s, self_len,
2225 from_s, from_len,
2226 0);
2227 if (offset == -1) {
2228 /* No matches; return the original string */
2229 return return_self(self);
2230 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002232 /* Need to make a new string */
2233 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2234 if (result == NULL)
2235 return NULL;
2236 result_s = PyBytes_AS_STRING(result);
2237 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002239 /* change everything in-place, starting with this one */
2240 start = result_s + offset;
2241 Py_MEMCPY(start, to_s, from_len);
2242 start += from_len;
2243 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 while ( --maxcount > 0) {
2246 offset = stringlib_find(start, end-start,
2247 from_s, from_len,
2248 0);
2249 if (offset==-1)
2250 break;
2251 Py_MEMCPY(start+offset, to_s, from_len);
2252 start += offset+from_len;
2253 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002256}
2257
2258/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2259Py_LOCAL(PyBytesObject *)
2260replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 char from_c,
2262 const char *to_s, Py_ssize_t to_len,
2263 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002264{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002265 char *self_s, *result_s;
2266 char *start, *next, *end;
2267 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002268 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002269 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 self_s = PyBytes_AS_STRING(self);
2272 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002274 count = countchar(self_s, self_len, from_c, maxcount);
2275 if (count == 0) {
2276 /* no matches, return unchanged */
2277 return return_self(self);
2278 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002280 /* use the difference between current and new, hence the "-1" */
2281 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002282 assert(count > 0);
2283 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002284 PyErr_SetString(PyExc_OverflowError,
2285 "replacement bytes are too long");
2286 return NULL;
2287 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002288 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 if ( (result = (PyBytesObject *)
2291 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2292 return NULL;
2293 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002294
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 start = self_s;
2296 end = self_s + self_len;
2297 while (count-- > 0) {
2298 next = findchar(start, end-start, from_c);
2299 if (next == NULL)
2300 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 if (next == start) {
2303 /* replace with the 'to' */
2304 Py_MEMCPY(result_s, to_s, to_len);
2305 result_s += to_len;
2306 start += 1;
2307 } else {
2308 /* copy the unchanged old then the 'to' */
2309 Py_MEMCPY(result_s, start, next-start);
2310 result_s += (next-start);
2311 Py_MEMCPY(result_s, to_s, to_len);
2312 result_s += to_len;
2313 start = next+1;
2314 }
2315 }
2316 /* Copy the remainder of the remaining string */
2317 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002320}
2321
2322/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2323Py_LOCAL(PyBytesObject *)
2324replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002325 const char *from_s, Py_ssize_t from_len,
2326 const char *to_s, Py_ssize_t to_len,
2327 Py_ssize_t maxcount) {
2328 char *self_s, *result_s;
2329 char *start, *next, *end;
2330 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002331 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002332 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002334 self_s = PyBytes_AS_STRING(self);
2335 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 count = stringlib_count(self_s, self_len,
2338 from_s, from_len,
2339 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 if (count == 0) {
2342 /* no matches, return unchanged */
2343 return return_self(self);
2344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 /* Check for overflow */
2347 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002348 assert(count > 0);
2349 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 PyErr_SetString(PyExc_OverflowError,
2351 "replacement bytes are too long");
2352 return NULL;
2353 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002354 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 if ( (result = (PyBytesObject *)
2357 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2358 return NULL;
2359 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 start = self_s;
2362 end = self_s + self_len;
2363 while (count-- > 0) {
2364 offset = stringlib_find(start, end-start,
2365 from_s, from_len,
2366 0);
2367 if (offset == -1)
2368 break;
2369 next = start+offset;
2370 if (next == start) {
2371 /* replace with the 'to' */
2372 Py_MEMCPY(result_s, to_s, to_len);
2373 result_s += to_len;
2374 start += from_len;
2375 } else {
2376 /* copy the unchanged old then the 'to' */
2377 Py_MEMCPY(result_s, start, next-start);
2378 result_s += (next-start);
2379 Py_MEMCPY(result_s, to_s, to_len);
2380 result_s += to_len;
2381 start = next+from_len;
2382 }
2383 }
2384 /* Copy the remainder of the remaining string */
2385 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002387 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002388}
2389
2390
2391Py_LOCAL(PyBytesObject *)
2392replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 const char *from_s, Py_ssize_t from_len,
2394 const char *to_s, Py_ssize_t to_len,
2395 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002396{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002397 if (maxcount < 0) {
2398 maxcount = PY_SSIZE_T_MAX;
2399 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2400 /* nothing to do; return the original string */
2401 return return_self(self);
2402 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 if (maxcount == 0 ||
2405 (from_len == 0 && to_len == 0)) {
2406 /* nothing to do; return the original string */
2407 return return_self(self);
2408 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 if (from_len == 0) {
2413 /* insert the 'to' string everywhere. */
2414 /* >>> "Python".replace("", ".") */
2415 /* '.P.y.t.h.o.n.' */
2416 return replace_interleave(self, to_s, to_len, maxcount);
2417 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002418
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2420 /* point for an empty self string to generate a non-empty string */
2421 /* Special case so the remaining code always gets a non-empty string */
2422 if (PyBytes_GET_SIZE(self) == 0) {
2423 return return_self(self);
2424 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 if (to_len == 0) {
2427 /* delete all occurrences of 'from' string */
2428 if (from_len == 1) {
2429 return replace_delete_single_character(
2430 self, from_s[0], maxcount);
2431 } else {
2432 return replace_delete_substring(self, from_s,
2433 from_len, maxcount);
2434 }
2435 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 if (from_len == to_len) {
2440 if (from_len == 1) {
2441 return replace_single_character_in_place(
2442 self,
2443 from_s[0],
2444 to_s[0],
2445 maxcount);
2446 } else {
2447 return replace_substring_in_place(
2448 self, from_s, from_len, to_s, to_len,
2449 maxcount);
2450 }
2451 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002452
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 /* Otherwise use the more generic algorithms */
2454 if (from_len == 1) {
2455 return replace_single_character(self, from_s[0],
2456 to_s, to_len, maxcount);
2457 } else {
2458 /* len('from')>=2, len('to')>=1 */
2459 return replace_substring(self, from_s, from_len, to_s, to_len,
2460 maxcount);
2461 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002462}
2463
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002464
2465/*[clinic input]
2466bytes.replace
2467
2468 old: object
2469 new: object
2470 count: Py_ssize_t = -1
2471 Maximum number of occurrences to replace.
2472 -1 (the default value) means replace all occurrences.
2473 /
2474
2475Return a copy with all occurrences of substring old replaced by new.
2476
2477If the optional argument count is given, only the first count occurrences are
2478replaced.
2479[clinic start generated code]*/
2480
2481PyDoc_STRVAR(bytes_replace__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002482"replace($self, old, new, count=-1, /)\n"
2483"--\n"
2484"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002485"Return a copy with all occurrences of substring old replaced by new.\n"
2486"\n"
2487" count\n"
2488" Maximum number of occurrences to replace.\n"
2489" -1 (the default value) means replace all occurrences.\n"
2490"\n"
2491"If the optional argument count is given, only the first count occurrences are\n"
2492"replaced.");
2493
2494#define BYTES_REPLACE_METHODDEF \
2495 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002496
2497static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002498bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002499
2500static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002501bytes_replace(PyBytesObject*self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002502{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002503 PyObject *return_value = NULL;
2504 PyObject *old;
2505 PyObject *new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002508 if (!PyArg_ParseTuple(args,
2509 "OO|n:replace",
2510 &old, &new, &count))
2511 goto exit;
2512 return_value = bytes_replace_impl(self, old, new, count);
2513
2514exit:
2515 return return_value;
2516}
2517
2518static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002519bytes_replace_impl(PyBytesObject*self, PyObject *old, PyObject *new, Py_ssize_t count)
2520/*[clinic end generated code: output=14ce72f4f9cb91cf input=d3ac254ea50f4ac1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002521{
2522 const char *old_s, *new_s;
2523 Py_ssize_t old_len, new_len;
2524
2525 if (PyBytes_Check(old)) {
2526 old_s = PyBytes_AS_STRING(old);
2527 old_len = PyBytes_GET_SIZE(old);
2528 }
2529 else if (PyObject_AsCharBuffer(old, &old_s, &old_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002530 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002531
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002532 if (PyBytes_Check(new)) {
2533 new_s = PyBytes_AS_STRING(new);
2534 new_len = PyBytes_GET_SIZE(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002536 else if (PyObject_AsCharBuffer(new, &new_s, &new_len))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002537 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002539 return (PyObject *)replace((PyBytesObject *) self,
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002540 old_s, old_len,
2541 new_s, new_len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542}
2543
2544/** End DALKE **/
2545
2546/* Matches the end (direction >= 0) or start (direction < 0) of self
2547 * against substr, using the start and end arguments. Returns
2548 * -1 on error, 0 if not found and 1 if found.
2549 */
2550Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002551_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002552 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 Py_ssize_t len = PyBytes_GET_SIZE(self);
2555 Py_ssize_t slen;
2556 const char* sub;
2557 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 if (PyBytes_Check(substr)) {
2560 sub = PyBytes_AS_STRING(substr);
2561 slen = PyBytes_GET_SIZE(substr);
2562 }
2563 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2564 return -1;
2565 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 if (direction < 0) {
2570 /* startswith */
2571 if (start+slen > len)
2572 return 0;
2573 } else {
2574 /* endswith */
2575 if (end-start < slen || start > len)
2576 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 if (end-slen > start)
2579 start = end - slen;
2580 }
2581 if (end-start >= slen)
2582 return ! memcmp(str+start, sub, slen);
2583 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584}
2585
2586
2587PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002588"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589\n\
2590Return True if B starts with the specified prefix, False otherwise.\n\
2591With optional start, test B beginning at that position.\n\
2592With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002593prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002594
2595static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002596bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002597{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002598 Py_ssize_t start = 0;
2599 Py_ssize_t end = PY_SSIZE_T_MAX;
2600 PyObject *subobj;
2601 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002602
Jesus Ceaac451502011-04-20 17:09:23 +02002603 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002604 return NULL;
2605 if (PyTuple_Check(subobj)) {
2606 Py_ssize_t i;
2607 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2608 result = _bytes_tailmatch(self,
2609 PyTuple_GET_ITEM(subobj, i),
2610 start, end, -1);
2611 if (result == -1)
2612 return NULL;
2613 else if (result) {
2614 Py_RETURN_TRUE;
2615 }
2616 }
2617 Py_RETURN_FALSE;
2618 }
2619 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002620 if (result == -1) {
2621 if (PyErr_ExceptionMatches(PyExc_TypeError))
2622 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2623 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002625 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 else
2627 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628}
2629
2630
2631PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002632"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002633\n\
2634Return True if B ends with the specified suffix, False otherwise.\n\
2635With optional start, test B beginning at that position.\n\
2636With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002637suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002638
2639static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002640bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 Py_ssize_t start = 0;
2643 Py_ssize_t end = PY_SSIZE_T_MAX;
2644 PyObject *subobj;
2645 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002646
Jesus Ceaac451502011-04-20 17:09:23 +02002647 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 return NULL;
2649 if (PyTuple_Check(subobj)) {
2650 Py_ssize_t i;
2651 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2652 result = _bytes_tailmatch(self,
2653 PyTuple_GET_ITEM(subobj, i),
2654 start, end, +1);
2655 if (result == -1)
2656 return NULL;
2657 else if (result) {
2658 Py_RETURN_TRUE;
2659 }
2660 }
2661 Py_RETURN_FALSE;
2662 }
2663 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002664 if (result == -1) {
2665 if (PyErr_ExceptionMatches(PyExc_TypeError))
2666 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2667 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002669 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 else
2671 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002672}
2673
2674
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002675/*[clinic input]
2676bytes.decode
2677
2678 encoding: str(c_default="NULL") = 'utf-8'
2679 The encoding with which to decode the bytes.
2680 errors: str(c_default="NULL") = 'strict'
2681 The error handling scheme to use for the handling of decoding errors.
2682 The default is 'strict' meaning that decoding errors raise a
2683 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2684 as well as any other name registered with codecs.register_error that
2685 can handle UnicodeDecodeErrors.
2686
2687Decode the bytes using the codec registered for encoding.
2688[clinic start generated code]*/
2689
2690PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002691"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
2692"--\n"
2693"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002694"Decode the bytes using the codec registered for encoding.\n"
2695"\n"
2696" encoding\n"
2697" The encoding with which to decode the bytes.\n"
2698" errors\n"
2699" The error handling scheme to use for the handling of decoding errors.\n"
2700" The default is \'strict\' meaning that decoding errors raise a\n"
2701" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
2702" as well as any other name registered with codecs.register_error that\n"
2703" can handle UnicodeDecodeErrors.");
2704
2705#define BYTES_DECODE_METHODDEF \
2706 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
2707
2708static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002709bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002710
2711static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002712bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002713{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002714 PyObject *return_value = NULL;
2715 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 const char *encoding = NULL;
2717 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00002718
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002719 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2720 "|ss:decode", _keywords,
2721 &encoding, &errors))
2722 goto exit;
2723 return_value = bytes_decode_impl(self, encoding, errors);
2724
2725exit:
2726 return return_value;
2727}
2728
2729static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002730bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
2731/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002732{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002733 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002734}
2735
Guido van Rossum20188312006-05-05 15:15:40 +00002736
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002737/*[clinic input]
2738bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002739
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002740 keepends: int(py_default="False") = 0
2741
2742Return a list of the lines in the bytes, breaking at line boundaries.
2743
2744Line breaks are not included in the resulting list unless keepends is given and
2745true.
2746[clinic start generated code]*/
2747
2748PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002749"splitlines($self, /, keepends=False)\n"
2750"--\n"
2751"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002752"Return a list of the lines in the bytes, breaking at line boundaries.\n"
2753"\n"
2754"Line breaks are not included in the resulting list unless keepends is given and\n"
2755"true.");
2756
2757#define BYTES_SPLITLINES_METHODDEF \
2758 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
2759
2760static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002761bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002762
2763static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002764bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002765{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002766 PyObject *return_value = NULL;
2767 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002768 int keepends = 0;
2769
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002770 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
2771 "|i:splitlines", _keywords,
2772 &keepends))
2773 goto exit;
2774 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002776exit:
2777 return return_value;
2778}
2779
2780static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002781bytes_splitlines_impl(PyBytesObject*self, int keepends)
2782/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002783{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002784 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002785 (PyObject*) self, PyBytes_AS_STRING(self),
2786 PyBytes_GET_SIZE(self), keepends
2787 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002788}
2789
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002790static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002791hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 if (c >= 128)
2794 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002795 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 return c - '0';
2797 else {
David Malcolm96960882010-11-05 17:23:41 +00002798 if (Py_ISUPPER(c))
2799 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 if (c >= 'a' && c <= 'f')
2801 return c - 'a' + 10;
2802 }
2803 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002804}
2805
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002806/*[clinic input]
2807@classmethod
2808bytes.fromhex
2809
2810 string: unicode
2811 /
2812
2813Create a bytes object from a string of hexadecimal numbers.
2814
2815Spaces between two numbers are accepted.
2816Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2817[clinic start generated code]*/
2818
2819PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002820"fromhex($type, string, /)\n"
2821"--\n"
2822"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002823"Create a bytes object from a string of hexadecimal numbers.\n"
2824"\n"
2825"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02002826"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002827
2828#define BYTES_FROMHEX_METHODDEF \
2829 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
2830
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002831static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002832bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002833
2834static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002835bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002836{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002837 PyObject *return_value = NULL;
2838 PyObject *string;
2839
2840 if (!PyArg_ParseTuple(args,
2841 "U:fromhex",
2842 &string))
2843 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002844 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002845
2846exit:
2847 return return_value;
2848}
2849
2850static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002851bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2852/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002853{
2854 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 Py_ssize_t hexlen, byteslen, i, j;
2857 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002858 void *data;
2859 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002860
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002861 assert(PyUnicode_Check(string));
2862 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002863 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002864 kind = PyUnicode_KIND(string);
2865 data = PyUnicode_DATA(string);
2866 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002867
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002868 byteslen = hexlen/2; /* This overestimates if there are spaces */
2869 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2870 if (!newstring)
2871 return NULL;
2872 buf = PyBytes_AS_STRING(newstring);
2873 for (i = j = 0; i < hexlen; i += 2) {
2874 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002875 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 i++;
2877 if (i >= hexlen)
2878 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002879 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2880 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002881 if (top == -1 || bot == -1) {
2882 PyErr_Format(PyExc_ValueError,
2883 "non-hexadecimal number found in "
2884 "fromhex() arg at position %zd", i);
2885 goto error;
2886 }
2887 buf[j++] = (top << 4) + bot;
2888 }
2889 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2890 goto error;
2891 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002892
2893 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 Py_XDECREF(newstring);
2895 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002896}
2897
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002898/*[clinic input]
2899bytes.__sizeof__ as bytes_sizeof
2900
2901 self: self(type="PyBytesObject *")
2902
2903Returns the size of the bytes object in memory, in bytes.
2904[clinic start generated code]*/
2905
2906PyDoc_STRVAR(bytes_sizeof__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002907"__sizeof__($self, /)\n"
2908"--\n"
2909"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002910"Returns the size of the bytes object in memory, in bytes.");
2911
2912#define BYTES_SIZEOF_METHODDEF \
2913 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, bytes_sizeof__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002914
2915static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002916bytes_sizeof_impl(PyBytesObject *self);
2917
2918static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002919bytes_sizeof(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002920{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002921 return bytes_sizeof_impl(self);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002922}
2923
2924static PyObject *
2925bytes_sizeof_impl(PyBytesObject *self)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002926/*[clinic end generated code: output=44933279343f24ae input=bee4c64bb42078ed]*/
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002927{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 Py_ssize_t res;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002929 res = PyBytesObject_SIZE + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002930 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00002931}
2932
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002933
2934static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002935bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002936{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002937 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002938}
2939
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002940
2941static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002942bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002943 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2944 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2945 _Py_capitalize__doc__},
2946 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2947 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002948 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2950 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02002951 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002952 expandtabs__doc__},
2953 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002954 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002955 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2956 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2957 _Py_isalnum__doc__},
2958 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2959 _Py_isalpha__doc__},
2960 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2961 _Py_isdigit__doc__},
2962 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2963 _Py_islower__doc__},
2964 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2965 _Py_isspace__doc__},
2966 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2967 _Py_istitle__doc__},
2968 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2969 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002970 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002971 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2972 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002973 BYTES_LSTRIP_METHODDEF
2974 BYTES_MAKETRANS_METHODDEF
2975 BYTES_PARTITION_METHODDEF
2976 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002977 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2978 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2979 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002980 BYTES_RPARTITION_METHODDEF
2981 BYTES_RSPLIT_METHODDEF
2982 BYTES_RSTRIP_METHODDEF
2983 BYTES_SPLIT_METHODDEF
2984 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002985 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2986 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002987 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002988 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2989 _Py_swapcase__doc__},
2990 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002991 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2993 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002994 BYTES_SIZEOF_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002995 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002996};
2997
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002998static PyObject *
2999str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3000
3001static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003002bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 PyObject *x = NULL;
3005 const char *encoding = NULL;
3006 const char *errors = NULL;
3007 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003008 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 Py_ssize_t size;
3010 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003011 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 if (type != &PyBytes_Type)
3014 return str_subtype_new(type, args, kwds);
3015 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3016 &encoding, &errors))
3017 return NULL;
3018 if (x == NULL) {
3019 if (encoding != NULL || errors != NULL) {
3020 PyErr_SetString(PyExc_TypeError,
3021 "encoding or errors without sequence "
3022 "argument");
3023 return NULL;
3024 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003025 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003026 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 if (PyUnicode_Check(x)) {
3029 /* Encode via the codec registry */
3030 if (encoding == NULL) {
3031 PyErr_SetString(PyExc_TypeError,
3032 "string argument without an encoding");
3033 return NULL;
3034 }
3035 new = PyUnicode_AsEncodedString(x, encoding, errors);
3036 if (new == NULL)
3037 return NULL;
3038 assert(PyBytes_Check(new));
3039 return new;
3040 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003041
3042 /* We'd like to call PyObject_Bytes here, but we need to check for an
3043 integer argument before deferring to PyBytes_FromObject, something
3044 PyObject_Bytes doesn't do. */
3045 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3046 if (func != NULL) {
3047 new = PyObject_CallFunctionObjArgs(func, NULL);
3048 Py_DECREF(func);
3049 if (new == NULL)
3050 return NULL;
3051 if (!PyBytes_Check(new)) {
3052 PyErr_Format(PyExc_TypeError,
3053 "__bytes__ returned non-bytes (type %.200s)",
3054 Py_TYPE(new)->tp_name);
3055 Py_DECREF(new);
3056 return NULL;
3057 }
3058 return new;
3059 }
3060 else if (PyErr_Occurred())
3061 return NULL;
3062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 /* Is it an integer? */
3064 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3065 if (size == -1 && PyErr_Occurred()) {
3066 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3067 return NULL;
3068 PyErr_Clear();
3069 }
3070 else if (size < 0) {
3071 PyErr_SetString(PyExc_ValueError, "negative count");
3072 return NULL;
3073 }
3074 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003075 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003076 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003078 return new;
3079 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 /* If it's not unicode, there can't be encoding or errors */
3082 if (encoding != NULL || errors != NULL) {
3083 PyErr_SetString(PyExc_TypeError,
3084 "encoding or errors without a string argument");
3085 return NULL;
3086 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003087
3088 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003089}
3090
3091PyObject *
3092PyBytes_FromObject(PyObject *x)
3093{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 PyObject *new, *it;
3095 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 if (x == NULL) {
3098 PyErr_BadInternalCall();
3099 return NULL;
3100 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003101
3102 if (PyBytes_CheckExact(x)) {
3103 Py_INCREF(x);
3104 return x;
3105 }
3106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003107 /* Use the modern buffer interface */
3108 if (PyObject_CheckBuffer(x)) {
3109 Py_buffer view;
3110 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3111 return NULL;
3112 new = PyBytes_FromStringAndSize(NULL, view.len);
3113 if (!new)
3114 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003115 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3116 &view, view.len, 'C') < 0)
3117 goto fail;
3118 PyBuffer_Release(&view);
3119 return new;
3120 fail:
3121 Py_XDECREF(new);
3122 PyBuffer_Release(&view);
3123 return NULL;
3124 }
3125 if (PyUnicode_Check(x)) {
3126 PyErr_SetString(PyExc_TypeError,
3127 "cannot convert unicode object to bytes");
3128 return NULL;
3129 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003131 if (PyList_CheckExact(x)) {
3132 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3133 if (new == NULL)
3134 return NULL;
3135 for (i = 0; i < Py_SIZE(x); i++) {
3136 Py_ssize_t value = PyNumber_AsSsize_t(
3137 PyList_GET_ITEM(x, i), PyExc_ValueError);
3138 if (value == -1 && PyErr_Occurred()) {
3139 Py_DECREF(new);
3140 return NULL;
3141 }
3142 if (value < 0 || value >= 256) {
3143 PyErr_SetString(PyExc_ValueError,
3144 "bytes must be in range(0, 256)");
3145 Py_DECREF(new);
3146 return NULL;
3147 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003148 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003149 }
3150 return new;
3151 }
3152 if (PyTuple_CheckExact(x)) {
3153 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3154 if (new == NULL)
3155 return NULL;
3156 for (i = 0; i < Py_SIZE(x); i++) {
3157 Py_ssize_t value = PyNumber_AsSsize_t(
3158 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3159 if (value == -1 && PyErr_Occurred()) {
3160 Py_DECREF(new);
3161 return NULL;
3162 }
3163 if (value < 0 || value >= 256) {
3164 PyErr_SetString(PyExc_ValueError,
3165 "bytes must be in range(0, 256)");
3166 Py_DECREF(new);
3167 return NULL;
3168 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003169 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003170 }
3171 return new;
3172 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003174 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003175 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003176 if (size == -1 && PyErr_Occurred())
3177 return NULL;
3178 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3179 returning a shared empty bytes string. This required because we
3180 want to call _PyBytes_Resize() the returned object, which we can
3181 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003182 if (size == 0)
3183 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003184 new = PyBytes_FromStringAndSize(NULL, size);
3185 if (new == NULL)
3186 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003187 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003189 /* Get the iterator */
3190 it = PyObject_GetIter(x);
3191 if (it == NULL)
3192 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 /* Run the iterator to exhaustion */
3195 for (i = 0; ; i++) {
3196 PyObject *item;
3197 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003199 /* Get the next item */
3200 item = PyIter_Next(it);
3201 if (item == NULL) {
3202 if (PyErr_Occurred())
3203 goto error;
3204 break;
3205 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003207 /* Interpret it as an int (__index__) */
3208 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3209 Py_DECREF(item);
3210 if (value == -1 && PyErr_Occurred())
3211 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003213 /* Range check */
3214 if (value < 0 || value >= 256) {
3215 PyErr_SetString(PyExc_ValueError,
3216 "bytes must be in range(0, 256)");
3217 goto error;
3218 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003220 /* Append the byte */
3221 if (i >= size) {
3222 size = 2 * size + 1;
3223 if (_PyBytes_Resize(&new, size) < 0)
3224 goto error;
3225 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003226 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003227 }
3228 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003230 /* Clean up and return success */
3231 Py_DECREF(it);
3232 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003233
3234 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003235 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003236 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003237 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003238}
3239
3240static PyObject *
3241str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003243 PyObject *tmp, *pnew;
3244 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003246 assert(PyType_IsSubtype(type, &PyBytes_Type));
3247 tmp = bytes_new(&PyBytes_Type, args, kwds);
3248 if (tmp == NULL)
3249 return NULL;
3250 assert(PyBytes_CheckExact(tmp));
3251 n = PyBytes_GET_SIZE(tmp);
3252 pnew = type->tp_alloc(type, n);
3253 if (pnew != NULL) {
3254 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3255 PyBytes_AS_STRING(tmp), n+1);
3256 ((PyBytesObject *)pnew)->ob_shash =
3257 ((PyBytesObject *)tmp)->ob_shash;
3258 }
3259 Py_DECREF(tmp);
3260 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003261}
3262
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003263PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003264"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003265bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003266bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003267bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3268bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003269\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003270Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003271 - an iterable yielding integers in range(256)\n\
3272 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003273 - any object implementing the buffer API.\n\
3274 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003275
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003276static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003277
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003278PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003279 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3280 "bytes",
3281 PyBytesObject_SIZE,
3282 sizeof(char),
3283 bytes_dealloc, /* tp_dealloc */
3284 0, /* tp_print */
3285 0, /* tp_getattr */
3286 0, /* tp_setattr */
3287 0, /* tp_reserved */
3288 (reprfunc)bytes_repr, /* tp_repr */
3289 0, /* tp_as_number */
3290 &bytes_as_sequence, /* tp_as_sequence */
3291 &bytes_as_mapping, /* tp_as_mapping */
3292 (hashfunc)bytes_hash, /* tp_hash */
3293 0, /* tp_call */
3294 bytes_str, /* tp_str */
3295 PyObject_GenericGetAttr, /* tp_getattro */
3296 0, /* tp_setattro */
3297 &bytes_as_buffer, /* tp_as_buffer */
3298 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3299 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3300 bytes_doc, /* tp_doc */
3301 0, /* tp_traverse */
3302 0, /* tp_clear */
3303 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3304 0, /* tp_weaklistoffset */
3305 bytes_iter, /* tp_iter */
3306 0, /* tp_iternext */
3307 bytes_methods, /* tp_methods */
3308 0, /* tp_members */
3309 0, /* tp_getset */
3310 &PyBaseObject_Type, /* tp_base */
3311 0, /* tp_dict */
3312 0, /* tp_descr_get */
3313 0, /* tp_descr_set */
3314 0, /* tp_dictoffset */
3315 0, /* tp_init */
3316 0, /* tp_alloc */
3317 bytes_new, /* tp_new */
3318 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003319};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003320
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003321void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003322PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003323{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 assert(pv != NULL);
3325 if (*pv == NULL)
3326 return;
3327 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003328 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003329 return;
3330 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003331
3332 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3333 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003334 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003335 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003336
Antoine Pitrou161d6952014-05-01 14:36:20 +02003337 wb.len = -1;
3338 if (_getbuffer(w, &wb) < 0) {
3339 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3340 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3341 Py_CLEAR(*pv);
3342 return;
3343 }
3344
3345 oldsize = PyBytes_GET_SIZE(*pv);
3346 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3347 PyErr_NoMemory();
3348 goto error;
3349 }
3350 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3351 goto error;
3352
3353 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3354 PyBuffer_Release(&wb);
3355 return;
3356
3357 error:
3358 PyBuffer_Release(&wb);
3359 Py_CLEAR(*pv);
3360 return;
3361 }
3362
3363 else {
3364 /* Multiple references, need to create new object */
3365 PyObject *v;
3366 v = bytes_concat(*pv, w);
3367 Py_DECREF(*pv);
3368 *pv = v;
3369 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003370}
3371
3372void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003373PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003374{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003375 PyBytes_Concat(pv, w);
3376 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003377}
3378
3379
3380/* The following function breaks the notion that strings are immutable:
3381 it changes the size of a string. We get away with this only if there
3382 is only one module referencing the object. You can also think of it
3383 as creating a new string object and destroying the old one, only
3384 more efficiently. In any case, don't use this if the string may
3385 already be known to some other part of the code...
3386 Note that if there's not enough memory to resize the string, the original
3387 string object at *pv is deallocated, *pv is set to NULL, an "out of
3388 memory" exception is set, and -1 is returned. Else (on success) 0 is
3389 returned, and the value in *pv may or may not be the same as on input.
3390 As always, an extra byte is allocated for a trailing \0 byte (newsize
3391 does *not* include that), and a trailing \0 byte is stored.
3392*/
3393
3394int
3395_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3396{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003397 PyObject *v;
3398 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003399 v = *pv;
3400 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3401 *pv = 0;
3402 Py_DECREF(v);
3403 PyErr_BadInternalCall();
3404 return -1;
3405 }
3406 /* XXX UNREF/NEWREF interface should be more symmetrical */
3407 _Py_DEC_REFTOTAL;
3408 _Py_ForgetReference(v);
3409 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003410 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003411 if (*pv == NULL) {
3412 PyObject_Del(v);
3413 PyErr_NoMemory();
3414 return -1;
3415 }
3416 _Py_NewReference(*pv);
3417 sv = (PyBytesObject *) *pv;
3418 Py_SIZE(sv) = newsize;
3419 sv->ob_sval[newsize] = '\0';
3420 sv->ob_shash = -1; /* invalidate cached hash value */
3421 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003422}
3423
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003424void
3425PyBytes_Fini(void)
3426{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003427 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003428 for (i = 0; i < UCHAR_MAX + 1; i++)
3429 Py_CLEAR(characters[i]);
3430 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003431}
3432
Benjamin Peterson4116f362008-05-27 00:36:20 +00003433/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003434
3435typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003436 PyObject_HEAD
3437 Py_ssize_t it_index;
3438 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003439} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003440
3441static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003442striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003443{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003444 _PyObject_GC_UNTRACK(it);
3445 Py_XDECREF(it->it_seq);
3446 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003447}
3448
3449static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003450striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003451{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003452 Py_VISIT(it->it_seq);
3453 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003454}
3455
3456static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003457striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003459 PyBytesObject *seq;
3460 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003462 assert(it != NULL);
3463 seq = it->it_seq;
3464 if (seq == NULL)
3465 return NULL;
3466 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003468 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3469 item = PyLong_FromLong(
3470 (unsigned char)seq->ob_sval[it->it_index]);
3471 if (item != NULL)
3472 ++it->it_index;
3473 return item;
3474 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003476 Py_DECREF(seq);
3477 it->it_seq = NULL;
3478 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003479}
3480
3481static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003482striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003484 Py_ssize_t len = 0;
3485 if (it->it_seq)
3486 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3487 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003488}
3489
3490PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003491 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003492
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003493static PyObject *
3494striter_reduce(striterobject *it)
3495{
3496 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003497 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003498 it->it_seq, it->it_index);
3499 } else {
3500 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3501 if (u == NULL)
3502 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003503 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003504 }
3505}
3506
3507PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3508
3509static PyObject *
3510striter_setstate(striterobject *it, PyObject *state)
3511{
3512 Py_ssize_t index = PyLong_AsSsize_t(state);
3513 if (index == -1 && PyErr_Occurred())
3514 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003515 if (it->it_seq != NULL) {
3516 if (index < 0)
3517 index = 0;
3518 else if (index > PyBytes_GET_SIZE(it->it_seq))
3519 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3520 it->it_index = index;
3521 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003522 Py_RETURN_NONE;
3523}
3524
3525PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3526
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003527static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003528 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3529 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003530 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3531 reduce_doc},
3532 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3533 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003534 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003535};
3536
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003537PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003538 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3539 "bytes_iterator", /* tp_name */
3540 sizeof(striterobject), /* tp_basicsize */
3541 0, /* tp_itemsize */
3542 /* methods */
3543 (destructor)striter_dealloc, /* tp_dealloc */
3544 0, /* tp_print */
3545 0, /* tp_getattr */
3546 0, /* tp_setattr */
3547 0, /* tp_reserved */
3548 0, /* tp_repr */
3549 0, /* tp_as_number */
3550 0, /* tp_as_sequence */
3551 0, /* tp_as_mapping */
3552 0, /* tp_hash */
3553 0, /* tp_call */
3554 0, /* tp_str */
3555 PyObject_GenericGetAttr, /* tp_getattro */
3556 0, /* tp_setattro */
3557 0, /* tp_as_buffer */
3558 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3559 0, /* tp_doc */
3560 (traverseproc)striter_traverse, /* tp_traverse */
3561 0, /* tp_clear */
3562 0, /* tp_richcompare */
3563 0, /* tp_weaklistoffset */
3564 PyObject_SelfIter, /* tp_iter */
3565 (iternextfunc)striter_next, /* tp_iternext */
3566 striter_methods, /* tp_methods */
3567 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003568};
3569
3570static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003571bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003572{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003573 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003575 if (!PyBytes_Check(seq)) {
3576 PyErr_BadInternalCall();
3577 return NULL;
3578 }
3579 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3580 if (it == NULL)
3581 return NULL;
3582 it->it_index = 0;
3583 Py_INCREF(seq);
3584 it->it_seq = (PyBytesObject *)seq;
3585 _PyObject_GC_TRACK(it);
3586 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003587}