blob: 30b0e50534412e00ff8cd81b055acb49f5b24df5 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Christian Heimes2c9c7a52008-05-26 13:42:13 +000015#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000016Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000017#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000018
Christian Heimes2c9c7a52008-05-26 13:42:13 +000019static PyBytesObject *characters[UCHAR_MAX + 1];
20static PyBytesObject *nullstring;
21
Mark Dickinsonfd24b322008-12-06 15:33:31 +000022/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
23 for a string of length n should request PyBytesObject_SIZE + n bytes.
24
25 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
26 3 bytes per string allocation on a typical system.
27*/
28#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
29
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000031 For PyBytes_FromString(), the parameter `str' points to a null-terminated
32 string containing exactly `size' bytes.
33
   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
36 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000042 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043 alter the data yourself, since the strings may be shared.
44
45 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020047 allocated for string data, not counting the null terminating character.
48 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000049 PyBytes_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyBytes_FromString()).
51*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020052static PyObject *
53_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000054{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020055 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020056 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 Py_INCREF(op);
63 return (PyObject *)op;
64 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065
Victor Stinner049e5092014-08-17 22:20:00 +020066 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 PyErr_SetString(PyExc_OverflowError,
68 "byte string is too large");
69 return NULL;
70 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020073 if (use_calloc)
74 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
75 else
76 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 if (op == NULL)
78 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010079 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020081 if (!use_calloc)
82 op->ob_sval[size] = '\0';
83 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (size == 0) {
85 nullstring = op;
86 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 }
88 return (PyObject *) op;
89}
90
91PyObject *
92PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
93{
94 PyBytesObject *op;
95 if (size < 0) {
96 PyErr_SetString(PyExc_SystemError,
97 "Negative size passed to PyBytes_FromStringAndSize");
98 return NULL;
99 }
100 if (size == 1 && str != NULL &&
101 (op = characters[*str & UCHAR_MAX]) != NULL)
102 {
103#ifdef COUNT_ALLOCS
104 one_strings++;
105#endif
106 Py_INCREF(op);
107 return (PyObject *)op;
108 }
109
110 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
111 if (op == NULL)
112 return NULL;
113 if (str == NULL)
114 return (PyObject *) op;
115
116 Py_MEMCPY(op->ob_sval, str, size);
117 /* share short strings */
118 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 characters[*str & UCHAR_MAX] = op;
120 Py_INCREF(op);
121 }
122 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000123}
124
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000125PyObject *
126PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200128 size_t size;
129 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 assert(str != NULL);
132 size = strlen(str);
133 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
134 PyErr_SetString(PyExc_OverflowError,
135 "byte string is too long");
136 return NULL;
137 }
138 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000139#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000141#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 /* Inline PyObject_NewVar */
154 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
155 if (op == NULL)
156 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100157 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 op->ob_shash = -1;
159 Py_MEMCPY(op->ob_sval, str, size+1);
160 /* share short strings */
161 if (size == 0) {
162 nullstring = op;
163 Py_INCREF(op);
164 } else if (size == 1) {
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000169}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000170
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000171PyObject *
172PyBytes_FromFormatV(const char *format, va_list vargs)
173{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000180 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 /* step 1: figure out how large a buffer we need */
182 for (f = format; *f; f++) {
183 if (*f == '%') {
184 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000185 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
189 * they don't affect the amount of space we reserve.
190 */
191 if ((*f == 'l' || *f == 'z') &&
192 (f[1] == 'd' || f[1] == 'u'))
193 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 switch (*f) {
196 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100197 {
198 int c = va_arg(count, int);
199 if (c < 0 || c > 255) {
200 PyErr_SetString(PyExc_OverflowError,
201 "PyBytes_FromFormatV(): %c format "
202 "expects an integer in range [0; 255]");
203 return NULL;
204 }
205 n++;
206 break;
207 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 case '%':
209 n++;
210 break;
211 case 'd': case 'u': case 'i': case 'x':
212 (void) va_arg(count, int);
213 /* 20 bytes is enough to hold a 64-bit
214 integer. Decimal takes the most space.
215 This isn't enough for octal. */
216 n += 20;
217 break;
218 case 's':
219 s = va_arg(count, char*);
220 n += strlen(s);
221 break;
222 case 'p':
223 (void) va_arg(count, int);
224 /* maximum 64-bit pointer representation:
225 * 0xffffffffffffffff
226 * so 19 characters is enough.
227 * XXX I count 18 -- what's the extra for?
228 */
229 n += 19;
230 break;
231 default:
232 /* if we stumble upon an unknown
233 formatting code, copy the rest of
234 the format string to the output
235 string. (we cannot just skip the
236 code, since there's no way to know
237 what's in the argument list) */
238 n += strlen(p);
239 goto expand;
240 }
241 } else
242 n++;
243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000244 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 /* step 2: fill the buffer */
246 /* Since we've analyzed how much space we need for the worst case,
247 use sprintf directly instead of the slower PyOS_snprintf. */
248 string = PyBytes_FromStringAndSize(NULL, n);
249 if (!string)
250 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 for (f = format; *f; f++) {
255 if (*f == '%') {
256 const char* p = f++;
257 Py_ssize_t i;
258 int longflag = 0;
259 int size_tflag = 0;
260 /* parse the width.precision part (we're only
261 interested in the precision value, if any) */
262 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000263 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 n = (n*10) + *f++ - '0';
265 if (*f == '.') {
266 f++;
267 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000268 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 n = (n*10) + *f++ - '0';
270 }
David Malcolm96960882010-11-05 17:23:41 +0000271 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 f++;
273 /* handle the long flag, but only for %ld and %lu.
274 others can be added when necessary. */
275 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
276 longflag = 1;
277 ++f;
278 }
279 /* handle the size_t flag. */
280 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
281 size_tflag = 1;
282 ++f;
283 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 switch (*f) {
286 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100287 {
288 int c = va_arg(vargs, int);
289 /* c has been checked for overflow in the first step */
290 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100292 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 case 'd':
294 if (longflag)
295 sprintf(s, "%ld", va_arg(vargs, long));
296 else if (size_tflag)
297 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
298 va_arg(vargs, Py_ssize_t));
299 else
300 sprintf(s, "%d", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'u':
304 if (longflag)
305 sprintf(s, "%lu",
306 va_arg(vargs, unsigned long));
307 else if (size_tflag)
308 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
309 va_arg(vargs, size_t));
310 else
311 sprintf(s, "%u",
312 va_arg(vargs, unsigned int));
313 s += strlen(s);
314 break;
315 case 'i':
316 sprintf(s, "%i", va_arg(vargs, int));
317 s += strlen(s);
318 break;
319 case 'x':
320 sprintf(s, "%x", va_arg(vargs, int));
321 s += strlen(s);
322 break;
323 case 's':
324 p = va_arg(vargs, char*);
325 i = strlen(p);
326 if (n > 0 && i > n)
327 i = n;
328 Py_MEMCPY(s, p, i);
329 s += i;
330 break;
331 case 'p':
332 sprintf(s, "%p", va_arg(vargs, void*));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (s[1] == 'X')
335 s[1] = 'x';
336 else if (s[1] != 'x') {
337 memmove(s+2, s, strlen(s)+1);
338 s[0] = '0';
339 s[1] = 'x';
340 }
341 s += strlen(s);
342 break;
343 case '%':
344 *s++ = '%';
345 break;
346 default:
347 strcpy(s, p);
348 s += strlen(s);
349 goto end;
350 }
351 } else
352 *s++ = *f;
353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354
355 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
357 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358}
359
360PyObject *
361PyBytes_FromFormat(const char *format, ...)
362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyObject* ret;
364 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365
366#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 ret = PyBytes_FromFormatV(format, vargs);
372 va_end(vargs);
373 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000374}
375
Ethan Furmanb95b5612015-01-23 20:05:18 -0800376/* Helpers for formatstring */
377
378Py_LOCAL_INLINE(PyObject *)
379getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
380{
381 Py_ssize_t argidx = *p_argidx;
382 if (argidx < arglen) {
383 (*p_argidx)++;
384 if (arglen < 0)
385 return args;
386 else
387 return PyTuple_GetItem(args, argidx);
388 }
389 PyErr_SetString(PyExc_TypeError,
390 "not enough arguments for format string");
391 return NULL;
392}
393
394/* Format codes
395 * F_LJUST '-'
396 * F_SIGN '+'
397 * F_BLANK ' '
398 * F_ALT '#'
399 * F_ZERO '0'
400 */
401#define F_LJUST (1<<0)
402#define F_SIGN (1<<1)
403#define F_BLANK (1<<2)
404#define F_ALT (1<<3)
405#define F_ZERO (1<<4)
406
407/* Returns a new reference to a PyBytes object, or NULL on failure. */
408
409static PyObject *
410formatfloat(PyObject *v, int flags, int prec, int type)
411{
412 char *p;
413 PyObject *result;
414 double x;
415
416 x = PyFloat_AsDouble(v);
417 if (x == -1.0 && PyErr_Occurred()) {
418 PyErr_Format(PyExc_TypeError, "float argument required, "
419 "not %.200s", Py_TYPE(v)->tp_name);
420 return NULL;
421 }
422
423 if (prec < 0)
424 prec = 6;
425
426 p = PyOS_double_to_string(x, type, prec,
427 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
428
429 if (p == NULL)
430 return NULL;
431 result = PyBytes_FromStringAndSize(p, strlen(p));
432 PyMem_Free(p);
433 return result;
434}
435
Ethan Furmanb95b5612015-01-23 20:05:18 -0800436Py_LOCAL_INLINE(int)
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200437byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800438{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200439 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
440 *p = PyBytes_AS_STRING(arg)[0];
441 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800442 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200443 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
444 *p = PyByteArray_AS_STRING(arg)[0];
445 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446 }
447 else {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200448 long ival = PyLong_AsLong(arg);
449 if (0 <= ival && ival <= 255) {
450 *p = (char)ival;
451 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800452 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800453 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200454 PyErr_SetString(PyExc_TypeError,
455 "%c requires an integer in range(256) or a single byte");
456 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800457}
458
459static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200460format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800461{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200462 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800463 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800464 /* is it a bytes object? */
465 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200466 *pbuf = PyBytes_AS_STRING(v);
467 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800468 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200469 return v;
470 }
471 if (PyByteArray_Check(v)) {
472 *pbuf = PyByteArray_AS_STRING(v);
473 *plen = PyByteArray_GET_SIZE(v);
474 Py_INCREF(v);
475 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800476 }
477 /* does it support __bytes__? */
478 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
479 if (func != NULL) {
480 result = PyObject_CallFunctionObjArgs(func, NULL);
481 Py_DECREF(func);
482 if (result == NULL)
483 return NULL;
484 if (!PyBytes_Check(result)) {
485 PyErr_Format(PyExc_TypeError,
486 "__bytes__ returned non-bytes (type %.200s)",
487 Py_TYPE(result)->tp_name);
488 Py_DECREF(result);
489 return NULL;
490 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200491 *pbuf = PyBytes_AS_STRING(result);
492 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800493 return result;
494 }
495 PyErr_Format(PyExc_TypeError,
496 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
497 Py_TYPE(v)->tp_name);
498 return NULL;
499}
500
501/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
502
503 FORMATBUFLEN is the length of the buffer in which the ints &
504 chars are formatted. XXX This is a magic number. Each formatting
505 routine does bounds checking to ensure no overflow, but a better
506 solution may be to malloc a buffer of appropriate size for each
507 format. For now, the current solution is sufficient.
508*/
509#define FORMATBUFLEN (size_t)120
510
511PyObject *
512_PyBytes_Format(PyObject *format, PyObject *args)
513{
514 char *fmt, *res;
515 Py_ssize_t arglen, argidx;
516 Py_ssize_t reslen, rescnt, fmtcnt;
517 int args_owned = 0;
518 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519 PyObject *dict = NULL;
520 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
521 PyErr_BadInternalCall();
522 return NULL;
523 }
524 fmt = PyBytes_AS_STRING(format);
525 fmtcnt = PyBytes_GET_SIZE(format);
526 reslen = rescnt = fmtcnt + 100;
527 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
528 if (result == NULL)
529 return NULL;
530 res = PyBytes_AsString(result);
531 if (PyTuple_Check(args)) {
532 arglen = PyTuple_GET_SIZE(args);
533 argidx = 0;
534 }
535 else {
536 arglen = -1;
537 argidx = -2;
538 }
539 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
540 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
541 !PyByteArray_Check(args)) {
542 dict = args;
543 }
544 while (--fmtcnt >= 0) {
545 if (*fmt != '%') {
546 if (--rescnt < 0) {
547 rescnt = fmtcnt + 100;
548 reslen += rescnt;
549 if (_PyBytes_Resize(&result, reslen))
550 return NULL;
551 res = PyBytes_AS_STRING(result)
552 + reslen - rescnt;
553 --rescnt;
554 }
555 *res++ = *fmt++;
556 }
557 else {
558 /* Got a format specifier */
559 int flags = 0;
560 Py_ssize_t width = -1;
561 int prec = -1;
562 int c = '\0';
563 int fill;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200564 PyObject *iobj;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 PyObject *v = NULL;
566 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200567 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800568 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200569 Py_ssize_t len = 0;
570 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800571
Ethan Furmanb95b5612015-01-23 20:05:18 -0800572 fmt++;
573 if (*fmt == '(') {
574 char *keystart;
575 Py_ssize_t keylen;
576 PyObject *key;
577 int pcount = 1;
578
579 if (dict == NULL) {
580 PyErr_SetString(PyExc_TypeError,
581 "format requires a mapping");
582 goto error;
583 }
584 ++fmt;
585 --fmtcnt;
586 keystart = fmt;
587 /* Skip over balanced parentheses */
588 while (pcount > 0 && --fmtcnt >= 0) {
589 if (*fmt == ')')
590 --pcount;
591 else if (*fmt == '(')
592 ++pcount;
593 fmt++;
594 }
595 keylen = fmt - keystart - 1;
596 if (fmtcnt < 0 || pcount > 0) {
597 PyErr_SetString(PyExc_ValueError,
598 "incomplete format key");
599 goto error;
600 }
601 key = PyBytes_FromStringAndSize(keystart,
602 keylen);
603 if (key == NULL)
604 goto error;
605 if (args_owned) {
606 Py_DECREF(args);
607 args_owned = 0;
608 }
609 args = PyObject_GetItem(dict, key);
610 Py_DECREF(key);
611 if (args == NULL) {
612 goto error;
613 }
614 args_owned = 1;
615 arglen = -1;
616 argidx = -2;
617 }
618 while (--fmtcnt >= 0) {
619 switch (c = *fmt++) {
620 case '-': flags |= F_LJUST; continue;
621 case '+': flags |= F_SIGN; continue;
622 case ' ': flags |= F_BLANK; continue;
623 case '#': flags |= F_ALT; continue;
624 case '0': flags |= F_ZERO; continue;
625 }
626 break;
627 }
628 if (c == '*') {
629 v = getnextarg(args, arglen, &argidx);
630 if (v == NULL)
631 goto error;
632 if (!PyLong_Check(v)) {
633 PyErr_SetString(PyExc_TypeError,
634 "* wants int");
635 goto error;
636 }
637 width = PyLong_AsSsize_t(v);
638 if (width == -1 && PyErr_Occurred())
639 goto error;
640 if (width < 0) {
641 flags |= F_LJUST;
642 width = -width;
643 }
644 if (--fmtcnt >= 0)
645 c = *fmt++;
646 }
647 else if (c >= 0 && isdigit(c)) {
648 width = c - '0';
649 while (--fmtcnt >= 0) {
650 c = Py_CHARMASK(*fmt++);
651 if (!isdigit(c))
652 break;
653 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
654 PyErr_SetString(
655 PyExc_ValueError,
656 "width too big");
657 goto error;
658 }
659 width = width*10 + (c - '0');
660 }
661 }
662 if (c == '.') {
663 prec = 0;
664 if (--fmtcnt >= 0)
665 c = *fmt++;
666 if (c == '*') {
667 v = getnextarg(args, arglen, &argidx);
668 if (v == NULL)
669 goto error;
670 if (!PyLong_Check(v)) {
671 PyErr_SetString(
672 PyExc_TypeError,
673 "* wants int");
674 goto error;
675 }
676 prec = PyLong_AsSsize_t(v);
677 if (prec == -1 && PyErr_Occurred())
678 goto error;
679 if (prec < 0)
680 prec = 0;
681 if (--fmtcnt >= 0)
682 c = *fmt++;
683 }
684 else if (c >= 0 && isdigit(c)) {
685 prec = c - '0';
686 while (--fmtcnt >= 0) {
687 c = Py_CHARMASK(*fmt++);
688 if (!isdigit(c))
689 break;
690 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
691 PyErr_SetString(
692 PyExc_ValueError,
693 "prec too big");
694 goto error;
695 }
696 prec = prec*10 + (c - '0');
697 }
698 }
699 } /* prec */
700 if (fmtcnt >= 0) {
701 if (c == 'h' || c == 'l' || c == 'L') {
702 if (--fmtcnt >= 0)
703 c = *fmt++;
704 }
705 }
706 if (fmtcnt < 0) {
707 PyErr_SetString(PyExc_ValueError,
708 "incomplete format");
709 goto error;
710 }
711 if (c != '%') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 }
716 sign = 0;
717 fill = ' ';
718 switch (c) {
719 case '%':
720 pbuf = "%";
721 len = 1;
722 break;
723 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200724 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800725 if (temp == NULL)
726 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200727 assert(PyUnicode_IS_ASCII(temp));
728 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
729 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800730 if (prec >= 0 && len > prec)
731 len = prec;
732 break;
733 case 's':
734 // %s is only for 2/3 code; 3 only code should use %b
735 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200736 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800737 if (temp == NULL)
738 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800739 if (prec >= 0 && len > prec)
740 len = prec;
741 break;
742 case 'i':
743 case 'd':
744 case 'u':
745 case 'o':
746 case 'x':
747 case 'X':
748 if (c == 'i')
749 c = 'd';
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200750 iobj = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800751 if (PyNumber_Check(v)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800752 if ((PyLong_Check(v))) {
753 iobj = v;
754 Py_INCREF(iobj);
755 }
756 else {
757 iobj = PyNumber_Long(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200758 if (iobj != NULL && !PyLong_Check(iobj))
759 Py_CLEAR(iobj);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800760 }
761 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200762 if (iobj == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800763 PyErr_Format(PyExc_TypeError,
764 "%%%c format: a number is required, "
765 "not %.200s", c, Py_TYPE(v)->tp_name);
766 goto error;
767 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200768 temp = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, c);
769 Py_DECREF(iobj);
770 if (!temp)
771 goto error;
772 assert(PyUnicode_IS_ASCII(temp));
773 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
774 len = PyUnicode_GET_LENGTH(temp);
775 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800776 if (flags & F_ZERO)
777 fill = '0';
778 break;
779 case 'e':
780 case 'E':
781 case 'f':
782 case 'F':
783 case 'g':
784 case 'G':
785 temp = formatfloat(v, flags, prec, c);
786 if (temp == NULL)
787 goto error;
788 pbuf = PyBytes_AS_STRING(temp);
789 len = PyBytes_GET_SIZE(temp);
790 sign = 1;
791 if (flags & F_ZERO)
792 fill = '0';
793 break;
794 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200795 pbuf = &onechar;
796 len = byte_converter(v, &onechar);
797 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800798 goto error;
799 break;
800 default:
801 PyErr_Format(PyExc_ValueError,
802 "unsupported format character '%c' (0x%x) "
803 "at index %zd",
804 c, c,
805 (Py_ssize_t)(fmt - 1 -
806 PyBytes_AsString(format)));
807 goto error;
808 }
809 if (sign) {
810 if (*pbuf == '-' || *pbuf == '+') {
811 sign = *pbuf++;
812 len--;
813 }
814 else if (flags & F_SIGN)
815 sign = '+';
816 else if (flags & F_BLANK)
817 sign = ' ';
818 else
819 sign = 0;
820 }
821 if (width < len)
822 width = len;
823 if (rescnt - (sign != 0) < width) {
824 reslen -= rescnt;
825 rescnt = width + fmtcnt + 100;
826 reslen += rescnt;
827 if (reslen < 0) {
828 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800829 Py_XDECREF(temp);
830 return PyErr_NoMemory();
831 }
832 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 Py_XDECREF(temp);
834 return NULL;
835 }
836 res = PyBytes_AS_STRING(result)
837 + reslen - rescnt;
838 }
839 if (sign) {
840 if (fill != ' ')
841 *res++ = sign;
842 rescnt--;
843 if (width > len)
844 width--;
845 }
846 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
847 assert(pbuf[0] == '0');
848 assert(pbuf[1] == c);
849 if (fill != ' ') {
850 *res++ = *pbuf++;
851 *res++ = *pbuf++;
852 }
853 rescnt -= 2;
854 width -= 2;
855 if (width < 0)
856 width = 0;
857 len -= 2;
858 }
859 if (width > len && !(flags & F_LJUST)) {
860 do {
861 --rescnt;
862 *res++ = fill;
863 } while (--width > len);
864 }
865 if (fill == ' ') {
866 if (sign)
867 *res++ = sign;
868 if ((flags & F_ALT) &&
869 (c == 'x' || c == 'X')) {
870 assert(pbuf[0] == '0');
871 assert(pbuf[1] == c);
872 *res++ = *pbuf++;
873 *res++ = *pbuf++;
874 }
875 }
876 Py_MEMCPY(res, pbuf, len);
877 res += len;
878 rescnt -= len;
879 while (--width >= len) {
880 --rescnt;
881 *res++ = ' ';
882 }
883 if (dict && (argidx < arglen) && c != '%') {
884 PyErr_SetString(PyExc_TypeError,
885 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 Py_XDECREF(temp);
887 goto error;
888 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800889 Py_XDECREF(temp);
890 } /* '%' */
891 } /* until end */
892 if (argidx < arglen && !dict) {
893 PyErr_SetString(PyExc_TypeError,
894 "not all arguments converted during bytes formatting");
895 goto error;
896 }
897 if (args_owned) {
898 Py_DECREF(args);
899 }
900 if (_PyBytes_Resize(&result, reslen - rescnt))
901 return NULL;
902 return result;
903
904 error:
905 Py_DECREF(result);
906 if (args_owned) {
907 Py_DECREF(args);
908 }
909 return NULL;
910}
911
912/* =-= */
913
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000914static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000915bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000916{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000918}
919
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000920/* Unescape a backslash-escaped string. If unicode is non-zero,
921 the string is a u-literal. If recode_encoding is non-zero,
922 the string is UTF-8 encoded and should be re-encoded in the
923 specified encoding. */
924
925PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 Py_ssize_t len,
927 const char *errors,
928 Py_ssize_t unicode,
929 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000930{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000931 int c;
932 char *p, *buf;
933 const char *end;
934 PyObject *v;
935 Py_ssize_t newlen = recode_encoding ? 4*len:len;
936 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
937 if (v == NULL)
938 return NULL;
939 p = buf = PyBytes_AsString(v);
940 end = s + len;
941 while (s < end) {
942 if (*s != '\\') {
943 non_esc:
944 if (recode_encoding && (*s & 0x80)) {
945 PyObject *u, *w;
946 char *r;
947 const char* t;
948 Py_ssize_t rn;
949 t = s;
950 /* Decode non-ASCII bytes as UTF-8. */
951 while (t < end && (*t & 0x80)) t++;
952 u = PyUnicode_DecodeUTF8(s, t - s, errors);
953 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 /* Recode them in target encoding. */
956 w = PyUnicode_AsEncodedString(
957 u, recode_encoding, errors);
958 Py_DECREF(u);
959 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 /* Append bytes to output buffer. */
962 assert(PyBytes_Check(w));
963 r = PyBytes_AS_STRING(w);
964 rn = PyBytes_GET_SIZE(w);
965 Py_MEMCPY(p, r, rn);
966 p += rn;
967 Py_DECREF(w);
968 s = t;
969 } else {
970 *p++ = *s++;
971 }
972 continue;
973 }
974 s++;
975 if (s==end) {
976 PyErr_SetString(PyExc_ValueError,
977 "Trailing \\ in string");
978 goto failed;
979 }
980 switch (*s++) {
981 /* XXX This assumes ASCII! */
982 case '\n': break;
983 case '\\': *p++ = '\\'; break;
984 case '\'': *p++ = '\''; break;
985 case '\"': *p++ = '\"'; break;
986 case 'b': *p++ = '\b'; break;
987 case 'f': *p++ = '\014'; break; /* FF */
988 case 't': *p++ = '\t'; break;
989 case 'n': *p++ = '\n'; break;
990 case 'r': *p++ = '\r'; break;
991 case 'v': *p++ = '\013'; break; /* VT */
992 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
993 case '0': case '1': case '2': case '3':
994 case '4': case '5': case '6': case '7':
995 c = s[-1] - '0';
996 if (s < end && '0' <= *s && *s <= '7') {
997 c = (c<<3) + *s++ - '0';
998 if (s < end && '0' <= *s && *s <= '7')
999 c = (c<<3) + *s++ - '0';
1000 }
1001 *p++ = c;
1002 break;
1003 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001004 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 unsigned int x = 0;
1006 c = Py_CHARMASK(*s);
1007 s++;
David Malcolm96960882010-11-05 17:23:41 +00001008 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001010 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 x = 10 + c - 'a';
1012 else
1013 x = 10 + c - 'A';
1014 x = x << 4;
1015 c = Py_CHARMASK(*s);
1016 s++;
David Malcolm96960882010-11-05 17:23:41 +00001017 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001019 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 x += 10 + c - 'a';
1021 else
1022 x += 10 + c - 'A';
1023 *p++ = x;
1024 break;
1025 }
1026 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001027 PyErr_Format(PyExc_ValueError,
1028 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001029 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 goto failed;
1031 }
1032 if (strcmp(errors, "replace") == 0) {
1033 *p++ = '?';
1034 } else if (strcmp(errors, "ignore") == 0)
1035 /* do nothing */;
1036 else {
1037 PyErr_Format(PyExc_ValueError,
1038 "decoding error; unknown "
1039 "error handling code: %.400s",
1040 errors);
1041 goto failed;
1042 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001043 /* skip \x */
1044 if (s < end && Py_ISXDIGIT(s[0]))
1045 s++; /* and a hexdigit */
1046 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 default:
1048 *p++ = '\\';
1049 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001050 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 UTF-8 bytes may follow. */
1052 }
1053 }
1054 if (p-buf < newlen)
1055 _PyBytes_Resize(&v, p - buf);
1056 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001057 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 Py_DECREF(v);
1059 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001060}
1061
1062/* -------------------------------------------------------------------- */
1063/* object api */
1064
1065Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001066PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 if (!PyBytes_Check(op)) {
1069 PyErr_Format(PyExc_TypeError,
1070 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1071 return -1;
1072 }
1073 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001074}
1075
1076char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001077PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001078{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 if (!PyBytes_Check(op)) {
1080 PyErr_Format(PyExc_TypeError,
1081 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1082 return NULL;
1083 }
1084 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001085}
1086
1087int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001088PyBytes_AsStringAndSize(PyObject *obj,
1089 char **s,
1090 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001091{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 if (s == NULL) {
1093 PyErr_BadInternalCall();
1094 return -1;
1095 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 if (!PyBytes_Check(obj)) {
1098 PyErr_Format(PyExc_TypeError,
1099 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1100 return -1;
1101 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 *s = PyBytes_AS_STRING(obj);
1104 if (len != NULL)
1105 *len = PyBytes_GET_SIZE(obj);
1106 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001107 PyErr_SetString(PyExc_ValueError,
1108 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 return -1;
1110 }
1111 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112}
Neal Norwitz6968b052007-02-27 19:02:19 +00001113
1114/* -------------------------------------------------------------------- */
1115/* Methods */
1116
Eric Smith0923d1d2009-04-16 20:16:10 +00001117#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001118
1119#include "stringlib/fastsearch.h"
1120#include "stringlib/count.h"
1121#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001122#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001123#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001124#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001125#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001126
Eric Smith0f78bff2009-11-30 01:01:42 +00001127#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001128
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129PyObject *
1130PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001131{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001132 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001133 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001134 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001136 unsigned char quote, *s, *p;
1137
1138 /* Compute size of output string */
1139 squotes = dquotes = 0;
1140 newsize = 3; /* b'' */
1141 s = (unsigned char*)op->ob_sval;
1142 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001143 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001145 case '\'': squotes++; break;
1146 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001147 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001148 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 default:
1150 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001151 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001153 if (newsize > PY_SSIZE_T_MAX - incr)
1154 goto overflow;
1155 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 }
1157 quote = '\'';
1158 if (smartquotes && squotes && !dquotes)
1159 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001160 if (squotes && quote == '\'') {
1161 if (newsize > PY_SSIZE_T_MAX - squotes)
1162 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001165
1166 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 if (v == NULL) {
1168 return NULL;
1169 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001170 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001171
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001172 *p++ = 'b', *p++ = quote;
1173 for (i = 0; i < length; i++) {
1174 unsigned char c = op->ob_sval[i];
1175 if (c == quote || c == '\\')
1176 *p++ = '\\', *p++ = c;
1177 else if (c == '\t')
1178 *p++ = '\\', *p++ = 't';
1179 else if (c == '\n')
1180 *p++ = '\\', *p++ = 'n';
1181 else if (c == '\r')
1182 *p++ = '\\', *p++ = 'r';
1183 else if (c < ' ' || c >= 0x7f) {
1184 *p++ = '\\';
1185 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001186 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1187 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001189 else
1190 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001192 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001193 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001194 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001195
1196 overflow:
1197 PyErr_SetString(PyExc_OverflowError,
1198 "bytes object is too large to make repr");
1199 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001200}
1201
Neal Norwitz6968b052007-02-27 19:02:19 +00001202static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001203bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001204{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001206}
1207
Neal Norwitz6968b052007-02-27 19:02:19 +00001208static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001209bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 if (Py_BytesWarningFlag) {
1212 if (PyErr_WarnEx(PyExc_BytesWarning,
1213 "str() on a bytes instance", 1))
1214 return NULL;
1215 }
1216 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001217}
1218
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001220bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223}
Neal Norwitz6968b052007-02-27 19:02:19 +00001224
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225/* This is also used by PyBytes_Concat() */
1226static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001227bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 Py_ssize_t size;
1230 Py_buffer va, vb;
1231 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 va.len = -1;
1234 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001235 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1236 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1238 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1239 goto done;
1240 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 /* Optimize end cases */
1243 if (va.len == 0 && PyBytes_CheckExact(b)) {
1244 result = b;
1245 Py_INCREF(result);
1246 goto done;
1247 }
1248 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1249 result = a;
1250 Py_INCREF(result);
1251 goto done;
1252 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 size = va.len + vb.len;
1255 if (size < 0) {
1256 PyErr_NoMemory();
1257 goto done;
1258 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 result = PyBytes_FromStringAndSize(NULL, size);
1261 if (result != NULL) {
1262 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1263 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1264 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001265
1266 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 if (va.len != -1)
1268 PyBuffer_Release(&va);
1269 if (vb.len != -1)
1270 PyBuffer_Release(&vb);
1271 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272}
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
1274static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001275bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001276{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001277 Py_ssize_t i;
1278 Py_ssize_t j;
1279 Py_ssize_t size;
1280 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 size_t nbytes;
1282 if (n < 0)
1283 n = 0;
1284 /* watch out for overflows: the size can overflow int,
1285 * and the # of bytes needed can overflow size_t
1286 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001287 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 PyErr_SetString(PyExc_OverflowError,
1289 "repeated bytes are too long");
1290 return NULL;
1291 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001292 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1294 Py_INCREF(a);
1295 return (PyObject *)a;
1296 }
1297 nbytes = (size_t)size;
1298 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1299 PyErr_SetString(PyExc_OverflowError,
1300 "repeated bytes are too long");
1301 return NULL;
1302 }
1303 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1304 if (op == NULL)
1305 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001306 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001307 op->ob_shash = -1;
1308 op->ob_sval[size] = '\0';
1309 if (Py_SIZE(a) == 1 && n > 0) {
1310 memset(op->ob_sval, a->ob_sval[0] , n);
1311 return (PyObject *) op;
1312 }
1313 i = 0;
1314 if (i < size) {
1315 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1316 i = Py_SIZE(a);
1317 }
1318 while (i < size) {
1319 j = (i <= size-i) ? i : size-i;
1320 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1321 i += j;
1322 }
1323 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001324}
1325
Guido van Rossum98297ee2007-11-06 21:34:58 +00001326static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001327bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001328{
1329 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1330 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001331 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001332 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001333 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001334 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001335 return -1;
1336 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1337 varg.buf, varg.len, 0);
1338 PyBuffer_Release(&varg);
1339 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001340 }
1341 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001342 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1343 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001344 }
1345
Antoine Pitrou0010d372010-08-15 17:12:55 +00001346 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001347}
1348
Neal Norwitz6968b052007-02-27 19:02:19 +00001349static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001350bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 if (i < 0 || i >= Py_SIZE(a)) {
1353 PyErr_SetString(PyExc_IndexError, "index out of range");
1354 return NULL;
1355 }
1356 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001357}
1358
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001359Py_LOCAL(int)
1360bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1361{
1362 int cmp;
1363 Py_ssize_t len;
1364
1365 len = Py_SIZE(a);
1366 if (Py_SIZE(b) != len)
1367 return 0;
1368
1369 if (a->ob_sval[0] != b->ob_sval[0])
1370 return 0;
1371
1372 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1373 return (cmp == 0);
1374}
1375
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001376static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001377bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001378{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 int c;
1380 Py_ssize_t len_a, len_b;
1381 Py_ssize_t min_len;
1382 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 /* Make sure both arguments are strings. */
1385 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1386 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
1387 (PyObject_IsInstance((PyObject*)a,
1388 (PyObject*)&PyUnicode_Type) ||
1389 PyObject_IsInstance((PyObject*)b,
1390 (PyObject*)&PyUnicode_Type))) {
1391 if (PyErr_WarnEx(PyExc_BytesWarning,
1392 "Comparison between bytes and string", 1))
1393 return NULL;
1394 }
1395 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001397 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001399 case Py_EQ:
1400 case Py_LE:
1401 case Py_GE:
1402 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001404 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001405 case Py_NE:
1406 case Py_LT:
1407 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001409 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001410 default:
1411 PyErr_BadArgument();
1412 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 }
1414 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001415 else if (op == Py_EQ || op == Py_NE) {
1416 int eq = bytes_compare_eq(a, b);
1417 eq ^= (op == Py_NE);
1418 result = eq ? Py_True : Py_False;
1419 }
1420 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001421 len_a = Py_SIZE(a);
1422 len_b = Py_SIZE(b);
1423 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001424 if (min_len > 0) {
1425 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001426 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001427 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001429 else
1430 c = 0;
1431 if (c == 0)
1432 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1433 switch (op) {
1434 case Py_LT: c = c < 0; break;
1435 case Py_LE: c = c <= 0; break;
1436 case Py_GT: c = c > 0; break;
1437 case Py_GE: c = c >= 0; break;
1438 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001439 PyErr_BadArgument();
1440 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001441 }
1442 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 Py_INCREF(result);
1446 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001447}
1448
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001449static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001450bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001451{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001452 if (a->ob_shash == -1) {
1453 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001454 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001455 }
1456 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001457}
1458
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001459static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001460bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001461{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 if (PyIndex_Check(item)) {
1463 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1464 if (i == -1 && PyErr_Occurred())
1465 return NULL;
1466 if (i < 0)
1467 i += PyBytes_GET_SIZE(self);
1468 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1469 PyErr_SetString(PyExc_IndexError,
1470 "index out of range");
1471 return NULL;
1472 }
1473 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1474 }
1475 else if (PySlice_Check(item)) {
1476 Py_ssize_t start, stop, step, slicelength, cur, i;
1477 char* source_buf;
1478 char* result_buf;
1479 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001480
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001481 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 PyBytes_GET_SIZE(self),
1483 &start, &stop, &step, &slicelength) < 0) {
1484 return NULL;
1485 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 if (slicelength <= 0) {
1488 return PyBytes_FromStringAndSize("", 0);
1489 }
1490 else if (start == 0 && step == 1 &&
1491 slicelength == PyBytes_GET_SIZE(self) &&
1492 PyBytes_CheckExact(self)) {
1493 Py_INCREF(self);
1494 return (PyObject *)self;
1495 }
1496 else if (step == 1) {
1497 return PyBytes_FromStringAndSize(
1498 PyBytes_AS_STRING(self) + start,
1499 slicelength);
1500 }
1501 else {
1502 source_buf = PyBytes_AS_STRING(self);
1503 result = PyBytes_FromStringAndSize(NULL, slicelength);
1504 if (result == NULL)
1505 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 result_buf = PyBytes_AS_STRING(result);
1508 for (cur = start, i = 0; i < slicelength;
1509 cur += step, i++) {
1510 result_buf[i] = source_buf[cur];
1511 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 return result;
1514 }
1515 }
1516 else {
1517 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001518 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 Py_TYPE(item)->tp_name);
1520 return NULL;
1521 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001522}
1523
1524static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001525bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001526{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1528 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001529}
1530
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001531static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 (lenfunc)bytes_length, /*sq_length*/
1533 (binaryfunc)bytes_concat, /*sq_concat*/
1534 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1535 (ssizeargfunc)bytes_item, /*sq_item*/
1536 0, /*sq_slice*/
1537 0, /*sq_ass_item*/
1538 0, /*sq_ass_slice*/
1539 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001540};
1541
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001542static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 (lenfunc)bytes_length,
1544 (binaryfunc)bytes_subscript,
1545 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001546};
1547
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001548static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001549 (getbufferproc)bytes_buffer_getbuffer,
1550 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551};
1552
1553
1554#define LEFTSTRIP 0
1555#define RIGHTSTRIP 1
1556#define BOTHSTRIP 2
1557
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001558/*[clinic input]
1559bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001560
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001561 sep: object = None
1562 The delimiter according which to split the bytes.
1563 None (the default value) means split on ASCII whitespace characters
1564 (space, tab, return, newline, formfeed, vertical tab).
1565 maxsplit: Py_ssize_t = -1
1566 Maximum number of splits to do.
1567 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001568
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001569Return a list of the sections in the bytes, using sep as the delimiter.
1570[clinic start generated code]*/
1571
1572PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001573"split($self, /, sep=None, maxsplit=-1)\n"
1574"--\n"
1575"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001576"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1577"\n"
1578" sep\n"
1579" The delimiter according which to split the bytes.\n"
1580" None (the default value) means split on ASCII whitespace characters\n"
1581" (space, tab, return, newline, formfeed, vertical tab).\n"
1582" maxsplit\n"
1583" Maximum number of splits to do.\n"
1584" -1 (the default value) means no limit.");
1585
1586#define BYTES_SPLIT_METHODDEF \
1587 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001588
1589static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001590bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001591
1592static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001593bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001594{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001595 PyObject *return_value = NULL;
1596 static char *_keywords[] = {"sep", "maxsplit", NULL};
1597 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001599
1600 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1601 "|On:split", _keywords,
1602 &sep, &maxsplit))
1603 goto exit;
1604 return_value = bytes_split_impl(self, sep, maxsplit);
1605
1606exit:
1607 return return_value;
1608}
1609
1610static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001611bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1612/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001613{
1614 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 const char *s = PyBytes_AS_STRING(self), *sub;
1616 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001617 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 if (maxsplit < 0)
1620 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001621 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001622 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001623 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 return NULL;
1625 sub = vsub.buf;
1626 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1629 PyBuffer_Release(&vsub);
1630 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001631}
1632
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001633/*[clinic input]
1634bytes.partition
1635
1636 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001637 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001638 /
1639
1640Partition the bytes into three parts using the given separator.
1641
1642This will search for the separator sep in the bytes. If the separator is found,
1643returns a 3-tuple containing the part before the separator, the separator
1644itself, and the part after it.
1645
1646If the separator is not found, returns a 3-tuple containing the original bytes
1647object and two empty bytes objects.
1648[clinic start generated code]*/
1649
1650PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001651"partition($self, sep, /)\n"
1652"--\n"
1653"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001654"Partition the bytes into three parts using the given separator.\n"
1655"\n"
1656"This will search for the separator sep in the bytes. If the separator is found,\n"
1657"returns a 3-tuple containing the part before the separator, the separator\n"
1658"itself, and the part after it.\n"
1659"\n"
1660"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1661"object and two empty bytes objects.");
1662
1663#define BYTES_PARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001664 {"partition", (PyCFunction)bytes_partition, METH_VARARGS, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001665
1666static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001667bytes_partition_impl(PyBytesObject *self, Py_buffer *sep);
1668
1669static PyObject *
1670bytes_partition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001671{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001672 PyObject *return_value = NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001673 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001674
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001675 if (!PyArg_ParseTuple(args,
1676 "y*:partition",
1677 &sep))
1678 goto exit;
1679 return_value = bytes_partition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001680
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001681exit:
1682 /* Cleanup for sep */
1683 if (sep.obj)
1684 PyBuffer_Release(&sep);
1685
1686 return return_value;
Neal Norwitz6968b052007-02-27 19:02:19 +00001687}
1688
Neal Norwitz6968b052007-02-27 19:02:19 +00001689static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001690bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1691/*[clinic end generated code: output=3006727cfbf83aa4 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001692{
Neal Norwitz6968b052007-02-27 19:02:19 +00001693 return stringlib_partition(
1694 (PyObject*) self,
1695 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001696 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001697 );
1698}
1699
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001700/*[clinic input]
1701bytes.rpartition
1702
1703 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001704 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001705 /
1706
1707Partition the bytes into three parts using the given separator.
1708
1709This will search for the separator sep in the bytes, starting and the end. If
1710the separator is found, returns a 3-tuple containing the part before the
1711separator, the separator itself, and the part after it.
1712
1713If the separator is not found, returns a 3-tuple containing two empty bytes
1714objects and the original bytes object.
1715[clinic start generated code]*/
1716
1717PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001718"rpartition($self, sep, /)\n"
1719"--\n"
1720"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001721"Partition the bytes into three parts using the given separator.\n"
1722"\n"
1723"This will search for the separator sep in the bytes, starting and the end. If\n"
1724"the separator is found, returns a 3-tuple containing the part before the\n"
1725"separator, the separator itself, and the part after it.\n"
1726"\n"
1727"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1728"objects and the original bytes object.");
1729
1730#define BYTES_RPARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001731 {"rpartition", (PyCFunction)bytes_rpartition, METH_VARARGS, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001732
1733static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001734bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep);
1735
1736static PyObject *
1737bytes_rpartition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001738{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001739 PyObject *return_value = NULL;
1740 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001741
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001742 if (!PyArg_ParseTuple(args,
1743 "y*:rpartition",
1744 &sep))
1745 goto exit;
1746 return_value = bytes_rpartition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001747
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001748exit:
1749 /* Cleanup for sep */
1750 if (sep.obj)
1751 PyBuffer_Release(&sep);
1752
1753 return return_value;
1754}
1755
1756static PyObject *
1757bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1758/*[clinic end generated code: output=57b169dc47fa90e8 input=6588fff262a9170e]*/
1759{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001760 return stringlib_rpartition(
1761 (PyObject*) self,
1762 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001763 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001765}
1766
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767/*[clinic input]
1768bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001769
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001770Return a list of the sections in the bytes, using sep as the delimiter.
1771
1772Splitting is done starting at the end of the bytes and working to the front.
1773[clinic start generated code]*/
1774
1775PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001776"rsplit($self, /, sep=None, maxsplit=-1)\n"
1777"--\n"
1778"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001779"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1780"\n"
1781" sep\n"
1782" The delimiter according which to split the bytes.\n"
1783" None (the default value) means split on ASCII whitespace characters\n"
1784" (space, tab, return, newline, formfeed, vertical tab).\n"
1785" maxsplit\n"
1786" Maximum number of splits to do.\n"
1787" -1 (the default value) means no limit.\n"
1788"\n"
1789"Splitting is done starting at the end of the bytes and working to the front.");
1790
1791#define BYTES_RSPLIT_METHODDEF \
1792 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001793
Neal Norwitz6968b052007-02-27 19:02:19 +00001794static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001795bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001796
1797static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001798bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001799{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001800 PyObject *return_value = NULL;
1801 static char *_keywords[] = {"sep", "maxsplit", NULL};
1802 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804
1805 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1806 "|On:rsplit", _keywords,
1807 &sep, &maxsplit))
1808 goto exit;
1809 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1810
1811exit:
1812 return return_value;
1813}
1814
1815static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001816bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1817/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001818{
1819 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 const char *s = PyBytes_AS_STRING(self), *sub;
1821 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001822 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 if (maxsplit < 0)
1825 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001826 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001828 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 return NULL;
1830 sub = vsub.buf;
1831 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1834 PyBuffer_Release(&vsub);
1835 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001836}
1837
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001838
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001839/*[clinic input]
1840bytes.join
1841
1842 iterable_of_bytes: object
1843 /
1844
1845Concatenate any number of bytes objects.
1846
1847The bytes whose method is called is inserted in between each pair.
1848
1849The result is returned as a new bytes object.
1850
1851Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1852[clinic start generated code]*/
1853
1854PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001855"join($self, iterable_of_bytes, /)\n"
1856"--\n"
1857"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001858"Concatenate any number of bytes objects.\n"
1859"\n"
1860"The bytes whose method is called is inserted in between each pair.\n"
1861"\n"
1862"The result is returned as a new bytes object.\n"
1863"\n"
1864"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1865
1866#define BYTES_JOIN_METHODDEF \
1867 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Neal Norwitz6968b052007-02-27 19:02:19 +00001869static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001870bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1871/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001872{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001873 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001874}
1875
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876PyObject *
1877_PyBytes_Join(PyObject *sep, PyObject *x)
1878{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001879 assert(sep != NULL && PyBytes_Check(sep));
1880 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001881 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001882}
1883
/* Helper macro to fix up start/end slice values against a length.

   `start` and `end` must be modifiable lvalues; `len` may be evaluated
   more than once, so it must be free of side effects.

   - end greater than len is clamped to len;
   - a negative end or start has len added to it and, if still negative,
     is clamped to 0.

   The body is wrapped in do { ... } while (0) so the macro expands to a
   single statement: it is safe under an unbraced `if`/`else`, and must be
   followed by a semicolon at the call site.  All arguments are
   parenthesized so that expression arguments expand correctly. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001898
1899Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001900bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001901{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001902 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001903 char byte;
1904 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 const char *sub;
1906 Py_ssize_t sub_len;
1907 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001908 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909
Antoine Pitrouac65d962011-10-20 23:54:17 +02001910 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1911 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001913
Antoine Pitrouac65d962011-10-20 23:54:17 +02001914 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001915 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001916 return -2;
1917
1918 sub = subbuf.buf;
1919 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001920 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001921 else {
1922 sub = &byte;
1923 sub_len = 1;
1924 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001927 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1929 sub, sub_len, start, end);
1930 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02001931 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1933 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02001934
1935 if (subobj)
1936 PyBuffer_Release(&subbuf);
1937
1938 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939}
1940
1941
1942PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001943"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001944\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001945Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001946such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001947arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001948\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949Return -1 on failure.");
1950
Neal Norwitz6968b052007-02-27 19:02:19 +00001951static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001952bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001953{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 Py_ssize_t result = bytes_find_internal(self, args, +1);
1955 if (result == -2)
1956 return NULL;
1957 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001958}
1959
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001960
1961PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001962"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001963\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964Like B.find() but raise ValueError when the substring is not found.");
1965
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001966static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001967bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 Py_ssize_t result = bytes_find_internal(self, args, +1);
1970 if (result == -2)
1971 return NULL;
1972 if (result == -1) {
1973 PyErr_SetString(PyExc_ValueError,
1974 "substring not found");
1975 return NULL;
1976 }
1977 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001978}
1979
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
1981PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001982"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001983\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001985such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001987\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988Return -1 on failure.");
1989
Neal Norwitz6968b052007-02-27 19:02:19 +00001990static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001991bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001992{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 Py_ssize_t result = bytes_find_internal(self, args, -1);
1994 if (result == -2)
1995 return NULL;
1996 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001997}
1998
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001999
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002001"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002002\n\
2003Like B.rfind() but raise ValueError when the substring is not found.");
2004
2005static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002006bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002007{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 Py_ssize_t result = bytes_find_internal(self, args, -1);
2009 if (result == -2)
2010 return NULL;
2011 if (result == -1) {
2012 PyErr_SetString(PyExc_ValueError,
2013 "substring not found");
2014 return NULL;
2015 }
2016 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002017}
2018
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002019
2020Py_LOCAL_INLINE(PyObject *)
2021do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002023 Py_buffer vsep;
2024 char *s = PyBytes_AS_STRING(self);
2025 Py_ssize_t len = PyBytes_GET_SIZE(self);
2026 char *sep;
2027 Py_ssize_t seplen;
2028 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002030 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002031 return NULL;
2032 sep = vsep.buf;
2033 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 i = 0;
2036 if (striptype != RIGHTSTRIP) {
2037 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2038 i++;
2039 }
2040 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002042 j = len;
2043 if (striptype != LEFTSTRIP) {
2044 do {
2045 j--;
2046 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2047 j++;
2048 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2053 Py_INCREF(self);
2054 return (PyObject*)self;
2055 }
2056 else
2057 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002058}
2059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002060
2061Py_LOCAL_INLINE(PyObject *)
2062do_strip(PyBytesObject *self, int striptype)
2063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 char *s = PyBytes_AS_STRING(self);
2065 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 i = 0;
2068 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002069 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 i++;
2071 }
2072 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 j = len;
2075 if (striptype != LEFTSTRIP) {
2076 do {
2077 j--;
David Malcolm96960882010-11-05 17:23:41 +00002078 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 j++;
2080 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2083 Py_INCREF(self);
2084 return (PyObject*)self;
2085 }
2086 else
2087 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088}
2089
2090
2091Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002092do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002094 if (bytes != NULL && bytes != Py_None) {
2095 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 }
2097 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098}
2099
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002100/*[clinic input]
2101bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002103 self: self(type="PyBytesObject *")
2104 bytes: object = None
2105 /
2106
2107Strip leading and trailing bytes contained in the argument.
2108
2109If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2110[clinic start generated code]*/
2111
2112PyDoc_STRVAR(bytes_strip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002113"strip($self, bytes=None, /)\n"
2114"--\n"
2115"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002116"Strip leading and trailing bytes contained in the argument.\n"
2117"\n"
2118"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
2119
2120#define BYTES_STRIP_METHODDEF \
2121 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
2122
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002123static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002124bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
2125
2126static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002127bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002128{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002129 PyObject *return_value = NULL;
2130 PyObject *bytes = Py_None;
2131
2132 if (!PyArg_UnpackTuple(args, "strip",
2133 0, 1,
2134 &bytes))
2135 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002136 return_value = bytes_strip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002137
2138exit:
2139 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002140}
2141
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002142static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002143bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002144/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002145{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002146 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002147}
2148
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002149/*[clinic input]
2150bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152 self: self(type="PyBytesObject *")
2153 bytes: object = None
2154 /
2155
2156Strip leading bytes contained in the argument.
2157
2158If the argument is omitted or None, strip leading ASCII whitespace.
2159[clinic start generated code]*/
2160
2161PyDoc_STRVAR(bytes_lstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002162"lstrip($self, bytes=None, /)\n"
2163"--\n"
2164"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002165"Strip leading bytes contained in the argument.\n"
2166"\n"
2167"If the argument is omitted or None, strip leading ASCII whitespace.");
2168
2169#define BYTES_LSTRIP_METHODDEF \
2170 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
2171
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002172static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002173bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
2174
2175static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002176bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002177{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002178 PyObject *return_value = NULL;
2179 PyObject *bytes = Py_None;
2180
2181 if (!PyArg_UnpackTuple(args, "lstrip",
2182 0, 1,
2183 &bytes))
2184 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002185 return_value = bytes_lstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002186
2187exit:
2188 return return_value;
2189}
2190
2191static PyObject *
2192bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002193/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002194{
2195 return do_argstrip(self, LEFTSTRIP, bytes);
2196}
2197
2198/*[clinic input]
2199bytes.rstrip
2200
2201 self: self(type="PyBytesObject *")
2202 bytes: object = None
2203 /
2204
2205Strip trailing bytes contained in the argument.
2206
2207If the argument is omitted or None, strip trailing ASCII whitespace.
2208[clinic start generated code]*/
2209
2210PyDoc_STRVAR(bytes_rstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002211"rstrip($self, bytes=None, /)\n"
2212"--\n"
2213"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002214"Strip trailing bytes contained in the argument.\n"
2215"\n"
2216"If the argument is omitted or None, strip trailing ASCII whitespace.");
2217
2218#define BYTES_RSTRIP_METHODDEF \
2219 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
2220
2221static PyObject *
2222bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
2223
2224static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002225bytes_rstrip(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226{
2227 PyObject *return_value = NULL;
2228 PyObject *bytes = Py_None;
2229
2230 if (!PyArg_UnpackTuple(args, "rstrip",
2231 0, 1,
2232 &bytes))
2233 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002234 return_value = bytes_rstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002235
2236exit:
2237 return return_value;
2238}
2239
2240static PyObject *
2241bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002242/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002243{
2244 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002245}
Neal Norwitz6968b052007-02-27 19:02:19 +00002246
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002247
2248PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002249"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002250\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002251Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002252string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253as in slice notation.");
2254
2255static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002256bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002257{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002258 PyObject *sub_obj;
2259 const char *str = PyBytes_AS_STRING(self), *sub;
2260 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002261 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002262 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002263
Antoine Pitrouac65d962011-10-20 23:54:17 +02002264 Py_buffer vsub;
2265 PyObject *count_obj;
2266
2267 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2268 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002269 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Antoine Pitrouac65d962011-10-20 23:54:17 +02002271 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002272 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002273 return NULL;
2274
2275 sub = vsub.buf;
2276 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002278 else {
2279 sub = &byte;
2280 sub_len = 1;
2281 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002283 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284
Antoine Pitrouac65d962011-10-20 23:54:17 +02002285 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2287 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002288
2289 if (sub_obj)
2290 PyBuffer_Release(&vsub);
2291
2292 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002293}
2294
2295
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002296/*[clinic input]
2297bytes.translate
2298
2299 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002300 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002301 Translation table, which must be a bytes object of length 256.
2302 [
2303 deletechars: object
2304 ]
2305 /
2306
2307Return a copy with each character mapped by the given translation table.
2308
2309All characters occurring in the optional argument deletechars are removed.
2310The remaining characters are mapped through the given translation table.
2311[clinic start generated code]*/
2312
2313PyDoc_STRVAR(bytes_translate__doc__,
2314"translate(table, [deletechars])\n"
2315"Return a copy with each character mapped by the given translation table.\n"
2316"\n"
2317" table\n"
2318" Translation table, which must be a bytes object of length 256.\n"
2319"\n"
2320"All characters occurring in the optional argument deletechars are removed.\n"
2321"The remaining characters are mapped through the given translation table.");
2322
2323#define BYTES_TRANSLATE_METHODDEF \
2324 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325
2326static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
2328
2329static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002330bytes_translate(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002331{
2332 PyObject *return_value = NULL;
2333 PyObject *table;
2334 int group_right_1 = 0;
2335 PyObject *deletechars = NULL;
2336
2337 switch (PyTuple_GET_SIZE(args)) {
2338 case 1:
2339 if (!PyArg_ParseTuple(args, "O:translate", &table))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002340 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002341 break;
2342 case 2:
2343 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002344 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002345 group_right_1 = 1;
2346 break;
2347 default:
2348 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
Martin v. Löwis0efea322014-07-27 17:29:17 +02002349 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002350 }
Martin v. Löwis0efea322014-07-27 17:29:17 +02002351 return_value = bytes_translate_impl(self, table, group_right_1, deletechars);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002352
Martin v. Löwis0efea322014-07-27 17:29:17 +02002353exit:
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002354 return return_value;
2355}
2356
2357static PyObject *
2358bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Larry Hastingsdfbeb162014-10-13 10:39:41 +01002359/*[clinic end generated code: output=f0f29a57f41df5d8 input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002361 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002362 Py_buffer table_view = {NULL, NULL};
2363 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002364 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002365 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002367 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002368 Py_ssize_t inlen, tablen, dellen = 0;
2369 PyObject *result;
2370 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002371
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002372 if (PyBytes_Check(table)) {
2373 table_chars = PyBytes_AS_STRING(table);
2374 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002376 else if (table == Py_None) {
2377 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 tablen = 256;
2379 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002380 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002381 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002382 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002383 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002384 tablen = table_view.len;
2385 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002387 if (tablen != 256) {
2388 PyErr_SetString(PyExc_ValueError,
2389 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002390 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 return NULL;
2392 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002393
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002394 if (deletechars != NULL) {
2395 if (PyBytes_Check(deletechars)) {
2396 del_table_chars = PyBytes_AS_STRING(deletechars);
2397 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002398 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002399 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002400 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002401 PyBuffer_Release(&table_view);
2402 return NULL;
2403 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002404 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002405 dellen = del_table_view.len;
2406 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 }
2408 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002409 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 dellen = 0;
2411 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 inlen = PyBytes_GET_SIZE(input_obj);
2414 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002415 if (result == NULL) {
2416 PyBuffer_Release(&del_table_view);
2417 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002419 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 output_start = output = PyBytes_AsString(result);
2421 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002422
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002423 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 /* If no deletions are required, use faster code */
2425 for (i = inlen; --i >= 0; ) {
2426 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002427 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 changed = 1;
2429 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002430 if (!changed && PyBytes_CheckExact(input_obj)) {
2431 Py_INCREF(input_obj);
2432 Py_DECREF(result);
2433 result = input_obj;
2434 }
2435 PyBuffer_Release(&del_table_view);
2436 PyBuffer_Release(&table_view);
2437 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002439
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002440 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 for (i = 0; i < 256; i++)
2442 trans_table[i] = Py_CHARMASK(i);
2443 } else {
2444 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002445 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002447 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002450 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002451 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002452
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 for (i = inlen; --i >= 0; ) {
2454 c = Py_CHARMASK(*input++);
2455 if (trans_table[c] != -1)
2456 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2457 continue;
2458 changed = 1;
2459 }
2460 if (!changed && PyBytes_CheckExact(input_obj)) {
2461 Py_DECREF(result);
2462 Py_INCREF(input_obj);
2463 return input_obj;
2464 }
2465 /* Fix the size of the resulting string */
2466 if (inlen > 0)
2467 _PyBytes_Resize(&result, output - output_start);
2468 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002469}
2470
2471
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002472/*[clinic input]
2473
2474@staticmethod
2475bytes.maketrans
2476
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002477 frm: Py_buffer
2478 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002479 /
2480
2481Return a translation table useable for the bytes or bytearray translate method.
2482
2483The returned table will be one where each byte in frm is mapped to the byte at
2484the same position in to.
2485
2486The bytes objects frm and to must be of the same length.
2487[clinic start generated code]*/
2488
/* Docstring for bytes.maketrans.  It sits between the "[clinic start
   generated code]" and "[clinic end generated code]" markers, i.e. it is
   Argument Clinic output derived from the clinic input comment above. */
PyDoc_STRVAR(bytes_maketrans__doc__,
"maketrans(frm, to, /)\n"
"--\n"
"\n"
"Return a translation table useable for the bytes or bytearray translate method.\n"
"\n"
"The returned table will be one where each byte in frm is mapped to the byte at\n"
"the same position in to.\n"
"\n"
"The bytes objects frm and to must be of the same length.");

/* Method-table entry that publishes bytes_maketrans() under the name
   "maketrans" with the METH_VARARGS|METH_STATIC calling convention. */
#define BYTES_MAKETRANS_METHODDEF \
    {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},

/* Forward declaration: the wrapper below calls the implementation before
   its definition appears. */
static PyObject *
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002505
/* Argument-parsing wrapper for bytes.maketrans.
 *
 * Parses two buffer arguments from args with the "y*" format, passes them to
 * bytes_maketrans_impl(), and releases any buffer that was acquired before
 * returning.  Returns the implementation's result, or NULL if parsing failed.
 * The first parameter is not used by this function.
 */
static PyObject *
bytes_maketrans(void *null, PyObject *args)
{
    PyObject *return_value = NULL;
    /* Start with NULL members so the .obj tests at exit are meaningful even
       when PyArg_ParseTuple() fails before filling a buffer in. */
    Py_buffer frm = {NULL, NULL};
    Py_buffer to = {NULL, NULL};

    if (!PyArg_ParseTuple(args,
        "y*y*:maketrans",
        &frm, &to))
        goto exit;
    return_value = bytes_maketrans_impl(&frm, &to);

exit:
    /* Cleanup for frm */
    if (frm.obj)
        PyBuffer_Release(&frm);
    /* Cleanup for to */
    if (to.obj)
        PyBuffer_Release(&to);

    return return_value;
}
2529
static PyObject *
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
/*[clinic end generated code: output=7df47390c476ac60 input=de7a8fc5632bb8f1]*/
{
    /* Implementation of bytes.maketrans: the whole job is delegated to
       _Py_bytes_maketrans(), whose result (or NULL) is returned as is. */
    return _Py_bytes_maketrans(frm, to);
}
2536
/* find and count characters and substrings */

/* findchar(target, target_len, c): look for the byte c in the first
   target_len bytes of target.  Evaluates to a char * pointing at the first
   match, or NULL when there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2541
2542/* String ops must return a string. */
2543/* If the object is subclass of string, create a copy */
2544Py_LOCAL(PyBytesObject *)
2545return_self(PyBytesObject *self)
2546{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002547 if (PyBytes_CheckExact(self)) {
2548 Py_INCREF(self);
2549 return self;
2550 }
2551 return (PyBytesObject *)PyBytes_FromStringAndSize(
2552 PyBytes_AS_STRING(self),
2553 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002554}
2555
2556Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002557countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 Py_ssize_t count=0;
2560 const char *start=target;
2561 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 while ( (start=findchar(start, end-start, c)) != NULL ) {
2564 count++;
2565 if (count >= maxcount)
2566 break;
2567 start += 1;
2568 }
2569 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002570}
2571
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
2573/* Algorithms for different cases of string replacement */
2574
2575/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2576Py_LOCAL(PyBytesObject *)
2577replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 const char *to_s, Py_ssize_t to_len,
2579 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002580{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002581 char *self_s, *result_s;
2582 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002583 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002587
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002588 /* 1 at the end plus 1 after every character;
2589 count = min(maxcount, self_len + 1) */
2590 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002591 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002592 else
2593 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2594 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002596 /* Check for overflow */
2597 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002598 assert(count > 0);
2599 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 PyErr_SetString(PyExc_OverflowError,
2601 "replacement bytes are too long");
2602 return NULL;
2603 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002604 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002605
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002606 if (! (result = (PyBytesObject *)
2607 PyBytes_FromStringAndSize(NULL, result_len)) )
2608 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 self_s = PyBytes_AS_STRING(self);
2611 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002613 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 /* Lay the first one down (guaranteed this will occur) */
2616 Py_MEMCPY(result_s, to_s, to_len);
2617 result_s += to_len;
2618 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 for (i=0; i<count; i++) {
2621 *result_s++ = *self_s++;
2622 Py_MEMCPY(result_s, to_s, to_len);
2623 result_s += to_len;
2624 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 /* Copy the rest of the original string */
2627 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002630}
2631
2632/* Special case for deleting a single character */
2633/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2634Py_LOCAL(PyBytesObject *)
2635replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 char *self_s, *result_s;
2639 char *start, *next, *end;
2640 Py_ssize_t self_len, result_len;
2641 Py_ssize_t count;
2642 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 self_len = PyBytes_GET_SIZE(self);
2645 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 count = countchar(self_s, self_len, from_c, maxcount);
2648 if (count == 0) {
2649 return return_self(self);
2650 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002652 result_len = self_len - count; /* from_len == 1 */
2653 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 if ( (result = (PyBytesObject *)
2656 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2657 return NULL;
2658 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002660 start = self_s;
2661 end = self_s + self_len;
2662 while (count-- > 0) {
2663 next = findchar(start, end-start, from_c);
2664 if (next == NULL)
2665 break;
2666 Py_MEMCPY(result_s, start, next-start);
2667 result_s += (next-start);
2668 start = next+1;
2669 }
2670 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673}
2674
2675/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2676
2677Py_LOCAL(PyBytesObject *)
2678replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 const char *from_s, Py_ssize_t from_len,
2680 Py_ssize_t maxcount) {
2681 char *self_s, *result_s;
2682 char *start, *next, *end;
2683 Py_ssize_t self_len, result_len;
2684 Py_ssize_t count, offset;
2685 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 self_len = PyBytes_GET_SIZE(self);
2688 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 count = stringlib_count(self_s, self_len,
2691 from_s, from_len,
2692 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 if (count == 0) {
2695 /* no matches */
2696 return return_self(self);
2697 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 result_len = self_len - (count * from_len);
2700 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 if ( (result = (PyBytesObject *)
2703 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2704 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 start = self_s;
2709 end = self_s + self_len;
2710 while (count-- > 0) {
2711 offset = stringlib_find(start, end-start,
2712 from_s, from_len,
2713 0);
2714 if (offset == -1)
2715 break;
2716 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002718 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 result_s += (next-start);
2721 start = next+from_len;
2722 }
2723 Py_MEMCPY(result_s, start, end-start);
2724 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725}
2726
2727/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2728Py_LOCAL(PyBytesObject *)
2729replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002730 char from_c, char to_c,
2731 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 char *self_s, *result_s, *start, *end, *next;
2734 Py_ssize_t self_len;
2735 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002737 /* The result string will be the same size */
2738 self_s = PyBytes_AS_STRING(self);
2739 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002741 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002743 if (next == NULL) {
2744 /* No matches; return the original string */
2745 return return_self(self);
2746 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 /* Need to make a new string */
2749 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2750 if (result == NULL)
2751 return NULL;
2752 result_s = PyBytes_AS_STRING(result);
2753 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002755 /* change everything in-place, starting with this one */
2756 start = result_s + (next-self_s);
2757 *start = to_c;
2758 start++;
2759 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 while (--maxcount > 0) {
2762 next = findchar(start, end-start, from_c);
2763 if (next == NULL)
2764 break;
2765 *next = to_c;
2766 start = next+1;
2767 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770}
2771
2772/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2773Py_LOCAL(PyBytesObject *)
2774replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 const char *from_s, Py_ssize_t from_len,
2776 const char *to_s, Py_ssize_t to_len,
2777 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002778{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 char *result_s, *start, *end;
2780 char *self_s;
2781 Py_ssize_t self_len, offset;
2782 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002786 self_s = PyBytes_AS_STRING(self);
2787 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 offset = stringlib_find(self_s, self_len,
2790 from_s, from_len,
2791 0);
2792 if (offset == -1) {
2793 /* No matches; return the original string */
2794 return return_self(self);
2795 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002797 /* Need to make a new string */
2798 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2799 if (result == NULL)
2800 return NULL;
2801 result_s = PyBytes_AS_STRING(result);
2802 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 /* change everything in-place, starting with this one */
2805 start = result_s + offset;
2806 Py_MEMCPY(start, to_s, from_len);
2807 start += from_len;
2808 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002810 while ( --maxcount > 0) {
2811 offset = stringlib_find(start, end-start,
2812 from_s, from_len,
2813 0);
2814 if (offset==-1)
2815 break;
2816 Py_MEMCPY(start+offset, to_s, from_len);
2817 start += offset+from_len;
2818 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002821}
2822
2823/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2824Py_LOCAL(PyBytesObject *)
2825replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002826 char from_c,
2827 const char *to_s, Py_ssize_t to_len,
2828 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 char *self_s, *result_s;
2831 char *start, *next, *end;
2832 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002833 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002834 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002836 self_s = PyBytes_AS_STRING(self);
2837 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 count = countchar(self_s, self_len, from_c, maxcount);
2840 if (count == 0) {
2841 /* no matches, return unchanged */
2842 return return_self(self);
2843 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 /* use the difference between current and new, hence the "-1" */
2846 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002847 assert(count > 0);
2848 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002849 PyErr_SetString(PyExc_OverflowError,
2850 "replacement bytes are too long");
2851 return NULL;
2852 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002853 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002855 if ( (result = (PyBytesObject *)
2856 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2857 return NULL;
2858 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002860 start = self_s;
2861 end = self_s + self_len;
2862 while (count-- > 0) {
2863 next = findchar(start, end-start, from_c);
2864 if (next == NULL)
2865 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 if (next == start) {
2868 /* replace with the 'to' */
2869 Py_MEMCPY(result_s, to_s, to_len);
2870 result_s += to_len;
2871 start += 1;
2872 } else {
2873 /* copy the unchanged old then the 'to' */
2874 Py_MEMCPY(result_s, start, next-start);
2875 result_s += (next-start);
2876 Py_MEMCPY(result_s, to_s, to_len);
2877 result_s += to_len;
2878 start = next+1;
2879 }
2880 }
2881 /* Copy the remainder of the remaining string */
2882 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002885}
2886
2887/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2888Py_LOCAL(PyBytesObject *)
2889replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 const char *from_s, Py_ssize_t from_len,
2891 const char *to_s, Py_ssize_t to_len,
2892 Py_ssize_t maxcount) {
2893 char *self_s, *result_s;
2894 char *start, *next, *end;
2895 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002896 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002899 self_s = PyBytes_AS_STRING(self);
2900 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 count = stringlib_count(self_s, self_len,
2903 from_s, from_len,
2904 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002906 if (count == 0) {
2907 /* no matches, return unchanged */
2908 return return_self(self);
2909 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 /* Check for overflow */
2912 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002913 assert(count > 0);
2914 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002915 PyErr_SetString(PyExc_OverflowError,
2916 "replacement bytes are too long");
2917 return NULL;
2918 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002919 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002921 if ( (result = (PyBytesObject *)
2922 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2923 return NULL;
2924 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 start = self_s;
2927 end = self_s + self_len;
2928 while (count-- > 0) {
2929 offset = stringlib_find(start, end-start,
2930 from_s, from_len,
2931 0);
2932 if (offset == -1)
2933 break;
2934 next = start+offset;
2935 if (next == start) {
2936 /* replace with the 'to' */
2937 Py_MEMCPY(result_s, to_s, to_len);
2938 result_s += to_len;
2939 start += from_len;
2940 } else {
2941 /* copy the unchanged old then the 'to' */
2942 Py_MEMCPY(result_s, start, next-start);
2943 result_s += (next-start);
2944 Py_MEMCPY(result_s, to_s, to_len);
2945 result_s += to_len;
2946 start = next+from_len;
2947 }
2948 }
2949 /* Copy the remainder of the remaining string */
2950 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002952 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002953}
2954
2955
2956Py_LOCAL(PyBytesObject *)
2957replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002958 const char *from_s, Py_ssize_t from_len,
2959 const char *to_s, Py_ssize_t to_len,
2960 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002961{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 if (maxcount < 0) {
2963 maxcount = PY_SSIZE_T_MAX;
2964 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2965 /* nothing to do; return the original string */
2966 return return_self(self);
2967 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 if (maxcount == 0 ||
2970 (from_len == 0 && to_len == 0)) {
2971 /* nothing to do; return the original string */
2972 return return_self(self);
2973 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002977 if (from_len == 0) {
2978 /* insert the 'to' string everywhere. */
2979 /* >>> "Python".replace("", ".") */
2980 /* '.P.y.t.h.o.n.' */
2981 return replace_interleave(self, to_s, to_len, maxcount);
2982 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002984 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2985 /* point for an empty self string to generate a non-empty string */
2986 /* Special case so the remaining code always gets a non-empty string */
2987 if (PyBytes_GET_SIZE(self) == 0) {
2988 return return_self(self);
2989 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002991 if (to_len == 0) {
2992 /* delete all occurrences of 'from' string */
2993 if (from_len == 1) {
2994 return replace_delete_single_character(
2995 self, from_s[0], maxcount);
2996 } else {
2997 return replace_delete_substring(self, from_s,
2998 from_len, maxcount);
2999 }
3000 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 if (from_len == to_len) {
3005 if (from_len == 1) {
3006 return replace_single_character_in_place(
3007 self,
3008 from_s[0],
3009 to_s[0],
3010 maxcount);
3011 } else {
3012 return replace_substring_in_place(
3013 self, from_s, from_len, to_s, to_len,
3014 maxcount);
3015 }
3016 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 /* Otherwise use the more generic algorithms */
3019 if (from_len == 1) {
3020 return replace_single_character(self, from_s[0],
3021 to_s, to_len, maxcount);
3022 } else {
3023 /* len('from')>=2, len('to')>=1 */
3024 return replace_substring(self, from_s, from_len, to_s, to_len,
3025 maxcount);
3026 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003027}
3028
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003029
3030/*[clinic input]
3031bytes.replace
3032
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003033 old: Py_buffer
3034 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003035 count: Py_ssize_t = -1
3036 Maximum number of occurrences to replace.
3037 -1 (the default value) means replace all occurrences.
3038 /
3039
3040Return a copy with all occurrences of substring old replaced by new.
3041
3042If the optional argument count is given, only the first count occurrences are
3043replaced.
3044[clinic start generated code]*/
3045
/* Docstring for bytes.replace.  It sits between the "[clinic start
   generated code]" and "[clinic end generated code]" markers, i.e. it is
   Argument Clinic output derived from the clinic input comment above. */
PyDoc_STRVAR(bytes_replace__doc__,
"replace($self, old, new, count=-1, /)\n"
"--\n"
"\n"
"Return a copy with all occurrences of substring old replaced by new.\n"
"\n"
" count\n"
" Maximum number of occurrences to replace.\n"
" -1 (the default value) means replace all occurrences.\n"
"\n"
"If the optional argument count is given, only the first count occurrences are\n"
"replaced.");

/* Method-table entry that publishes bytes_replace() under the name
   "replace" with the METH_VARARGS calling convention. */
#define BYTES_REPLACE_METHODDEF \
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},

/* Forward declaration: the wrapper below calls the implementation before
   its definition appears. */
static PyObject *
bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003064
/* Argument-parsing wrapper for bytes.replace.
 *
 * Parses two buffer arguments and an optional Py_ssize_t count (format
 * "y*y*|n") from args, passes them to bytes_replace_impl(), and releases any
 * buffer that was acquired before returning.  Returns the implementation's
 * result, or NULL if parsing failed.
 */
static PyObject *
bytes_replace(PyBytesObject*self, PyObject *args)
{
    PyObject *return_value = NULL;
    /* Start with NULL members so the .obj tests at exit are meaningful even
       when PyArg_ParseTuple() fails before filling a buffer in. */
    Py_buffer old = {NULL, NULL};
    Py_buffer new = {NULL, NULL};
    /* -1 is the value used when the caller omits count. */
    Py_ssize_t count = -1;

    if (!PyArg_ParseTuple(args,
        "y*y*|n:replace",
        &old, &new, &count))
        goto exit;
    return_value = bytes_replace_impl(self, &old, &new, count);

exit:
    /* Cleanup for old */
    if (old.obj)
        PyBuffer_Release(&old);
    /* Cleanup for new */
    if (new.obj)
        PyBuffer_Release(&new);

    return return_value;
}
3089
3090static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003091bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count)
3092/*[clinic end generated code: output=f07bd9ecf29ee8d8 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003093{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003095 (const char *)old->buf, old->len,
3096 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003097}
3098
3099/** End DALKE **/
3100
3101/* Matches the end (direction >= 0) or start (direction < 0) of self
3102 * against substr, using the start and end arguments. Returns
3103 * -1 on error, 0 if not found and 1 if found.
3104 */
3105Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003106_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003107 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003108{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003109 Py_ssize_t len = PyBytes_GET_SIZE(self);
3110 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003111 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003112 const char* sub;
3113 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003115 if (PyBytes_Check(substr)) {
3116 sub = PyBytes_AS_STRING(substr);
3117 slen = PyBytes_GET_SIZE(substr);
3118 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003119 else {
3120 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
3121 return -1;
3122 sub = sub_view.buf;
3123 slen = sub_view.len;
3124 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003125 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003129 if (direction < 0) {
3130 /* startswith */
3131 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003132 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003133 } else {
3134 /* endswith */
3135 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003136 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003138 if (end-slen > start)
3139 start = end - slen;
3140 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003141 if (end-start < slen)
3142 goto notfound;
3143 if (memcmp(str+start, sub, slen) != 0)
3144 goto notfound;
3145
3146 PyBuffer_Release(&sub_view);
3147 return 1;
3148
3149notfound:
3150 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003151 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003152}
3153
3154
3155PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003156"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003157\n\
3158Return True if B starts with the specified prefix, False otherwise.\n\
3159With optional start, test B beginning at that position.\n\
3160With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003161prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003162
3163static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003164bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003166 Py_ssize_t start = 0;
3167 Py_ssize_t end = PY_SSIZE_T_MAX;
3168 PyObject *subobj;
3169 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003170
Jesus Ceaac451502011-04-20 17:09:23 +02003171 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003172 return NULL;
3173 if (PyTuple_Check(subobj)) {
3174 Py_ssize_t i;
3175 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3176 result = _bytes_tailmatch(self,
3177 PyTuple_GET_ITEM(subobj, i),
3178 start, end, -1);
3179 if (result == -1)
3180 return NULL;
3181 else if (result) {
3182 Py_RETURN_TRUE;
3183 }
3184 }
3185 Py_RETURN_FALSE;
3186 }
3187 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003188 if (result == -1) {
3189 if (PyErr_ExceptionMatches(PyExc_TypeError))
3190 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3191 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003192 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003193 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 else
3195 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003196}
3197
3198
3199PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003200"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003201\n\
3202Return True if B ends with the specified suffix, False otherwise.\n\
3203With optional start, test B beginning at that position.\n\
3204With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003205suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003206
3207static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003208bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003210 Py_ssize_t start = 0;
3211 Py_ssize_t end = PY_SSIZE_T_MAX;
3212 PyObject *subobj;
3213 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003214
Jesus Ceaac451502011-04-20 17:09:23 +02003215 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003216 return NULL;
3217 if (PyTuple_Check(subobj)) {
3218 Py_ssize_t i;
3219 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3220 result = _bytes_tailmatch(self,
3221 PyTuple_GET_ITEM(subobj, i),
3222 start, end, +1);
3223 if (result == -1)
3224 return NULL;
3225 else if (result) {
3226 Py_RETURN_TRUE;
3227 }
3228 }
3229 Py_RETURN_FALSE;
3230 }
3231 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003232 if (result == -1) {
3233 if (PyErr_ExceptionMatches(PyExc_TypeError))
3234 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3235 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003236 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003237 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003238 else
3239 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003240}
3241
3242
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003243/*[clinic input]
3244bytes.decode
3245
3246 encoding: str(c_default="NULL") = 'utf-8'
3247 The encoding with which to decode the bytes.
3248 errors: str(c_default="NULL") = 'strict'
3249 The error handling scheme to use for the handling of decoding errors.
3250 The default is 'strict' meaning that decoding errors raise a
3251 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3252 as well as any other name registered with codecs.register_error that
3253 can handle UnicodeDecodeErrors.
3254
3255Decode the bytes using the codec registered for encoding.
3256[clinic start generated code]*/
3257
3258PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003259"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
3260"--\n"
3261"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003262"Decode the bytes using the codec registered for encoding.\n"
3263"\n"
3264" encoding\n"
3265" The encoding with which to decode the bytes.\n"
3266" errors\n"
3267" The error handling scheme to use for the handling of decoding errors.\n"
3268" The default is \'strict\' meaning that decoding errors raise a\n"
3269" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
3270" as well as any other name registered with codecs.register_error that\n"
3271" can handle UnicodeDecodeErrors.");
3272
3273#define BYTES_DECODE_METHODDEF \
3274 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
3275
3276static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003277bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003278
3279static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003280bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00003281{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003282 PyObject *return_value = NULL;
3283 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003284 const char *encoding = NULL;
3285 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00003286
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003287 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3288 "|ss:decode", _keywords,
3289 &encoding, &errors))
3290 goto exit;
3291 return_value = bytes_decode_impl(self, encoding, errors);
3292
3293exit:
3294 return return_value;
3295}
3296
3297static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003298bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
3299/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003300{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003301 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003302}
3303
Guido van Rossum20188312006-05-05 15:15:40 +00003304
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003305/*[clinic input]
3306bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003307
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003308 keepends: int(py_default="False") = 0
3309
3310Return a list of the lines in the bytes, breaking at line boundaries.
3311
3312Line breaks are not included in the resulting list unless keepends is given and
3313true.
3314[clinic start generated code]*/
3315
3316PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003317"splitlines($self, /, keepends=False)\n"
3318"--\n"
3319"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003320"Return a list of the lines in the bytes, breaking at line boundaries.\n"
3321"\n"
3322"Line breaks are not included in the resulting list unless keepends is given and\n"
3323"true.");
3324
3325#define BYTES_SPLITLINES_METHODDEF \
3326 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
3327
3328static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003329bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003330
3331static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003332bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003333{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003334 PyObject *return_value = NULL;
3335 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003336 int keepends = 0;
3337
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003338 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3339 "|i:splitlines", _keywords,
3340 &keepends))
3341 goto exit;
3342 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003343
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003344exit:
3345 return return_value;
3346}
3347
3348static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003349bytes_splitlines_impl(PyBytesObject*self, int keepends)
3350/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003351{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003352 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003353 (PyObject*) self, PyBytes_AS_STRING(self),
3354 PyBytes_GET_SIZE(self), keepends
3355 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003356}
3357
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003358static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003359hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003361 if (c >= 128)
3362 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003363 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003364 return c - '0';
3365 else {
David Malcolm96960882010-11-05 17:23:41 +00003366 if (Py_ISUPPER(c))
3367 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003368 if (c >= 'a' && c <= 'f')
3369 return c - 'a' + 10;
3370 }
3371 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003372}
3373
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003374/*[clinic input]
3375@classmethod
3376bytes.fromhex
3377
3378 string: unicode
3379 /
3380
3381Create a bytes object from a string of hexadecimal numbers.
3382
3383Spaces between two numbers are accepted.
3384Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3385[clinic start generated code]*/
3386
3387PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003388"fromhex($type, string, /)\n"
3389"--\n"
3390"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003391"Create a bytes object from a string of hexadecimal numbers.\n"
3392"\n"
3393"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02003394"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003395
3396#define BYTES_FROMHEX_METHODDEF \
3397 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
3398
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003399static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003400bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003401
3402static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003403bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003404{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003405 PyObject *return_value = NULL;
3406 PyObject *string;
3407
3408 if (!PyArg_ParseTuple(args,
3409 "U:fromhex",
3410 &string))
3411 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02003412 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003413
3414exit:
3415 return return_value;
3416}
3417
3418static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003419bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
3420/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003421{
3422 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003423 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003424 Py_ssize_t hexlen, byteslen, i, j;
3425 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003426 void *data;
3427 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003428
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003429 assert(PyUnicode_Check(string));
3430 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003431 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003432 kind = PyUnicode_KIND(string);
3433 data = PyUnicode_DATA(string);
3434 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003436 byteslen = hexlen/2; /* This overestimates if there are spaces */
3437 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3438 if (!newstring)
3439 return NULL;
3440 buf = PyBytes_AS_STRING(newstring);
3441 for (i = j = 0; i < hexlen; i += 2) {
3442 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003443 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003444 i++;
3445 if (i >= hexlen)
3446 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003447 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3448 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003449 if (top == -1 || bot == -1) {
3450 PyErr_Format(PyExc_ValueError,
3451 "non-hexadecimal number found in "
3452 "fromhex() arg at position %zd", i);
3453 goto error;
3454 }
3455 buf[j++] = (top << 4) + bot;
3456 }
3457 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3458 goto error;
3459 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003460
3461 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003462 Py_XDECREF(newstring);
3463 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003464}
3465
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003466/*[clinic input]
3467bytes.__sizeof__ as bytes_sizeof
3468
3469 self: self(type="PyBytesObject *")
3470
3471Returns the size of the bytes object in memory, in bytes.
3472[clinic start generated code]*/
3473
3474PyDoc_STRVAR(bytes_sizeof__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003475"__sizeof__($self, /)\n"
3476"--\n"
3477"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003478"Returns the size of the bytes object in memory, in bytes.");
3479
3480#define BYTES_SIZEOF_METHODDEF \
3481 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, bytes_sizeof__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003482
3483static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003484bytes_sizeof_impl(PyBytesObject *self);
3485
3486static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003487bytes_sizeof(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003488{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003489 return bytes_sizeof_impl(self);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003490}
3491
3492static PyObject *
3493bytes_sizeof_impl(PyBytesObject *self)
Martin v. Löwis0efea322014-07-27 17:29:17 +02003494/*[clinic end generated code: output=44933279343f24ae input=bee4c64bb42078ed]*/
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003496 Py_ssize_t res;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003497 res = PyBytesObject_SIZE + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003498 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003499}
3500
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003501
3502static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003503bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003504{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003505 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003506}
3507
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003508
/* Method table for the bytes type.
 *
 * Entries are kept in alphabetical order by method name, except for
 * __getnewargs__ (first) and __sizeof__ (last).  Entries written as
 * BYTES_*_METHODDEF are macros that expand to a complete initializer
 * including the trailing comma (see e.g. BYTES_DECODE_METHODDEF), which is
 * why no comma follows them here.  The table ends with a NULL sentinel.
 */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
     expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    BYTES_FROMHEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    BYTES_SIZEOF_METHODDEF
    {NULL,     NULL}                         /* sentinel */
};
3565
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003566static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003567bytes_mod(PyObject *v, PyObject *w)
3568{
3569 if (!PyBytes_Check(v))
3570 Py_RETURN_NOTIMPLEMENTED;
3571 return _PyBytes_Format(v, w);
3572}
3573
/* Number protocol table for bytes.  Only the remainder slot is filled in
   (with bytes_mod); the initializer is positional, so the three leading
   zeros are placeholders needed to reach nb_remainder, and every slot
   after it is left zero-initialized. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
3580
3581static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003582str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3583
3584static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003585bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003586{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003587 PyObject *x = NULL;
3588 const char *encoding = NULL;
3589 const char *errors = NULL;
3590 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003591 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003592 Py_ssize_t size;
3593 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003594 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003596 if (type != &PyBytes_Type)
3597 return str_subtype_new(type, args, kwds);
3598 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3599 &encoding, &errors))
3600 return NULL;
3601 if (x == NULL) {
3602 if (encoding != NULL || errors != NULL) {
3603 PyErr_SetString(PyExc_TypeError,
3604 "encoding or errors without sequence "
3605 "argument");
3606 return NULL;
3607 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003608 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003609 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003611 if (PyUnicode_Check(x)) {
3612 /* Encode via the codec registry */
3613 if (encoding == NULL) {
3614 PyErr_SetString(PyExc_TypeError,
3615 "string argument without an encoding");
3616 return NULL;
3617 }
3618 new = PyUnicode_AsEncodedString(x, encoding, errors);
3619 if (new == NULL)
3620 return NULL;
3621 assert(PyBytes_Check(new));
3622 return new;
3623 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003624
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003625 /* If it's not unicode, there can't be encoding or errors */
3626 if (encoding != NULL || errors != NULL) {
3627 PyErr_SetString(PyExc_TypeError,
3628 "encoding or errors without a string argument");
3629 return NULL;
3630 }
3631
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003632 /* We'd like to call PyObject_Bytes here, but we need to check for an
3633 integer argument before deferring to PyBytes_FromObject, something
3634 PyObject_Bytes doesn't do. */
3635 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3636 if (func != NULL) {
3637 new = PyObject_CallFunctionObjArgs(func, NULL);
3638 Py_DECREF(func);
3639 if (new == NULL)
3640 return NULL;
3641 if (!PyBytes_Check(new)) {
3642 PyErr_Format(PyExc_TypeError,
3643 "__bytes__ returned non-bytes (type %.200s)",
3644 Py_TYPE(new)->tp_name);
3645 Py_DECREF(new);
3646 return NULL;
3647 }
3648 return new;
3649 }
3650 else if (PyErr_Occurred())
3651 return NULL;
3652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003653 /* Is it an integer? */
3654 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3655 if (size == -1 && PyErr_Occurred()) {
3656 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3657 return NULL;
3658 PyErr_Clear();
3659 }
3660 else if (size < 0) {
3661 PyErr_SetString(PyExc_ValueError, "negative count");
3662 return NULL;
3663 }
3664 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003665 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003666 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003667 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003668 return new;
3669 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003670
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003671 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003672}
3673
3674PyObject *
3675PyBytes_FromObject(PyObject *x)
3676{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003677 PyObject *new, *it;
3678 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003680 if (x == NULL) {
3681 PyErr_BadInternalCall();
3682 return NULL;
3683 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003684
3685 if (PyBytes_CheckExact(x)) {
3686 Py_INCREF(x);
3687 return x;
3688 }
3689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003690 /* Use the modern buffer interface */
3691 if (PyObject_CheckBuffer(x)) {
3692 Py_buffer view;
3693 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3694 return NULL;
3695 new = PyBytes_FromStringAndSize(NULL, view.len);
3696 if (!new)
3697 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003698 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3699 &view, view.len, 'C') < 0)
3700 goto fail;
3701 PyBuffer_Release(&view);
3702 return new;
3703 fail:
3704 Py_XDECREF(new);
3705 PyBuffer_Release(&view);
3706 return NULL;
3707 }
3708 if (PyUnicode_Check(x)) {
3709 PyErr_SetString(PyExc_TypeError,
3710 "cannot convert unicode object to bytes");
3711 return NULL;
3712 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003714 if (PyList_CheckExact(x)) {
3715 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3716 if (new == NULL)
3717 return NULL;
3718 for (i = 0; i < Py_SIZE(x); i++) {
3719 Py_ssize_t value = PyNumber_AsSsize_t(
3720 PyList_GET_ITEM(x, i), PyExc_ValueError);
3721 if (value == -1 && PyErr_Occurred()) {
3722 Py_DECREF(new);
3723 return NULL;
3724 }
3725 if (value < 0 || value >= 256) {
3726 PyErr_SetString(PyExc_ValueError,
3727 "bytes must be in range(0, 256)");
3728 Py_DECREF(new);
3729 return NULL;
3730 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003731 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003732 }
3733 return new;
3734 }
3735 if (PyTuple_CheckExact(x)) {
3736 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3737 if (new == NULL)
3738 return NULL;
3739 for (i = 0; i < Py_SIZE(x); i++) {
3740 Py_ssize_t value = PyNumber_AsSsize_t(
3741 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3742 if (value == -1 && PyErr_Occurred()) {
3743 Py_DECREF(new);
3744 return NULL;
3745 }
3746 if (value < 0 || value >= 256) {
3747 PyErr_SetString(PyExc_ValueError,
3748 "bytes must be in range(0, 256)");
3749 Py_DECREF(new);
3750 return NULL;
3751 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003752 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003753 }
3754 return new;
3755 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003757 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003758 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003759 if (size == -1 && PyErr_Occurred())
3760 return NULL;
3761 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3762 returning a shared empty bytes string. This required because we
3763 want to call _PyBytes_Resize() the returned object, which we can
3764 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003765 if (size == 0)
3766 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003767 new = PyBytes_FromStringAndSize(NULL, size);
3768 if (new == NULL)
3769 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003770 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003772 /* Get the iterator */
3773 it = PyObject_GetIter(x);
3774 if (it == NULL)
3775 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003777 /* Run the iterator to exhaustion */
3778 for (i = 0; ; i++) {
3779 PyObject *item;
3780 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003782 /* Get the next item */
3783 item = PyIter_Next(it);
3784 if (item == NULL) {
3785 if (PyErr_Occurred())
3786 goto error;
3787 break;
3788 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003790 /* Interpret it as an int (__index__) */
3791 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3792 Py_DECREF(item);
3793 if (value == -1 && PyErr_Occurred())
3794 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003796 /* Range check */
3797 if (value < 0 || value >= 256) {
3798 PyErr_SetString(PyExc_ValueError,
3799 "bytes must be in range(0, 256)");
3800 goto error;
3801 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003803 /* Append the byte */
3804 if (i >= size) {
3805 size = 2 * size + 1;
3806 if (_PyBytes_Resize(&new, size) < 0)
3807 goto error;
3808 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003809 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003810 }
3811 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003813 /* Clean up and return success */
3814 Py_DECREF(it);
3815 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003816
3817 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003818 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003819 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003820 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003821}
3822
3823static PyObject *
3824str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003826 PyObject *tmp, *pnew;
3827 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003828
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003829 assert(PyType_IsSubtype(type, &PyBytes_Type));
3830 tmp = bytes_new(&PyBytes_Type, args, kwds);
3831 if (tmp == NULL)
3832 return NULL;
3833 assert(PyBytes_CheckExact(tmp));
3834 n = PyBytes_GET_SIZE(tmp);
3835 pnew = type->tp_alloc(type, n);
3836 if (pnew != NULL) {
3837 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3838 PyBytes_AS_STRING(tmp), n+1);
3839 ((PyBytesObject *)pnew)->ob_shash =
3840 ((PyBytesObject *)tmp)->ob_shash;
3841 }
3842 Py_DECREF(tmp);
3843 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003844}
3845
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003846PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003847"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003848bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003849bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003850bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3851bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003852\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003853Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003854 - an iterable yielding integers in range(256)\n\
3855 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003856 - any object implementing the buffer API.\n\
3857 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003858
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003859static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003860
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003861PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003862 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3863 "bytes",
3864 PyBytesObject_SIZE,
3865 sizeof(char),
3866 bytes_dealloc, /* tp_dealloc */
3867 0, /* tp_print */
3868 0, /* tp_getattr */
3869 0, /* tp_setattr */
3870 0, /* tp_reserved */
3871 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003872 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003873 &bytes_as_sequence, /* tp_as_sequence */
3874 &bytes_as_mapping, /* tp_as_mapping */
3875 (hashfunc)bytes_hash, /* tp_hash */
3876 0, /* tp_call */
3877 bytes_str, /* tp_str */
3878 PyObject_GenericGetAttr, /* tp_getattro */
3879 0, /* tp_setattro */
3880 &bytes_as_buffer, /* tp_as_buffer */
3881 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3882 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3883 bytes_doc, /* tp_doc */
3884 0, /* tp_traverse */
3885 0, /* tp_clear */
3886 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3887 0, /* tp_weaklistoffset */
3888 bytes_iter, /* tp_iter */
3889 0, /* tp_iternext */
3890 bytes_methods, /* tp_methods */
3891 0, /* tp_members */
3892 0, /* tp_getset */
3893 &PyBaseObject_Type, /* tp_base */
3894 0, /* tp_dict */
3895 0, /* tp_descr_get */
3896 0, /* tp_descr_set */
3897 0, /* tp_dictoffset */
3898 0, /* tp_init */
3899 0, /* tp_alloc */
3900 bytes_new, /* tp_new */
3901 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003902};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003903
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003904void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003905PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003906{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003907 assert(pv != NULL);
3908 if (*pv == NULL)
3909 return;
3910 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003911 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003912 return;
3913 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003914
3915 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3916 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003917 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003918 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003919
Antoine Pitrou161d6952014-05-01 14:36:20 +02003920 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003921 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003922 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3923 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3924 Py_CLEAR(*pv);
3925 return;
3926 }
3927
3928 oldsize = PyBytes_GET_SIZE(*pv);
3929 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3930 PyErr_NoMemory();
3931 goto error;
3932 }
3933 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3934 goto error;
3935
3936 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3937 PyBuffer_Release(&wb);
3938 return;
3939
3940 error:
3941 PyBuffer_Release(&wb);
3942 Py_CLEAR(*pv);
3943 return;
3944 }
3945
3946 else {
3947 /* Multiple references, need to create new object */
3948 PyObject *v;
3949 v = bytes_concat(*pv, w);
3950 Py_DECREF(*pv);
3951 *pv = v;
3952 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003953}
3954
3955void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003956PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003957{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003958 PyBytes_Concat(pv, w);
3959 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003960}
3961
3962
Ethan Furmanb95b5612015-01-23 20:05:18 -08003963/* The following function breaks the notion that bytes are immutable:
3964 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003965 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003966 as creating a new bytes object and destroying the old one, only
3967 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003968 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003969 Note that if there's not enough memory to resize the bytes object, the
3970 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003971 memory" exception is set, and -1 is returned. Else (on success) 0 is
3972 returned, and the value in *pv may or may not be the same as on input.
3973 As always, an extra byte is allocated for a trailing \0 byte (newsize
3974 does *not* include that), and a trailing \0 byte is stored.
3975*/
3976
3977int
3978_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3979{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003980 PyObject *v;
3981 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003982 v = *pv;
3983 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3984 *pv = 0;
3985 Py_DECREF(v);
3986 PyErr_BadInternalCall();
3987 return -1;
3988 }
3989 /* XXX UNREF/NEWREF interface should be more symmetrical */
3990 _Py_DEC_REFTOTAL;
3991 _Py_ForgetReference(v);
3992 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003993 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003994 if (*pv == NULL) {
3995 PyObject_Del(v);
3996 PyErr_NoMemory();
3997 return -1;
3998 }
3999 _Py_NewReference(*pv);
4000 sv = (PyBytesObject *) *pv;
4001 Py_SIZE(sv) = newsize;
4002 sv->ob_sval[newsize] = '\0';
4003 sv->ob_shash = -1; /* invalidate cached hash value */
4004 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004005}
4006
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004007void
4008PyBytes_Fini(void)
4009{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004010 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02004011 for (i = 0; i < UCHAR_MAX + 1; i++)
4012 Py_CLEAR(characters[i]);
4013 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004014}
4015
Benjamin Peterson4116f362008-05-27 00:36:20 +00004016/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004017
4018typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004019 PyObject_HEAD
4020 Py_ssize_t it_index;
4021 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004022} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004023
4024static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004025striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004026{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004027 _PyObject_GC_UNTRACK(it);
4028 Py_XDECREF(it->it_seq);
4029 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004030}
4031
4032static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004033striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004034{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004035 Py_VISIT(it->it_seq);
4036 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004037}
4038
4039static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004040striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004041{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004042 PyBytesObject *seq;
4043 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004045 assert(it != NULL);
4046 seq = it->it_seq;
4047 if (seq == NULL)
4048 return NULL;
4049 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004051 if (it->it_index < PyBytes_GET_SIZE(seq)) {
4052 item = PyLong_FromLong(
4053 (unsigned char)seq->ob_sval[it->it_index]);
4054 if (item != NULL)
4055 ++it->it_index;
4056 return item;
4057 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004059 Py_DECREF(seq);
4060 it->it_seq = NULL;
4061 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004062}
4063
4064static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004065striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004067 Py_ssize_t len = 0;
4068 if (it->it_seq)
4069 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
4070 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004071}
4072
4073PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004074 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004075
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004076static PyObject *
4077striter_reduce(striterobject *it)
4078{
4079 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02004080 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004081 it->it_seq, it->it_index);
4082 } else {
4083 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
4084 if (u == NULL)
4085 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02004086 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004087 }
4088}
4089
4090PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
4091
4092static PyObject *
4093striter_setstate(striterobject *it, PyObject *state)
4094{
4095 Py_ssize_t index = PyLong_AsSsize_t(state);
4096 if (index == -1 && PyErr_Occurred())
4097 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00004098 if (it->it_seq != NULL) {
4099 if (index < 0)
4100 index = 0;
4101 else if (index > PyBytes_GET_SIZE(it->it_seq))
4102 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
4103 it->it_index = index;
4104 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004105 Py_RETURN_NONE;
4106}
4107
4108PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
4109
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004110static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004111 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4112 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004113 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
4114 reduce_doc},
4115 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
4116 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004117 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004118};
4119
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004120PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004121 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4122 "bytes_iterator", /* tp_name */
4123 sizeof(striterobject), /* tp_basicsize */
4124 0, /* tp_itemsize */
4125 /* methods */
4126 (destructor)striter_dealloc, /* tp_dealloc */
4127 0, /* tp_print */
4128 0, /* tp_getattr */
4129 0, /* tp_setattr */
4130 0, /* tp_reserved */
4131 0, /* tp_repr */
4132 0, /* tp_as_number */
4133 0, /* tp_as_sequence */
4134 0, /* tp_as_mapping */
4135 0, /* tp_hash */
4136 0, /* tp_call */
4137 0, /* tp_str */
4138 PyObject_GenericGetAttr, /* tp_getattro */
4139 0, /* tp_setattro */
4140 0, /* tp_as_buffer */
4141 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4142 0, /* tp_doc */
4143 (traverseproc)striter_traverse, /* tp_traverse */
4144 0, /* tp_clear */
4145 0, /* tp_richcompare */
4146 0, /* tp_weaklistoffset */
4147 PyObject_SelfIter, /* tp_iter */
4148 (iternextfunc)striter_next, /* tp_iternext */
4149 striter_methods, /* tp_methods */
4150 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004151};
4152
4153static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00004154bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004156 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004158 if (!PyBytes_Check(seq)) {
4159 PyErr_BadInternalCall();
4160 return NULL;
4161 }
4162 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
4163 if (it == NULL)
4164 return NULL;
4165 it->it_index = 0;
4166 Py_INCREF(seq);
4167 it->it_seq = (PyBytesObject *)seq;
4168 _PyObject_GC_TRACK(it);
4169 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004170}