blob: 4d6b3e4abe150d76acf203efbee82d5c9a209c36 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Christian Heimes2c9c7a52008-05-26 13:42:13 +000015#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000016Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000017#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000018
Christian Heimes2c9c7a52008-05-26 13:42:13 +000019static PyBytesObject *characters[UCHAR_MAX + 1];
20static PyBytesObject *nullstring;
21
/* PyBytesObject_SIZE is the basic size of a bytes object: an allocation
   for a string of length n must request PyBytesObject_SIZE + n bytes
   (the extra 1 leaves room for the terminating null byte).

   Measuring up to ob_sval with offsetof(), instead of using
   sizeof(PyBytesObject), saves 3 bytes per string allocation on a
   typical system.
*/
#define PyBytesObject_SIZE (1 + offsetof(PyBytesObject, ob_sval))
29
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000031 For PyBytes_FromString(), the parameter `str' points to a null-terminated
32 string containing exactly `size' bytes.
33
34 For PyBytes_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000042 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043 alter the data yourself, since the strings may be shared.
44
45 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020047 allocated for string data, not counting the null terminating character.
48 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000049 PyBytes_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyBytes_FromString()).
51*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020052static PyObject *
53_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000054{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020055 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020056 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 Py_INCREF(op);
63 return (PyObject *)op;
64 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065
Victor Stinner049e5092014-08-17 22:20:00 +020066 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 PyErr_SetString(PyExc_OverflowError,
68 "byte string is too large");
69 return NULL;
70 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020073 if (use_calloc)
74 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
75 else
76 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 if (op == NULL)
78 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010079 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020081 if (!use_calloc)
82 op->ob_sval[size] = '\0';
83 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (size == 0) {
85 nullstring = op;
86 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 }
88 return (PyObject *) op;
89}
90
91PyObject *
92PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
93{
94 PyBytesObject *op;
95 if (size < 0) {
96 PyErr_SetString(PyExc_SystemError,
97 "Negative size passed to PyBytes_FromStringAndSize");
98 return NULL;
99 }
100 if (size == 1 && str != NULL &&
101 (op = characters[*str & UCHAR_MAX]) != NULL)
102 {
103#ifdef COUNT_ALLOCS
104 one_strings++;
105#endif
106 Py_INCREF(op);
107 return (PyObject *)op;
108 }
109
110 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
111 if (op == NULL)
112 return NULL;
113 if (str == NULL)
114 return (PyObject *) op;
115
116 Py_MEMCPY(op->ob_sval, str, size);
117 /* share short strings */
118 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 characters[*str & UCHAR_MAX] = op;
120 Py_INCREF(op);
121 }
122 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000123}
124
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000125PyObject *
126PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200128 size_t size;
129 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 assert(str != NULL);
132 size = strlen(str);
133 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
134 PyErr_SetString(PyExc_OverflowError,
135 "byte string is too long");
136 return NULL;
137 }
138 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000139#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000141#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 /* Inline PyObject_NewVar */
154 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
155 if (op == NULL)
156 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100157 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 op->ob_shash = -1;
159 Py_MEMCPY(op->ob_sval, str, size+1);
160 /* share short strings */
161 if (size == 0) {
162 nullstring = op;
163 Py_INCREF(op);
164 } else if (size == 1) {
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000169}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000170
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000171PyObject *
172PyBytes_FromFormatV(const char *format, va_list vargs)
173{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000180 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 /* step 1: figure out how large a buffer we need */
182 for (f = format; *f; f++) {
183 if (*f == '%') {
184 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000185 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
189 * they don't affect the amount of space we reserve.
190 */
191 if ((*f == 'l' || *f == 'z') &&
192 (f[1] == 'd' || f[1] == 'u'))
193 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 switch (*f) {
196 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100197 {
198 int c = va_arg(count, int);
199 if (c < 0 || c > 255) {
200 PyErr_SetString(PyExc_OverflowError,
201 "PyBytes_FromFormatV(): %c format "
202 "expects an integer in range [0; 255]");
203 return NULL;
204 }
205 n++;
206 break;
207 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 case '%':
209 n++;
210 break;
211 case 'd': case 'u': case 'i': case 'x':
212 (void) va_arg(count, int);
213 /* 20 bytes is enough to hold a 64-bit
214 integer. Decimal takes the most space.
215 This isn't enough for octal. */
216 n += 20;
217 break;
218 case 's':
219 s = va_arg(count, char*);
220 n += strlen(s);
221 break;
222 case 'p':
223 (void) va_arg(count, int);
224 /* maximum 64-bit pointer representation:
225 * 0xffffffffffffffff
226 * so 19 characters is enough.
227 * XXX I count 18 -- what's the extra for?
228 */
229 n += 19;
230 break;
231 default:
232 /* if we stumble upon an unknown
233 formatting code, copy the rest of
234 the format string to the output
235 string. (we cannot just skip the
236 code, since there's no way to know
237 what's in the argument list) */
238 n += strlen(p);
239 goto expand;
240 }
241 } else
242 n++;
243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000244 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 /* step 2: fill the buffer */
246 /* Since we've analyzed how much space we need for the worst case,
247 use sprintf directly instead of the slower PyOS_snprintf. */
248 string = PyBytes_FromStringAndSize(NULL, n);
249 if (!string)
250 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 for (f = format; *f; f++) {
255 if (*f == '%') {
256 const char* p = f++;
257 Py_ssize_t i;
258 int longflag = 0;
259 int size_tflag = 0;
260 /* parse the width.precision part (we're only
261 interested in the precision value, if any) */
262 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000263 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 n = (n*10) + *f++ - '0';
265 if (*f == '.') {
266 f++;
267 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000268 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 n = (n*10) + *f++ - '0';
270 }
David Malcolm96960882010-11-05 17:23:41 +0000271 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 f++;
273 /* handle the long flag, but only for %ld and %lu.
274 others can be added when necessary. */
275 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
276 longflag = 1;
277 ++f;
278 }
279 /* handle the size_t flag. */
280 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
281 size_tflag = 1;
282 ++f;
283 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 switch (*f) {
286 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100287 {
288 int c = va_arg(vargs, int);
289 /* c has been checked for overflow in the first step */
290 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100292 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 case 'd':
294 if (longflag)
295 sprintf(s, "%ld", va_arg(vargs, long));
296 else if (size_tflag)
297 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
298 va_arg(vargs, Py_ssize_t));
299 else
300 sprintf(s, "%d", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'u':
304 if (longflag)
305 sprintf(s, "%lu",
306 va_arg(vargs, unsigned long));
307 else if (size_tflag)
308 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
309 va_arg(vargs, size_t));
310 else
311 sprintf(s, "%u",
312 va_arg(vargs, unsigned int));
313 s += strlen(s);
314 break;
315 case 'i':
316 sprintf(s, "%i", va_arg(vargs, int));
317 s += strlen(s);
318 break;
319 case 'x':
320 sprintf(s, "%x", va_arg(vargs, int));
321 s += strlen(s);
322 break;
323 case 's':
324 p = va_arg(vargs, char*);
325 i = strlen(p);
326 if (n > 0 && i > n)
327 i = n;
328 Py_MEMCPY(s, p, i);
329 s += i;
330 break;
331 case 'p':
332 sprintf(s, "%p", va_arg(vargs, void*));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (s[1] == 'X')
335 s[1] = 'x';
336 else if (s[1] != 'x') {
337 memmove(s+2, s, strlen(s)+1);
338 s[0] = '0';
339 s[1] = 'x';
340 }
341 s += strlen(s);
342 break;
343 case '%':
344 *s++ = '%';
345 break;
346 default:
347 strcpy(s, p);
348 s += strlen(s);
349 goto end;
350 }
351 } else
352 *s++ = *f;
353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354
355 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
357 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358}
359
360PyObject *
361PyBytes_FromFormat(const char *format, ...)
362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyObject* ret;
364 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365
366#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 ret = PyBytes_FromFormatV(format, vargs);
372 va_end(vargs);
373 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000374}
375
Ethan Furmanb95b5612015-01-23 20:05:18 -0800376/* Helpers for formatstring */
377
378Py_LOCAL_INLINE(PyObject *)
379getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
380{
381 Py_ssize_t argidx = *p_argidx;
382 if (argidx < arglen) {
383 (*p_argidx)++;
384 if (arglen < 0)
385 return args;
386 else
387 return PyTuple_GetItem(args, argidx);
388 }
389 PyErr_SetString(PyExc_TypeError,
390 "not enough arguments for format string");
391 return NULL;
392}
393
/* Conversion flag bits, one per flag character of a format specifier:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
406
407/* Returns a new reference to a PyBytes object, or NULL on failure. */
408
409static PyObject *
410formatfloat(PyObject *v, int flags, int prec, int type)
411{
412 char *p;
413 PyObject *result;
414 double x;
415
416 x = PyFloat_AsDouble(v);
417 if (x == -1.0 && PyErr_Occurred()) {
418 PyErr_Format(PyExc_TypeError, "float argument required, "
419 "not %.200s", Py_TYPE(v)->tp_name);
420 return NULL;
421 }
422
423 if (prec < 0)
424 prec = 6;
425
426 p = PyOS_double_to_string(x, type, prec,
427 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
428
429 if (p == NULL)
430 return NULL;
431 result = PyBytes_FromStringAndSize(p, strlen(p));
432 PyMem_Free(p);
433 return result;
434}
435
Ethan Furmanb95b5612015-01-23 20:05:18 -0800436Py_LOCAL_INLINE(int)
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200437byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800438{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200439 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
440 *p = PyBytes_AS_STRING(arg)[0];
441 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800442 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200443 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
444 *p = PyByteArray_AS_STRING(arg)[0];
445 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446 }
447 else {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200448 long ival = PyLong_AsLong(arg);
449 if (0 <= ival && ival <= 255) {
450 *p = (char)ival;
451 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800452 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800453 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200454 PyErr_SetString(PyExc_TypeError,
455 "%c requires an integer in range(256) or a single byte");
456 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800457}
458
459static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200460format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800461{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200462 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800463 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800464 /* is it a bytes object? */
465 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200466 *pbuf = PyBytes_AS_STRING(v);
467 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800468 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200469 return v;
470 }
471 if (PyByteArray_Check(v)) {
472 *pbuf = PyByteArray_AS_STRING(v);
473 *plen = PyByteArray_GET_SIZE(v);
474 Py_INCREF(v);
475 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800476 }
477 /* does it support __bytes__? */
478 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
479 if (func != NULL) {
480 result = PyObject_CallFunctionObjArgs(func, NULL);
481 Py_DECREF(func);
482 if (result == NULL)
483 return NULL;
484 if (!PyBytes_Check(result)) {
485 PyErr_Format(PyExc_TypeError,
486 "__bytes__ returned non-bytes (type %.200s)",
487 Py_TYPE(result)->tp_name);
488 Py_DECREF(result);
489 return NULL;
490 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200491 *pbuf = PyBytes_AS_STRING(result);
492 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800493 return result;
494 }
495 PyErr_Format(PyExc_TypeError,
496 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
497 Py_TYPE(v)->tp_name);
498 return NULL;
499}
500
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesised so the macro behaves as a single
   operand in any expression (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
510
511PyObject *
512_PyBytes_Format(PyObject *format, PyObject *args)
513{
514 char *fmt, *res;
515 Py_ssize_t arglen, argidx;
516 Py_ssize_t reslen, rescnt, fmtcnt;
517 int args_owned = 0;
518 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519 PyObject *dict = NULL;
520 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
521 PyErr_BadInternalCall();
522 return NULL;
523 }
524 fmt = PyBytes_AS_STRING(format);
525 fmtcnt = PyBytes_GET_SIZE(format);
526 reslen = rescnt = fmtcnt + 100;
527 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
528 if (result == NULL)
529 return NULL;
530 res = PyBytes_AsString(result);
531 if (PyTuple_Check(args)) {
532 arglen = PyTuple_GET_SIZE(args);
533 argidx = 0;
534 }
535 else {
536 arglen = -1;
537 argidx = -2;
538 }
539 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
540 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
541 !PyByteArray_Check(args)) {
542 dict = args;
543 }
544 while (--fmtcnt >= 0) {
545 if (*fmt != '%') {
546 if (--rescnt < 0) {
547 rescnt = fmtcnt + 100;
548 reslen += rescnt;
549 if (_PyBytes_Resize(&result, reslen))
550 return NULL;
551 res = PyBytes_AS_STRING(result)
552 + reslen - rescnt;
553 --rescnt;
554 }
555 *res++ = *fmt++;
556 }
557 else {
558 /* Got a format specifier */
559 int flags = 0;
560 Py_ssize_t width = -1;
561 int prec = -1;
562 int c = '\0';
563 int fill;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200564 PyObject *iobj;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800565 PyObject *v = NULL;
566 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200567 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800568 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200569 Py_ssize_t len = 0;
570 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800571
Ethan Furmanb95b5612015-01-23 20:05:18 -0800572 fmt++;
573 if (*fmt == '(') {
574 char *keystart;
575 Py_ssize_t keylen;
576 PyObject *key;
577 int pcount = 1;
578
579 if (dict == NULL) {
580 PyErr_SetString(PyExc_TypeError,
581 "format requires a mapping");
582 goto error;
583 }
584 ++fmt;
585 --fmtcnt;
586 keystart = fmt;
587 /* Skip over balanced parentheses */
588 while (pcount > 0 && --fmtcnt >= 0) {
589 if (*fmt == ')')
590 --pcount;
591 else if (*fmt == '(')
592 ++pcount;
593 fmt++;
594 }
595 keylen = fmt - keystart - 1;
596 if (fmtcnt < 0 || pcount > 0) {
597 PyErr_SetString(PyExc_ValueError,
598 "incomplete format key");
599 goto error;
600 }
601 key = PyBytes_FromStringAndSize(keystart,
602 keylen);
603 if (key == NULL)
604 goto error;
605 if (args_owned) {
606 Py_DECREF(args);
607 args_owned = 0;
608 }
609 args = PyObject_GetItem(dict, key);
610 Py_DECREF(key);
611 if (args == NULL) {
612 goto error;
613 }
614 args_owned = 1;
615 arglen = -1;
616 argidx = -2;
617 }
618 while (--fmtcnt >= 0) {
619 switch (c = *fmt++) {
620 case '-': flags |= F_LJUST; continue;
621 case '+': flags |= F_SIGN; continue;
622 case ' ': flags |= F_BLANK; continue;
623 case '#': flags |= F_ALT; continue;
624 case '0': flags |= F_ZERO; continue;
625 }
626 break;
627 }
628 if (c == '*') {
629 v = getnextarg(args, arglen, &argidx);
630 if (v == NULL)
631 goto error;
632 if (!PyLong_Check(v)) {
633 PyErr_SetString(PyExc_TypeError,
634 "* wants int");
635 goto error;
636 }
637 width = PyLong_AsSsize_t(v);
638 if (width == -1 && PyErr_Occurred())
639 goto error;
640 if (width < 0) {
641 flags |= F_LJUST;
642 width = -width;
643 }
644 if (--fmtcnt >= 0)
645 c = *fmt++;
646 }
647 else if (c >= 0 && isdigit(c)) {
648 width = c - '0';
649 while (--fmtcnt >= 0) {
650 c = Py_CHARMASK(*fmt++);
651 if (!isdigit(c))
652 break;
653 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
654 PyErr_SetString(
655 PyExc_ValueError,
656 "width too big");
657 goto error;
658 }
659 width = width*10 + (c - '0');
660 }
661 }
662 if (c == '.') {
663 prec = 0;
664 if (--fmtcnt >= 0)
665 c = *fmt++;
666 if (c == '*') {
667 v = getnextarg(args, arglen, &argidx);
668 if (v == NULL)
669 goto error;
670 if (!PyLong_Check(v)) {
671 PyErr_SetString(
672 PyExc_TypeError,
673 "* wants int");
674 goto error;
675 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200676 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800677 if (prec == -1 && PyErr_Occurred())
678 goto error;
679 if (prec < 0)
680 prec = 0;
681 if (--fmtcnt >= 0)
682 c = *fmt++;
683 }
684 else if (c >= 0 && isdigit(c)) {
685 prec = c - '0';
686 while (--fmtcnt >= 0) {
687 c = Py_CHARMASK(*fmt++);
688 if (!isdigit(c))
689 break;
690 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
691 PyErr_SetString(
692 PyExc_ValueError,
693 "prec too big");
694 goto error;
695 }
696 prec = prec*10 + (c - '0');
697 }
698 }
699 } /* prec */
700 if (fmtcnt >= 0) {
701 if (c == 'h' || c == 'l' || c == 'L') {
702 if (--fmtcnt >= 0)
703 c = *fmt++;
704 }
705 }
706 if (fmtcnt < 0) {
707 PyErr_SetString(PyExc_ValueError,
708 "incomplete format");
709 goto error;
710 }
711 if (c != '%') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 }
716 sign = 0;
717 fill = ' ';
718 switch (c) {
719 case '%':
720 pbuf = "%";
721 len = 1;
722 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700723 case 'r':
724 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800725 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200726 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800727 if (temp == NULL)
728 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200729 assert(PyUnicode_IS_ASCII(temp));
730 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
731 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800732 if (prec >= 0 && len > prec)
733 len = prec;
734 break;
735 case 's':
736 // %s is only for 2/3 code; 3 only code should use %b
737 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200738 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800739 if (temp == NULL)
740 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800741 if (prec >= 0 && len > prec)
742 len = prec;
743 break;
744 case 'i':
745 case 'd':
746 case 'u':
747 case 'o':
748 case 'x':
749 case 'X':
750 if (c == 'i')
751 c = 'd';
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200752 iobj = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800753 if (PyNumber_Check(v)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800754 if ((PyLong_Check(v))) {
755 iobj = v;
756 Py_INCREF(iobj);
757 }
758 else {
759 iobj = PyNumber_Long(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200760 if (iobj != NULL && !PyLong_Check(iobj))
761 Py_CLEAR(iobj);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 }
763 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200764 if (iobj == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800765 PyErr_Format(PyExc_TypeError,
766 "%%%c format: a number is required, "
767 "not %.200s", c, Py_TYPE(v)->tp_name);
768 goto error;
769 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200770 temp = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, c);
771 Py_DECREF(iobj);
772 if (!temp)
773 goto error;
774 assert(PyUnicode_IS_ASCII(temp));
775 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
776 len = PyUnicode_GET_LENGTH(temp);
777 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800778 if (flags & F_ZERO)
779 fill = '0';
780 break;
781 case 'e':
782 case 'E':
783 case 'f':
784 case 'F':
785 case 'g':
786 case 'G':
787 temp = formatfloat(v, flags, prec, c);
788 if (temp == NULL)
789 goto error;
790 pbuf = PyBytes_AS_STRING(temp);
791 len = PyBytes_GET_SIZE(temp);
792 sign = 1;
793 if (flags & F_ZERO)
794 fill = '0';
795 break;
796 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200797 pbuf = &onechar;
798 len = byte_converter(v, &onechar);
799 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800800 goto error;
801 break;
802 default:
803 PyErr_Format(PyExc_ValueError,
804 "unsupported format character '%c' (0x%x) "
805 "at index %zd",
806 c, c,
807 (Py_ssize_t)(fmt - 1 -
808 PyBytes_AsString(format)));
809 goto error;
810 }
811 if (sign) {
812 if (*pbuf == '-' || *pbuf == '+') {
813 sign = *pbuf++;
814 len--;
815 }
816 else if (flags & F_SIGN)
817 sign = '+';
818 else if (flags & F_BLANK)
819 sign = ' ';
820 else
821 sign = 0;
822 }
823 if (width < len)
824 width = len;
825 if (rescnt - (sign != 0) < width) {
826 reslen -= rescnt;
827 rescnt = width + fmtcnt + 100;
828 reslen += rescnt;
829 if (reslen < 0) {
830 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 Py_XDECREF(temp);
832 return PyErr_NoMemory();
833 }
834 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800835 Py_XDECREF(temp);
836 return NULL;
837 }
838 res = PyBytes_AS_STRING(result)
839 + reslen - rescnt;
840 }
841 if (sign) {
842 if (fill != ' ')
843 *res++ = sign;
844 rescnt--;
845 if (width > len)
846 width--;
847 }
848 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
849 assert(pbuf[0] == '0');
850 assert(pbuf[1] == c);
851 if (fill != ' ') {
852 *res++ = *pbuf++;
853 *res++ = *pbuf++;
854 }
855 rescnt -= 2;
856 width -= 2;
857 if (width < 0)
858 width = 0;
859 len -= 2;
860 }
861 if (width > len && !(flags & F_LJUST)) {
862 do {
863 --rescnt;
864 *res++ = fill;
865 } while (--width > len);
866 }
867 if (fill == ' ') {
868 if (sign)
869 *res++ = sign;
870 if ((flags & F_ALT) &&
871 (c == 'x' || c == 'X')) {
872 assert(pbuf[0] == '0');
873 assert(pbuf[1] == c);
874 *res++ = *pbuf++;
875 *res++ = *pbuf++;
876 }
877 }
878 Py_MEMCPY(res, pbuf, len);
879 res += len;
880 rescnt -= len;
881 while (--width >= len) {
882 --rescnt;
883 *res++ = ' ';
884 }
885 if (dict && (argidx < arglen) && c != '%') {
886 PyErr_SetString(PyExc_TypeError,
887 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800888 Py_XDECREF(temp);
889 goto error;
890 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 Py_XDECREF(temp);
892 } /* '%' */
893 } /* until end */
894 if (argidx < arglen && !dict) {
895 PyErr_SetString(PyExc_TypeError,
896 "not all arguments converted during bytes formatting");
897 goto error;
898 }
899 if (args_owned) {
900 Py_DECREF(args);
901 }
902 if (_PyBytes_Resize(&result, reslen - rescnt))
903 return NULL;
904 return result;
905
906 error:
907 Py_DECREF(result);
908 if (args_owned) {
909 Py_DECREF(args);
910 }
911 return NULL;
912}
913
914/* =-= */
915
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000916static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000917bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000918{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000919 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000920}
921
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000922/* Unescape a backslash-escaped string. If unicode is non-zero,
923 the string is a u-literal. If recode_encoding is non-zero,
924 the string is UTF-8 encoded and should be re-encoded in the
925 specified encoding. */
926
927PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 Py_ssize_t len,
929 const char *errors,
930 Py_ssize_t unicode,
931 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000932{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 int c;
934 char *p, *buf;
935 const char *end;
936 PyObject *v;
937 Py_ssize_t newlen = recode_encoding ? 4*len:len;
938 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
939 if (v == NULL)
940 return NULL;
941 p = buf = PyBytes_AsString(v);
942 end = s + len;
943 while (s < end) {
944 if (*s != '\\') {
945 non_esc:
946 if (recode_encoding && (*s & 0x80)) {
947 PyObject *u, *w;
948 char *r;
949 const char* t;
950 Py_ssize_t rn;
951 t = s;
952 /* Decode non-ASCII bytes as UTF-8. */
953 while (t < end && (*t & 0x80)) t++;
954 u = PyUnicode_DecodeUTF8(s, t - s, errors);
955 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000957 /* Recode them in target encoding. */
958 w = PyUnicode_AsEncodedString(
959 u, recode_encoding, errors);
960 Py_DECREF(u);
961 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 /* Append bytes to output buffer. */
964 assert(PyBytes_Check(w));
965 r = PyBytes_AS_STRING(w);
966 rn = PyBytes_GET_SIZE(w);
967 Py_MEMCPY(p, r, rn);
968 p += rn;
969 Py_DECREF(w);
970 s = t;
971 } else {
972 *p++ = *s++;
973 }
974 continue;
975 }
976 s++;
977 if (s==end) {
978 PyErr_SetString(PyExc_ValueError,
979 "Trailing \\ in string");
980 goto failed;
981 }
982 switch (*s++) {
983 /* XXX This assumes ASCII! */
984 case '\n': break;
985 case '\\': *p++ = '\\'; break;
986 case '\'': *p++ = '\''; break;
987 case '\"': *p++ = '\"'; break;
988 case 'b': *p++ = '\b'; break;
989 case 'f': *p++ = '\014'; break; /* FF */
990 case 't': *p++ = '\t'; break;
991 case 'n': *p++ = '\n'; break;
992 case 'r': *p++ = '\r'; break;
993 case 'v': *p++ = '\013'; break; /* VT */
994 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
995 case '0': case '1': case '2': case '3':
996 case '4': case '5': case '6': case '7':
997 c = s[-1] - '0';
998 if (s < end && '0' <= *s && *s <= '7') {
999 c = (c<<3) + *s++ - '0';
1000 if (s < end && '0' <= *s && *s <= '7')
1001 c = (c<<3) + *s++ - '0';
1002 }
1003 *p++ = c;
1004 break;
1005 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001006 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 unsigned int x = 0;
1008 c = Py_CHARMASK(*s);
1009 s++;
David Malcolm96960882010-11-05 17:23:41 +00001010 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001012 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 x = 10 + c - 'a';
1014 else
1015 x = 10 + c - 'A';
1016 x = x << 4;
1017 c = Py_CHARMASK(*s);
1018 s++;
David Malcolm96960882010-11-05 17:23:41 +00001019 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001021 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 x += 10 + c - 'a';
1023 else
1024 x += 10 + c - 'A';
1025 *p++ = x;
1026 break;
1027 }
1028 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001029 PyErr_Format(PyExc_ValueError,
1030 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001031 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 goto failed;
1033 }
1034 if (strcmp(errors, "replace") == 0) {
1035 *p++ = '?';
1036 } else if (strcmp(errors, "ignore") == 0)
1037 /* do nothing */;
1038 else {
1039 PyErr_Format(PyExc_ValueError,
1040 "decoding error; unknown "
1041 "error handling code: %.400s",
1042 errors);
1043 goto failed;
1044 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001045 /* skip \x */
1046 if (s < end && Py_ISXDIGIT(s[0]))
1047 s++; /* and a hexdigit */
1048 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 default:
1050 *p++ = '\\';
1051 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001052 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 UTF-8 bytes may follow. */
1054 }
1055 }
1056 if (p-buf < newlen)
1057 _PyBytes_Resize(&v, p - buf);
1058 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001059 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 Py_DECREF(v);
1061 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062}
1063
1064/* -------------------------------------------------------------------- */
1065/* object api */
1066
1067Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001068PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001069{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 if (!PyBytes_Check(op)) {
1071 PyErr_Format(PyExc_TypeError,
1072 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1073 return -1;
1074 }
1075 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001076}
1077
1078char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001079PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001080{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 if (!PyBytes_Check(op)) {
1082 PyErr_Format(PyExc_TypeError,
1083 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1084 return NULL;
1085 }
1086 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001087}
1088
1089int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001090PyBytes_AsStringAndSize(PyObject *obj,
1091 char **s,
1092 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001093{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 if (s == NULL) {
1095 PyErr_BadInternalCall();
1096 return -1;
1097 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 if (!PyBytes_Check(obj)) {
1100 PyErr_Format(PyExc_TypeError,
1101 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1102 return -1;
1103 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 *s = PyBytes_AS_STRING(obj);
1106 if (len != NULL)
1107 *len = PyBytes_GET_SIZE(obj);
1108 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001109 PyErr_SetString(PyExc_ValueError,
1110 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 return -1;
1112 }
1113 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001114}
Neal Norwitz6968b052007-02-27 19:02:19 +00001115
1116/* -------------------------------------------------------------------- */
1117/* Methods */
1118
Eric Smith0923d1d2009-04-16 20:16:10 +00001119#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001120
1121#include "stringlib/fastsearch.h"
1122#include "stringlib/count.h"
1123#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001124#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001125#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001126#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001127#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001128
Eric Smith0f78bff2009-11-30 01:01:42 +00001129#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001130
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001131PyObject *
1132PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001133{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001134 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001135 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001136 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001138 unsigned char quote, *s, *p;
1139
1140 /* Compute size of output string */
1141 squotes = dquotes = 0;
1142 newsize = 3; /* b'' */
1143 s = (unsigned char*)op->ob_sval;
1144 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001145 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001146 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001147 case '\'': squotes++; break;
1148 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001150 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001151 default:
1152 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001153 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001154 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001155 if (newsize > PY_SSIZE_T_MAX - incr)
1156 goto overflow;
1157 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158 }
1159 quote = '\'';
1160 if (smartquotes && squotes && !dquotes)
1161 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001162 if (squotes && quote == '\'') {
1163 if (newsize > PY_SSIZE_T_MAX - squotes)
1164 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001165 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001167
1168 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 if (v == NULL) {
1170 return NULL;
1171 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001172 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001174 *p++ = 'b', *p++ = quote;
1175 for (i = 0; i < length; i++) {
1176 unsigned char c = op->ob_sval[i];
1177 if (c == quote || c == '\\')
1178 *p++ = '\\', *p++ = c;
1179 else if (c == '\t')
1180 *p++ = '\\', *p++ = 't';
1181 else if (c == '\n')
1182 *p++ = '\\', *p++ = 'n';
1183 else if (c == '\r')
1184 *p++ = '\\', *p++ = 'r';
1185 else if (c < ' ' || c >= 0x7f) {
1186 *p++ = '\\';
1187 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001188 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1189 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001191 else
1192 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001194 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001195 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001196 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001197
1198 overflow:
1199 PyErr_SetString(PyExc_OverflowError,
1200 "bytes object is too large to make repr");
1201 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001202}
1203
Neal Norwitz6968b052007-02-27 19:02:19 +00001204static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001205bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001208}
1209
Neal Norwitz6968b052007-02-27 19:02:19 +00001210static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001211bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 if (Py_BytesWarningFlag) {
1214 if (PyErr_WarnEx(PyExc_BytesWarning,
1215 "str() on a bytes instance", 1))
1216 return NULL;
1217 }
1218 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001219}
1220
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001222bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225}
Neal Norwitz6968b052007-02-27 19:02:19 +00001226
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227/* This is also used by PyBytes_Concat() */
1228static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001229bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001230{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 Py_ssize_t size;
1232 Py_buffer va, vb;
1233 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 va.len = -1;
1236 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001237 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1238 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1240 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1241 goto done;
1242 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 /* Optimize end cases */
1245 if (va.len == 0 && PyBytes_CheckExact(b)) {
1246 result = b;
1247 Py_INCREF(result);
1248 goto done;
1249 }
1250 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1251 result = a;
1252 Py_INCREF(result);
1253 goto done;
1254 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 size = va.len + vb.len;
1257 if (size < 0) {
1258 PyErr_NoMemory();
1259 goto done;
1260 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 result = PyBytes_FromStringAndSize(NULL, size);
1263 if (result != NULL) {
1264 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1265 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1266 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267
1268 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 if (va.len != -1)
1270 PyBuffer_Release(&va);
1271 if (vb.len != -1)
1272 PyBuffer_Release(&vb);
1273 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001274}
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
1276static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001277bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001278{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001279 Py_ssize_t i;
1280 Py_ssize_t j;
1281 Py_ssize_t size;
1282 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 size_t nbytes;
1284 if (n < 0)
1285 n = 0;
1286 /* watch out for overflows: the size can overflow int,
1287 * and the # of bytes needed can overflow size_t
1288 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001289 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 PyErr_SetString(PyExc_OverflowError,
1291 "repeated bytes are too long");
1292 return NULL;
1293 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001294 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1296 Py_INCREF(a);
1297 return (PyObject *)a;
1298 }
1299 nbytes = (size_t)size;
1300 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1301 PyErr_SetString(PyExc_OverflowError,
1302 "repeated bytes are too long");
1303 return NULL;
1304 }
1305 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1306 if (op == NULL)
1307 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001308 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 op->ob_shash = -1;
1310 op->ob_sval[size] = '\0';
1311 if (Py_SIZE(a) == 1 && n > 0) {
1312 memset(op->ob_sval, a->ob_sval[0] , n);
1313 return (PyObject *) op;
1314 }
1315 i = 0;
1316 if (i < size) {
1317 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1318 i = Py_SIZE(a);
1319 }
1320 while (i < size) {
1321 j = (i <= size-i) ? i : size-i;
1322 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1323 i += j;
1324 }
1325 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001326}
1327
Guido van Rossum98297ee2007-11-06 21:34:58 +00001328static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001329bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001330{
1331 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1332 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001333 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001334 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001335 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001336 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001337 return -1;
1338 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1339 varg.buf, varg.len, 0);
1340 PyBuffer_Release(&varg);
1341 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001342 }
1343 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001344 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1345 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001346 }
1347
Antoine Pitrou0010d372010-08-15 17:12:55 +00001348 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001349}
1350
Neal Norwitz6968b052007-02-27 19:02:19 +00001351static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001352bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 if (i < 0 || i >= Py_SIZE(a)) {
1355 PyErr_SetString(PyExc_IndexError, "index out of range");
1356 return NULL;
1357 }
1358 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001359}
1360
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001361Py_LOCAL(int)
1362bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1363{
1364 int cmp;
1365 Py_ssize_t len;
1366
1367 len = Py_SIZE(a);
1368 if (Py_SIZE(b) != len)
1369 return 0;
1370
1371 if (a->ob_sval[0] != b->ob_sval[0])
1372 return 0;
1373
1374 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1375 return (cmp == 0);
1376}
1377
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001378static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001379bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 int c;
1382 Py_ssize_t len_a, len_b;
1383 Py_ssize_t min_len;
1384 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 /* Make sure both arguments are strings. */
1387 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001388 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1389 if (PyObject_IsInstance((PyObject*)a,
1390 (PyObject*)&PyUnicode_Type) ||
1391 PyObject_IsInstance((PyObject*)b,
1392 (PyObject*)&PyUnicode_Type)) {
1393 if (PyErr_WarnEx(PyExc_BytesWarning,
1394 "Comparison between bytes and string", 1))
1395 return NULL;
1396 }
1397 else if (PyObject_IsInstance((PyObject*)a,
1398 (PyObject*)&PyLong_Type) ||
1399 PyObject_IsInstance((PyObject*)b,
1400 (PyObject*)&PyLong_Type)) {
1401 if (PyErr_WarnEx(PyExc_BytesWarning,
1402 "Comparison between bytes and int", 1))
1403 return NULL;
1404 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 }
1406 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001408 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001410 case Py_EQ:
1411 case Py_LE:
1412 case Py_GE:
1413 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001415 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001416 case Py_NE:
1417 case Py_LT:
1418 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001420 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001421 default:
1422 PyErr_BadArgument();
1423 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 }
1425 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001426 else if (op == Py_EQ || op == Py_NE) {
1427 int eq = bytes_compare_eq(a, b);
1428 eq ^= (op == Py_NE);
1429 result = eq ? Py_True : Py_False;
1430 }
1431 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001432 len_a = Py_SIZE(a);
1433 len_b = Py_SIZE(b);
1434 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001435 if (min_len > 0) {
1436 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001437 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001438 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001440 else
1441 c = 0;
1442 if (c == 0)
1443 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1444 switch (op) {
1445 case Py_LT: c = c < 0; break;
1446 case Py_LE: c = c <= 0; break;
1447 case Py_GT: c = c > 0; break;
1448 case Py_GE: c = c >= 0; break;
1449 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001450 PyErr_BadArgument();
1451 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001452 }
1453 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 Py_INCREF(result);
1457 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001458}
1459
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001460static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001461bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001462{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001463 if (a->ob_shash == -1) {
1464 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001465 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001466 }
1467 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001468}
1469
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001470static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001471bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001472{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 if (PyIndex_Check(item)) {
1474 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1475 if (i == -1 && PyErr_Occurred())
1476 return NULL;
1477 if (i < 0)
1478 i += PyBytes_GET_SIZE(self);
1479 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1480 PyErr_SetString(PyExc_IndexError,
1481 "index out of range");
1482 return NULL;
1483 }
1484 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1485 }
1486 else if (PySlice_Check(item)) {
1487 Py_ssize_t start, stop, step, slicelength, cur, i;
1488 char* source_buf;
1489 char* result_buf;
1490 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001491
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001492 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 PyBytes_GET_SIZE(self),
1494 &start, &stop, &step, &slicelength) < 0) {
1495 return NULL;
1496 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 if (slicelength <= 0) {
1499 return PyBytes_FromStringAndSize("", 0);
1500 }
1501 else if (start == 0 && step == 1 &&
1502 slicelength == PyBytes_GET_SIZE(self) &&
1503 PyBytes_CheckExact(self)) {
1504 Py_INCREF(self);
1505 return (PyObject *)self;
1506 }
1507 else if (step == 1) {
1508 return PyBytes_FromStringAndSize(
1509 PyBytes_AS_STRING(self) + start,
1510 slicelength);
1511 }
1512 else {
1513 source_buf = PyBytes_AS_STRING(self);
1514 result = PyBytes_FromStringAndSize(NULL, slicelength);
1515 if (result == NULL)
1516 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 result_buf = PyBytes_AS_STRING(result);
1519 for (cur = start, i = 0; i < slicelength;
1520 cur += step, i++) {
1521 result_buf[i] = source_buf[cur];
1522 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 return result;
1525 }
1526 }
1527 else {
1528 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001529 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 Py_TYPE(item)->tp_name);
1531 return NULL;
1532 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001533}
1534
1535static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001536bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1539 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001540}
1541
/* Sequence protocol slots for bytes.  Slots set to 0 are not provided;
   in particular there are no item/slice assignment handlers. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1552
/* Mapping protocol slots for bytes: length and subscript (index or slice)
   are provided; the third slot is left 0. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,
    (binaryfunc)bytes_subscript,
    0,
};
1558
/* Buffer protocol slots for bytes: only the get-buffer handler is set,
   the second slot is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,
    NULL,
};
1563
1564
/* Mode selectors for stripping.
   NOTE(review): presumably these choose which end(s) the strip helpers
   trim; their users are outside this part of the file -- confirm there. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1568
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001569/*[clinic input]
1570bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001572 sep: object = None
1573 The delimiter according which to split the bytes.
1574 None (the default value) means split on ASCII whitespace characters
1575 (space, tab, return, newline, formfeed, vertical tab).
1576 maxsplit: Py_ssize_t = -1
1577 Maximum number of splits to do.
1578 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001580Return a list of the sections in the bytes, using sep as the delimiter.
1581[clinic start generated code]*/
1582
1583PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001584"split($self, /, sep=None, maxsplit=-1)\n"
1585"--\n"
1586"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001587"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1588"\n"
1589" sep\n"
1590" The delimiter according which to split the bytes.\n"
1591" None (the default value) means split on ASCII whitespace characters\n"
1592" (space, tab, return, newline, formfeed, vertical tab).\n"
1593" maxsplit\n"
1594" Maximum number of splits to do.\n"
1595" -1 (the default value) means no limit.");
1596
1597#define BYTES_SPLIT_METHODDEF \
1598 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001599
1600static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001601bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001602
1603static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001604bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001605{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001606 PyObject *return_value = NULL;
1607 static char *_keywords[] = {"sep", "maxsplit", NULL};
1608 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001610
1611 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1612 "|On:split", _keywords,
1613 &sep, &maxsplit))
1614 goto exit;
1615 return_value = bytes_split_impl(self, sep, maxsplit);
1616
1617exit:
1618 return return_value;
1619}
1620
1621static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001622bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1623/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001624{
1625 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 const char *s = PyBytes_AS_STRING(self), *sub;
1627 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001628 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630 if (maxsplit < 0)
1631 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001632 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001634 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 return NULL;
1636 sub = vsub.buf;
1637 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1640 PyBuffer_Release(&vsub);
1641 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001642}
1643
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001644/*[clinic input]
1645bytes.partition
1646
1647 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001648 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001649 /
1650
1651Partition the bytes into three parts using the given separator.
1652
1653This will search for the separator sep in the bytes. If the separator is found,
1654returns a 3-tuple containing the part before the separator, the separator
1655itself, and the part after it.
1656
1657If the separator is not found, returns a 3-tuple containing the original bytes
1658object and two empty bytes objects.
1659[clinic start generated code]*/
1660
1661PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001662"partition($self, sep, /)\n"
1663"--\n"
1664"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001665"Partition the bytes into three parts using the given separator.\n"
1666"\n"
1667"This will search for the separator sep in the bytes. If the separator is found,\n"
1668"returns a 3-tuple containing the part before the separator, the separator\n"
1669"itself, and the part after it.\n"
1670"\n"
1671"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1672"object and two empty bytes objects.");
1673
1674#define BYTES_PARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001675 {"partition", (PyCFunction)bytes_partition, METH_VARARGS, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001676
1677static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001678bytes_partition_impl(PyBytesObject *self, Py_buffer *sep);
1679
1680static PyObject *
1681bytes_partition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001682{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001683 PyObject *return_value = NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001684 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001685
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001686 if (!PyArg_ParseTuple(args,
1687 "y*:partition",
1688 &sep))
1689 goto exit;
1690 return_value = bytes_partition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001691
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001692exit:
1693 /* Cleanup for sep */
1694 if (sep.obj)
1695 PyBuffer_Release(&sep);
1696
1697 return return_value;
Neal Norwitz6968b052007-02-27 19:02:19 +00001698}
1699
Neal Norwitz6968b052007-02-27 19:02:19 +00001700static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001701bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1702/*[clinic end generated code: output=3006727cfbf83aa4 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001703{
Neal Norwitz6968b052007-02-27 19:02:19 +00001704 return stringlib_partition(
1705 (PyObject*) self,
1706 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001707 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001708 );
1709}
1710
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711/*[clinic input]
1712bytes.rpartition
1713
1714 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001715 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001716 /
1717
1718Partition the bytes into three parts using the given separator.
1719
This will search for the separator sep in the bytes, starting at the end. If
1721the separator is found, returns a 3-tuple containing the part before the
1722separator, the separator itself, and the part after it.
1723
1724If the separator is not found, returns a 3-tuple containing two empty bytes
1725objects and the original bytes object.
1726[clinic start generated code]*/
1727
1728PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001729"rpartition($self, sep, /)\n"
1730"--\n"
1731"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001732"Partition the bytes into three parts using the given separator.\n"
1733"\n"
1734"This will search for the separator sep in the bytes, starting and the end. If\n"
1735"the separator is found, returns a 3-tuple containing the part before the\n"
1736"separator, the separator itself, and the part after it.\n"
1737"\n"
1738"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1739"objects and the original bytes object.");
1740
1741#define BYTES_RPARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001742 {"rpartition", (PyCFunction)bytes_rpartition, METH_VARARGS, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001743
1744static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001745bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep);
1746
1747static PyObject *
1748bytes_rpartition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001749{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001750 PyObject *return_value = NULL;
1751 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001752
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001753 if (!PyArg_ParseTuple(args,
1754 "y*:rpartition",
1755 &sep))
1756 goto exit;
1757 return_value = bytes_rpartition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001758
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001759exit:
1760 /* Cleanup for sep */
1761 if (sep.obj)
1762 PyBuffer_Release(&sep);
1763
1764 return return_value;
1765}
1766
1767static PyObject *
1768bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1769/*[clinic end generated code: output=57b169dc47fa90e8 input=6588fff262a9170e]*/
1770{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 return stringlib_rpartition(
1772 (PyObject*) self,
1773 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001774 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001776}
1777
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001778/*[clinic input]
1779bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001780
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781Return a list of the sections in the bytes, using sep as the delimiter.
1782
1783Splitting is done starting at the end of the bytes and working to the front.
1784[clinic start generated code]*/
1785
1786PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001787"rsplit($self, /, sep=None, maxsplit=-1)\n"
1788"--\n"
1789"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1791"\n"
1792" sep\n"
1793" The delimiter according which to split the bytes.\n"
1794" None (the default value) means split on ASCII whitespace characters\n"
1795" (space, tab, return, newline, formfeed, vertical tab).\n"
1796" maxsplit\n"
1797" Maximum number of splits to do.\n"
1798" -1 (the default value) means no limit.\n"
1799"\n"
1800"Splitting is done starting at the end of the bytes and working to the front.");
1801
1802#define BYTES_RSPLIT_METHODDEF \
1803 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001804
Neal Norwitz6968b052007-02-27 19:02:19 +00001805static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001806bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001807
1808static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001809bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001810{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001811 PyObject *return_value = NULL;
1812 static char *_keywords[] = {"sep", "maxsplit", NULL};
1813 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001815
1816 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1817 "|On:rsplit", _keywords,
1818 &sep, &maxsplit))
1819 goto exit;
1820 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1821
1822exit:
1823 return return_value;
1824}
1825
1826static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001827bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1828/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001829{
1830 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 const char *s = PyBytes_AS_STRING(self), *sub;
1832 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 if (maxsplit < 0)
1836 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001837 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001839 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 return NULL;
1841 sub = vsub.buf;
1842 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1845 PyBuffer_Release(&vsub);
1846 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001847}
1848
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001850/*[clinic input]
1851bytes.join
1852
1853 iterable_of_bytes: object
1854 /
1855
1856Concatenate any number of bytes objects.
1857
1858The bytes whose method is called is inserted in between each pair.
1859
1860The result is returned as a new bytes object.
1861
1862Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1863[clinic start generated code]*/
1864
1865PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001866"join($self, iterable_of_bytes, /)\n"
1867"--\n"
1868"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001869"Concatenate any number of bytes objects.\n"
1870"\n"
1871"The bytes whose method is called is inserted in between each pair.\n"
1872"\n"
1873"The result is returned as a new bytes object.\n"
1874"\n"
1875"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1876
1877#define BYTES_JOIN_METHODDEF \
1878 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001879
Neal Norwitz6968b052007-02-27 19:02:19 +00001880static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001881bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1882/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001883{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001884 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001885}
1886
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887PyObject *
1888_PyBytes_Join(PyObject *sep, PyObject *x)
1889{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001890 assert(sep != NULL && PyBytes_Check(sep));
1891 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001892 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893}
1894
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the length of the
   sequence and may be evaluated more than once.  `end` is clamped to `len`,
   negative values are taken relative to `len` and then clamped to 0.
   `start` is NOT clamped to `len`, so callers must still cope with
   start > end.

   The body is wrapped in do { } while (0) so the macro behaves as a single
   statement (e.g. as the unbraced body of an if/else); invoke it with a
   trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909
1910Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001911bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001912{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001914 char byte;
1915 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001917 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001918 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001919 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001920
Antoine Pitrouac65d962011-10-20 23:54:17 +02001921 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1922 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924
Antoine Pitrouac65d962011-10-20 23:54:17 +02001925 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001926 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001927 return -2;
1928
1929 sub = subbuf.buf;
1930 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001932 else {
1933 sub = &byte;
1934 sub_len = 1;
1935 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001936 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001938 ADJUST_INDICES(start, end, len);
1939 if (end - start < sub_len)
1940 res = -1;
Victor Stinnerdabbfe72015-03-25 03:16:32 +01001941 /* Issue #23573: FIXME, windows has no memrchr() */
1942 else if (sub_len == 1 && dir > 0) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001943 unsigned char needle = *sub;
1944 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1945 res = stringlib_fastsearch_memchr_1char(
1946 PyBytes_AS_STRING(self) + start, end - start,
1947 needle, needle, mode);
1948 if (res >= 0)
1949 res += start;
1950 }
1951 else {
1952 if (dir > 0)
1953 res = stringlib_find_slice(
1954 PyBytes_AS_STRING(self), len,
1955 sub, sub_len, start, end);
1956 else
1957 res = stringlib_rfind_slice(
1958 PyBytes_AS_STRING(self), len,
1959 sub, sub_len, start, end);
1960 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001961
1962 if (subobj)
1963 PyBuffer_Release(&subbuf);
1964
1965 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001966}
1967
1968
1969PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001970"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001971\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001972Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001973such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001975\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976Return -1 on failure.");
1977
Neal Norwitz6968b052007-02-27 19:02:19 +00001978static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001979bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001980{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 Py_ssize_t result = bytes_find_internal(self, args, +1);
1982 if (result == -2)
1983 return NULL;
1984 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001985}
1986
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001987
1988PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001989"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001990\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991Like B.find() but raise ValueError when the substring is not found.");
1992
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001993static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001994bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001995{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 Py_ssize_t result = bytes_find_internal(self, args, +1);
1997 if (result == -2)
1998 return NULL;
1999 if (result == -1) {
2000 PyErr_SetString(PyExc_ValueError,
2001 "substring not found");
2002 return NULL;
2003 }
2004 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002005}
2006
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002007
2008PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002009"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002010\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002011Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002012such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002014\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015Return -1 on failure.");
2016
Neal Norwitz6968b052007-02-27 19:02:19 +00002017static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002018bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002020 Py_ssize_t result = bytes_find_internal(self, args, -1);
2021 if (result == -2)
2022 return NULL;
2023 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002024}
2025
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002026
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002027PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002028"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029\n\
2030Like B.rfind() but raise ValueError when the substring is not found.");
2031
2032static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002033bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002034{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 Py_ssize_t result = bytes_find_internal(self, args, -1);
2036 if (result == -2)
2037 return NULL;
2038 if (result == -1) {
2039 PyErr_SetString(PyExc_ValueError,
2040 "substring not found");
2041 return NULL;
2042 }
2043 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002044}
2045
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002046
2047Py_LOCAL_INLINE(PyObject *)
2048do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002049{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 Py_buffer vsep;
2051 char *s = PyBytes_AS_STRING(self);
2052 Py_ssize_t len = PyBytes_GET_SIZE(self);
2053 char *sep;
2054 Py_ssize_t seplen;
2055 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002057 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 return NULL;
2059 sep = vsep.buf;
2060 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 i = 0;
2063 if (striptype != RIGHTSTRIP) {
2064 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2065 i++;
2066 }
2067 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002069 j = len;
2070 if (striptype != LEFTSTRIP) {
2071 do {
2072 j--;
2073 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2074 j++;
2075 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2080 Py_INCREF(self);
2081 return (PyObject*)self;
2082 }
2083 else
2084 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002085}
2086
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
2088Py_LOCAL_INLINE(PyObject *)
2089do_strip(PyBytesObject *self, int striptype)
2090{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 char *s = PyBytes_AS_STRING(self);
2092 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002094 i = 0;
2095 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002096 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 i++;
2098 }
2099 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 j = len;
2102 if (striptype != LEFTSTRIP) {
2103 do {
2104 j--;
David Malcolm96960882010-11-05 17:23:41 +00002105 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 j++;
2107 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002109 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2110 Py_INCREF(self);
2111 return (PyObject*)self;
2112 }
2113 else
2114 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002115}
2116
2117
2118Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002119do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002120{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002121 if (bytes != NULL && bytes != Py_None) {
2122 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 }
2124 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002125}
2126
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002127/*[clinic input]
2128bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002130 self: self(type="PyBytesObject *")
2131 bytes: object = None
2132 /
2133
2134Strip leading and trailing bytes contained in the argument.
2135
2136If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2137[clinic start generated code]*/
2138
2139PyDoc_STRVAR(bytes_strip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002140"strip($self, bytes=None, /)\n"
2141"--\n"
2142"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002143"Strip leading and trailing bytes contained in the argument.\n"
2144"\n"
2145"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
2146
2147#define BYTES_STRIP_METHODDEF \
2148 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
2149
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002150static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002151bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
2152
2153static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002154bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002155{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002156 PyObject *return_value = NULL;
2157 PyObject *bytes = Py_None;
2158
2159 if (!PyArg_UnpackTuple(args, "strip",
2160 0, 1,
2161 &bytes))
2162 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002163 return_value = bytes_strip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002164
2165exit:
2166 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002167}
2168
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002169static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002170bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002171/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002172{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002173 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002174}
2175
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002176/*[clinic input]
2177bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002179 self: self(type="PyBytesObject *")
2180 bytes: object = None
2181 /
2182
2183Strip leading bytes contained in the argument.
2184
2185If the argument is omitted or None, strip leading ASCII whitespace.
2186[clinic start generated code]*/
2187
2188PyDoc_STRVAR(bytes_lstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002189"lstrip($self, bytes=None, /)\n"
2190"--\n"
2191"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002192"Strip leading bytes contained in the argument.\n"
2193"\n"
2194"If the argument is omitted or None, strip leading ASCII whitespace.");
2195
2196#define BYTES_LSTRIP_METHODDEF \
2197 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
2198
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002199static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002200bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
2201
2202static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002203bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002204{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002205 PyObject *return_value = NULL;
2206 PyObject *bytes = Py_None;
2207
2208 if (!PyArg_UnpackTuple(args, "lstrip",
2209 0, 1,
2210 &bytes))
2211 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002212 return_value = bytes_lstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002213
2214exit:
2215 return return_value;
2216}
2217
2218static PyObject *
2219bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002220/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221{
2222 return do_argstrip(self, LEFTSTRIP, bytes);
2223}
2224
2225/*[clinic input]
2226bytes.rstrip
2227
2228 self: self(type="PyBytesObject *")
2229 bytes: object = None
2230 /
2231
2232Strip trailing bytes contained in the argument.
2233
2234If the argument is omitted or None, strip trailing ASCII whitespace.
2235[clinic start generated code]*/
2236
2237PyDoc_STRVAR(bytes_rstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002238"rstrip($self, bytes=None, /)\n"
2239"--\n"
2240"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002241"Strip trailing bytes contained in the argument.\n"
2242"\n"
2243"If the argument is omitted or None, strip trailing ASCII whitespace.");
2244
2245#define BYTES_RSTRIP_METHODDEF \
2246 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
2247
2248static PyObject *
2249bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
2250
2251static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002252bytes_rstrip(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002253{
2254 PyObject *return_value = NULL;
2255 PyObject *bytes = Py_None;
2256
2257 if (!PyArg_UnpackTuple(args, "rstrip",
2258 0, 1,
2259 &bytes))
2260 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002261 return_value = bytes_rstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002262
2263exit:
2264 return return_value;
2265}
2266
2267static PyObject *
2268bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002269/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002270{
2271 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002272}
Neal Norwitz6968b052007-02-27 19:02:19 +00002273
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002274
2275PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002276"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002277\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002278Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002279string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002280as in slice notation.");
2281
2282static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002283bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002285 PyObject *sub_obj;
2286 const char *str = PyBytes_AS_STRING(self), *sub;
2287 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002288 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002290
Antoine Pitrouac65d962011-10-20 23:54:17 +02002291 Py_buffer vsub;
2292 PyObject *count_obj;
2293
2294 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2295 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297
Antoine Pitrouac65d962011-10-20 23:54:17 +02002298 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002299 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002300 return NULL;
2301
2302 sub = vsub.buf;
2303 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002305 else {
2306 sub = &byte;
2307 sub_len = 1;
2308 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002309
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002310 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002311
Antoine Pitrouac65d962011-10-20 23:54:17 +02002312 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2314 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002315
2316 if (sub_obj)
2317 PyBuffer_Release(&vsub);
2318
2319 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002320}
2321
2322
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002323/*[clinic input]
2324bytes.translate
2325
2326 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002327 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002328 Translation table, which must be a bytes object of length 256.
2329 [
2330 deletechars: object
2331 ]
2332 /
2333
2334Return a copy with each character mapped by the given translation table.
2335
2336All characters occurring in the optional argument deletechars are removed.
2337The remaining characters are mapped through the given translation table.
2338[clinic start generated code]*/
2339
2340PyDoc_STRVAR(bytes_translate__doc__,
2341"translate(table, [deletechars])\n"
2342"Return a copy with each character mapped by the given translation table.\n"
2343"\n"
2344" table\n"
2345" Translation table, which must be a bytes object of length 256.\n"
2346"\n"
2347"All characters occurring in the optional argument deletechars are removed.\n"
2348"The remaining characters are mapped through the given translation table.");
2349
2350#define BYTES_TRANSLATE_METHODDEF \
2351 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002352
2353static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002354bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
2355
2356static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002357bytes_translate(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002358{
2359 PyObject *return_value = NULL;
2360 PyObject *table;
2361 int group_right_1 = 0;
2362 PyObject *deletechars = NULL;
2363
2364 switch (PyTuple_GET_SIZE(args)) {
2365 case 1:
2366 if (!PyArg_ParseTuple(args, "O:translate", &table))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002367 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002368 break;
2369 case 2:
2370 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002371 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002372 group_right_1 = 1;
2373 break;
2374 default:
2375 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
Martin v. Löwis0efea322014-07-27 17:29:17 +02002376 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002377 }
Martin v. Löwis0efea322014-07-27 17:29:17 +02002378 return_value = bytes_translate_impl(self, table, group_right_1, deletechars);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002379
Martin v. Löwis0efea322014-07-27 17:29:17 +02002380exit:
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002381 return return_value;
2382}
2383
2384static PyObject *
2385bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Larry Hastingsdfbeb162014-10-13 10:39:41 +01002386/*[clinic end generated code: output=f0f29a57f41df5d8 input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002387{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002388 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002389 Py_buffer table_view = {NULL, NULL};
2390 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002391 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002392 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002394 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 Py_ssize_t inlen, tablen, dellen = 0;
2396 PyObject *result;
2397 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002398
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002399 if (PyBytes_Check(table)) {
2400 table_chars = PyBytes_AS_STRING(table);
2401 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002403 else if (table == Py_None) {
2404 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 tablen = 256;
2406 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002407 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002408 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002409 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002410 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002411 tablen = table_view.len;
2412 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 if (tablen != 256) {
2415 PyErr_SetString(PyExc_ValueError,
2416 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002417 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 return NULL;
2419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002420
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002421 if (deletechars != NULL) {
2422 if (PyBytes_Check(deletechars)) {
2423 del_table_chars = PyBytes_AS_STRING(deletechars);
2424 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002425 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002426 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002427 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002428 PyBuffer_Release(&table_view);
2429 return NULL;
2430 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002431 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002432 dellen = del_table_view.len;
2433 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002434 }
2435 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002436 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 dellen = 0;
2438 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 inlen = PyBytes_GET_SIZE(input_obj);
2441 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002442 if (result == NULL) {
2443 PyBuffer_Release(&del_table_view);
2444 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002445 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002446 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002447 output_start = output = PyBytes_AsString(result);
2448 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002450 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 /* If no deletions are required, use faster code */
2452 for (i = inlen; --i >= 0; ) {
2453 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002454 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 changed = 1;
2456 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002457 if (!changed && PyBytes_CheckExact(input_obj)) {
2458 Py_INCREF(input_obj);
2459 Py_DECREF(result);
2460 result = input_obj;
2461 }
2462 PyBuffer_Release(&del_table_view);
2463 PyBuffer_Release(&table_view);
2464 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002467 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002468 for (i = 0; i < 256; i++)
2469 trans_table[i] = Py_CHARMASK(i);
2470 } else {
2471 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002472 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002474 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002477 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002478 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 for (i = inlen; --i >= 0; ) {
2481 c = Py_CHARMASK(*input++);
2482 if (trans_table[c] != -1)
2483 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2484 continue;
2485 changed = 1;
2486 }
2487 if (!changed && PyBytes_CheckExact(input_obj)) {
2488 Py_DECREF(result);
2489 Py_INCREF(input_obj);
2490 return input_obj;
2491 }
2492 /* Fix the size of the resulting string */
2493 if (inlen > 0)
2494 _PyBytes_Resize(&result, output - output_start);
2495 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002496}
2497
2498
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002499/*[clinic input]
2500
2501@staticmethod
2502bytes.maketrans
2503
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002504 frm: Py_buffer
2505 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002506 /
2507
2508Return a translation table useable for the bytes or bytearray translate method.
2509
2510The returned table will be one where each byte in frm is mapped to the byte at
2511the same position in to.
2512
2513The bytes objects frm and to must be of the same length.
2514[clinic start generated code]*/
2515
2516PyDoc_STRVAR(bytes_maketrans__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002517"maketrans(frm, to, /)\n"
2518"--\n"
2519"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002520"Return a translation table useable for the bytes or bytearray translate method.\n"
2521"\n"
2522"The returned table will be one where each byte in frm is mapped to the byte at\n"
2523"the same position in to.\n"
2524"\n"
2525"The bytes objects frm and to must be of the same length.");
2526
2527#define BYTES_MAKETRANS_METHODDEF \
2528 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},
2529
Georg Brandlabc38772009-04-12 15:51:51 +00002530static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002531bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002532
2533static PyObject *
2534bytes_maketrans(void *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00002535{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002536 PyObject *return_value = NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002537 Py_buffer frm = {NULL, NULL};
2538 Py_buffer to = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002539
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002540 if (!PyArg_ParseTuple(args,
2541 "y*y*:maketrans",
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002542 &frm, &to))
2543 goto exit;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002544 return_value = bytes_maketrans_impl(&frm, &to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002545
2546exit:
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002547 /* Cleanup for frm */
2548 if (frm.obj)
2549 PyBuffer_Release(&frm);
2550 /* Cleanup for to */
2551 if (to.obj)
2552 PyBuffer_Release(&to);
2553
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002554 return return_value;
2555}
2556
2557static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002558bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2559/*[clinic end generated code: output=7df47390c476ac60 input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002560{
2561 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002562}
2563
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002564/* find and count characters and substrings */
2565
/* Pointer to the first occurrence of byte `c` within the first `target_len`
   bytes of `target`, or NULL when there is none (thin wrapper over memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2568
2569/* String ops must return a string. */
2570/* If the object is subclass of string, create a copy */
2571Py_LOCAL(PyBytesObject *)
2572return_self(PyBytesObject *self)
2573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 if (PyBytes_CheckExact(self)) {
2575 Py_INCREF(self);
2576 return self;
2577 }
2578 return (PyBytesObject *)PyBytes_FromStringAndSize(
2579 PyBytes_AS_STRING(self),
2580 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002581}
2582
2583Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002584countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 Py_ssize_t count=0;
2587 const char *start=target;
2588 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002590 while ( (start=findchar(start, end-start, c)) != NULL ) {
2591 count++;
2592 if (count >= maxcount)
2593 break;
2594 start += 1;
2595 }
2596 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002597}
2598
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002599
2600/* Algorithms for different cases of string replacement */
2601
2602/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2603Py_LOCAL(PyBytesObject *)
2604replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 const char *to_s, Py_ssize_t to_len,
2606 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002607{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002608 char *self_s, *result_s;
2609 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002610 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002613 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002615 /* 1 at the end plus 1 after every character;
2616 count = min(maxcount, self_len + 1) */
2617 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002618 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002619 else
2620 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2621 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002623 /* Check for overflow */
2624 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002625 assert(count > 0);
2626 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002627 PyErr_SetString(PyExc_OverflowError,
2628 "replacement bytes are too long");
2629 return NULL;
2630 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002631 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 if (! (result = (PyBytesObject *)
2634 PyBytes_FromStringAndSize(NULL, result_len)) )
2635 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002637 self_s = PyBytes_AS_STRING(self);
2638 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 /* Lay the first one down (guaranteed this will occur) */
2643 Py_MEMCPY(result_s, to_s, to_len);
2644 result_s += to_len;
2645 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 for (i=0; i<count; i++) {
2648 *result_s++ = *self_s++;
2649 Py_MEMCPY(result_s, to_s, to_len);
2650 result_s += to_len;
2651 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002653 /* Copy the rest of the original string */
2654 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657}
2658
2659/* Special case for deleting a single character */
2660/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2661Py_LOCAL(PyBytesObject *)
2662replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002663 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 char *self_s, *result_s;
2666 char *start, *next, *end;
2667 Py_ssize_t self_len, result_len;
2668 Py_ssize_t count;
2669 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 self_len = PyBytes_GET_SIZE(self);
2672 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 count = countchar(self_s, self_len, from_c, maxcount);
2675 if (count == 0) {
2676 return return_self(self);
2677 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 result_len = self_len - count; /* from_len == 1 */
2680 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 if ( (result = (PyBytesObject *)
2683 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2684 return NULL;
2685 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 start = self_s;
2688 end = self_s + self_len;
2689 while (count-- > 0) {
2690 next = findchar(start, end-start, from_c);
2691 if (next == NULL)
2692 break;
2693 Py_MEMCPY(result_s, start, next-start);
2694 result_s += (next-start);
2695 start = next+1;
2696 }
2697 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700}
2701
2702/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2703
2704Py_LOCAL(PyBytesObject *)
2705replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 const char *from_s, Py_ssize_t from_len,
2707 Py_ssize_t maxcount) {
2708 char *self_s, *result_s;
2709 char *start, *next, *end;
2710 Py_ssize_t self_len, result_len;
2711 Py_ssize_t count, offset;
2712 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 self_len = PyBytes_GET_SIZE(self);
2715 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 count = stringlib_count(self_s, self_len,
2718 from_s, from_len,
2719 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 if (count == 0) {
2722 /* no matches */
2723 return return_self(self);
2724 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 result_len = self_len - (count * from_len);
2727 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002729 if ( (result = (PyBytesObject *)
2730 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2731 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 start = self_s;
2736 end = self_s + self_len;
2737 while (count-- > 0) {
2738 offset = stringlib_find(start, end-start,
2739 from_s, from_len,
2740 0);
2741 if (offset == -1)
2742 break;
2743 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002745 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 result_s += (next-start);
2748 start = next+from_len;
2749 }
2750 Py_MEMCPY(result_s, start, end-start);
2751 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752}
2753
2754/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2755Py_LOCAL(PyBytesObject *)
2756replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 char from_c, char to_c,
2758 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002759{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002760 char *self_s, *result_s, *start, *end, *next;
2761 Py_ssize_t self_len;
2762 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002764 /* The result string will be the same size */
2765 self_s = PyBytes_AS_STRING(self);
2766 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002768 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 if (next == NULL) {
2771 /* No matches; return the original string */
2772 return return_self(self);
2773 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 /* Need to make a new string */
2776 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2777 if (result == NULL)
2778 return NULL;
2779 result_s = PyBytes_AS_STRING(result);
2780 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 /* change everything in-place, starting with this one */
2783 start = result_s + (next-self_s);
2784 *start = to_c;
2785 start++;
2786 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 while (--maxcount > 0) {
2789 next = findchar(start, end-start, from_c);
2790 if (next == NULL)
2791 break;
2792 *next = to_c;
2793 start = next+1;
2794 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797}
2798
2799/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2800Py_LOCAL(PyBytesObject *)
2801replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 const char *from_s, Py_ssize_t from_len,
2803 const char *to_s, Py_ssize_t to_len,
2804 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002805{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 char *result_s, *start, *end;
2807 char *self_s;
2808 Py_ssize_t self_len, offset;
2809 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 self_s = PyBytes_AS_STRING(self);
2814 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 offset = stringlib_find(self_s, self_len,
2817 from_s, from_len,
2818 0);
2819 if (offset == -1) {
2820 /* No matches; return the original string */
2821 return return_self(self);
2822 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 /* Need to make a new string */
2825 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2826 if (result == NULL)
2827 return NULL;
2828 result_s = PyBytes_AS_STRING(result);
2829 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 /* change everything in-place, starting with this one */
2832 start = result_s + offset;
2833 Py_MEMCPY(start, to_s, from_len);
2834 start += from_len;
2835 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002837 while ( --maxcount > 0) {
2838 offset = stringlib_find(start, end-start,
2839 from_s, from_len,
2840 0);
2841 if (offset==-1)
2842 break;
2843 Py_MEMCPY(start+offset, to_s, from_len);
2844 start += offset+from_len;
2845 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002847 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002848}
2849
2850/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2851Py_LOCAL(PyBytesObject *)
2852replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 char from_c,
2854 const char *to_s, Py_ssize_t to_len,
2855 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002857 char *self_s, *result_s;
2858 char *start, *next, *end;
2859 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002860 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002861 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002863 self_s = PyBytes_AS_STRING(self);
2864 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 count = countchar(self_s, self_len, from_c, maxcount);
2867 if (count == 0) {
2868 /* no matches, return unchanged */
2869 return return_self(self);
2870 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 /* use the difference between current and new, hence the "-1" */
2873 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002874 assert(count > 0);
2875 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 PyErr_SetString(PyExc_OverflowError,
2877 "replacement bytes are too long");
2878 return NULL;
2879 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002880 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 if ( (result = (PyBytesObject *)
2883 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2884 return NULL;
2885 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 start = self_s;
2888 end = self_s + self_len;
2889 while (count-- > 0) {
2890 next = findchar(start, end-start, from_c);
2891 if (next == NULL)
2892 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 if (next == start) {
2895 /* replace with the 'to' */
2896 Py_MEMCPY(result_s, to_s, to_len);
2897 result_s += to_len;
2898 start += 1;
2899 } else {
2900 /* copy the unchanged old then the 'to' */
2901 Py_MEMCPY(result_s, start, next-start);
2902 result_s += (next-start);
2903 Py_MEMCPY(result_s, to_s, to_len);
2904 result_s += to_len;
2905 start = next+1;
2906 }
2907 }
2908 /* Copy the remainder of the remaining string */
2909 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912}
2913
2914/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2915Py_LOCAL(PyBytesObject *)
2916replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002917 const char *from_s, Py_ssize_t from_len,
2918 const char *to_s, Py_ssize_t to_len,
2919 Py_ssize_t maxcount) {
2920 char *self_s, *result_s;
2921 char *start, *next, *end;
2922 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002923 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 self_s = PyBytes_AS_STRING(self);
2927 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002929 count = stringlib_count(self_s, self_len,
2930 from_s, from_len,
2931 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002933 if (count == 0) {
2934 /* no matches, return unchanged */
2935 return return_self(self);
2936 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002938 /* Check for overflow */
2939 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002940 assert(count > 0);
2941 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002942 PyErr_SetString(PyExc_OverflowError,
2943 "replacement bytes are too long");
2944 return NULL;
2945 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002946 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002948 if ( (result = (PyBytesObject *)
2949 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2950 return NULL;
2951 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002953 start = self_s;
2954 end = self_s + self_len;
2955 while (count-- > 0) {
2956 offset = stringlib_find(start, end-start,
2957 from_s, from_len,
2958 0);
2959 if (offset == -1)
2960 break;
2961 next = start+offset;
2962 if (next == start) {
2963 /* replace with the 'to' */
2964 Py_MEMCPY(result_s, to_s, to_len);
2965 result_s += to_len;
2966 start += from_len;
2967 } else {
2968 /* copy the unchanged old then the 'to' */
2969 Py_MEMCPY(result_s, start, next-start);
2970 result_s += (next-start);
2971 Py_MEMCPY(result_s, to_s, to_len);
2972 result_s += to_len;
2973 start = next+from_len;
2974 }
2975 }
2976 /* Copy the remainder of the remaining string */
2977 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002979 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002980}
2981
2982
2983Py_LOCAL(PyBytesObject *)
2984replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002985 const char *from_s, Py_ssize_t from_len,
2986 const char *to_s, Py_ssize_t to_len,
2987 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 if (maxcount < 0) {
2990 maxcount = PY_SSIZE_T_MAX;
2991 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2992 /* nothing to do; return the original string */
2993 return return_self(self);
2994 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002996 if (maxcount == 0 ||
2997 (from_len == 0 && to_len == 0)) {
2998 /* nothing to do; return the original string */
2999 return return_self(self);
3000 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 if (from_len == 0) {
3005 /* insert the 'to' string everywhere. */
3006 /* >>> "Python".replace("", ".") */
3007 /* '.P.y.t.h.o.n.' */
3008 return replace_interleave(self, to_s, to_len, maxcount);
3009 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003011 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3012 /* point for an empty self string to generate a non-empty string */
3013 /* Special case so the remaining code always gets a non-empty string */
3014 if (PyBytes_GET_SIZE(self) == 0) {
3015 return return_self(self);
3016 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 if (to_len == 0) {
3019 /* delete all occurrences of 'from' string */
3020 if (from_len == 1) {
3021 return replace_delete_single_character(
3022 self, from_s[0], maxcount);
3023 } else {
3024 return replace_delete_substring(self, from_s,
3025 from_len, maxcount);
3026 }
3027 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 if (from_len == to_len) {
3032 if (from_len == 1) {
3033 return replace_single_character_in_place(
3034 self,
3035 from_s[0],
3036 to_s[0],
3037 maxcount);
3038 } else {
3039 return replace_substring_in_place(
3040 self, from_s, from_len, to_s, to_len,
3041 maxcount);
3042 }
3043 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003045 /* Otherwise use the more generic algorithms */
3046 if (from_len == 1) {
3047 return replace_single_character(self, from_s[0],
3048 to_s, to_len, maxcount);
3049 } else {
3050 /* len('from')>=2, len('to')>=1 */
3051 return replace_substring(self, from_s, from_len, to_s, to_len,
3052 maxcount);
3053 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003054}
3055
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003056
3057/*[clinic input]
3058bytes.replace
3059
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003060 old: Py_buffer
3061 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003062 count: Py_ssize_t = -1
3063 Maximum number of occurrences to replace.
3064 -1 (the default value) means replace all occurrences.
3065 /
3066
3067Return a copy with all occurrences of substring old replaced by new.
3068
3069If the optional argument count is given, only the first count occurrences are
3070replaced.
3071[clinic start generated code]*/
3072
3073PyDoc_STRVAR(bytes_replace__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003074"replace($self, old, new, count=-1, /)\n"
3075"--\n"
3076"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003077"Return a copy with all occurrences of substring old replaced by new.\n"
3078"\n"
3079" count\n"
3080" Maximum number of occurrences to replace.\n"
3081" -1 (the default value) means replace all occurrences.\n"
3082"\n"
3083"If the optional argument count is given, only the first count occurrences are\n"
3084"replaced.");
3085
3086#define BYTES_REPLACE_METHODDEF \
3087 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003088
3089static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003090bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003091
3092static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003093bytes_replace(PyBytesObject*self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003094{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003095 PyObject *return_value = NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003096 Py_buffer old = {NULL, NULL};
3097 Py_buffer new = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003098 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003099
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003100 if (!PyArg_ParseTuple(args,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003101 "y*y*|n:replace",
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003102 &old, &new, &count))
3103 goto exit;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003104 return_value = bytes_replace_impl(self, &old, &new, count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003105
3106exit:
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003107 /* Cleanup for old */
3108 if (old.obj)
3109 PyBuffer_Release(&old);
3110 /* Cleanup for new */
3111 if (new.obj)
3112 PyBuffer_Release(&new);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003113
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003114 return return_value;
3115}
3116
3117static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003118bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count)
3119/*[clinic end generated code: output=f07bd9ecf29ee8d8 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003120{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003121 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003122 (const char *)old->buf, old->len,
3123 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003124}
3125
3126/** End DALKE **/
3127
3128/* Matches the end (direction >= 0) or start (direction < 0) of self
3129 * against substr, using the start and end arguments. Returns
3130 * -1 on error, 0 if not found and 1 if found.
3131 */
3132Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003133_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003134 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003136 Py_ssize_t len = PyBytes_GET_SIZE(self);
3137 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003138 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003139 const char* sub;
3140 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003142 if (PyBytes_Check(substr)) {
3143 sub = PyBytes_AS_STRING(substr);
3144 slen = PyBytes_GET_SIZE(substr);
3145 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003146 else {
3147 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
3148 return -1;
3149 sub = sub_view.buf;
3150 slen = sub_view.len;
3151 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003152 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003154 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003156 if (direction < 0) {
3157 /* startswith */
3158 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003159 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003160 } else {
3161 /* endswith */
3162 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003163 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003165 if (end-slen > start)
3166 start = end - slen;
3167 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003168 if (end-start < slen)
3169 goto notfound;
3170 if (memcmp(str+start, sub, slen) != 0)
3171 goto notfound;
3172
3173 PyBuffer_Release(&sub_view);
3174 return 1;
3175
3176notfound:
3177 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003178 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003179}
3180
3181
3182PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003183"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003184\n\
3185Return True if B starts with the specified prefix, False otherwise.\n\
3186With optional start, test B beginning at that position.\n\
3187With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003188prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003189
3190static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003191bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003193 Py_ssize_t start = 0;
3194 Py_ssize_t end = PY_SSIZE_T_MAX;
3195 PyObject *subobj;
3196 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003197
Jesus Ceaac451502011-04-20 17:09:23 +02003198 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003199 return NULL;
3200 if (PyTuple_Check(subobj)) {
3201 Py_ssize_t i;
3202 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3203 result = _bytes_tailmatch(self,
3204 PyTuple_GET_ITEM(subobj, i),
3205 start, end, -1);
3206 if (result == -1)
3207 return NULL;
3208 else if (result) {
3209 Py_RETURN_TRUE;
3210 }
3211 }
3212 Py_RETURN_FALSE;
3213 }
3214 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003215 if (result == -1) {
3216 if (PyErr_ExceptionMatches(PyExc_TypeError))
3217 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3218 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003219 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003220 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003221 else
3222 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003223}
3224
3225
3226PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003227"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003228\n\
3229Return True if B ends with the specified suffix, False otherwise.\n\
3230With optional start, test B beginning at that position.\n\
3231With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003232suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003233
3234static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003235bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003236{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003237 Py_ssize_t start = 0;
3238 Py_ssize_t end = PY_SSIZE_T_MAX;
3239 PyObject *subobj;
3240 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003241
Jesus Ceaac451502011-04-20 17:09:23 +02003242 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003243 return NULL;
3244 if (PyTuple_Check(subobj)) {
3245 Py_ssize_t i;
3246 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3247 result = _bytes_tailmatch(self,
3248 PyTuple_GET_ITEM(subobj, i),
3249 start, end, +1);
3250 if (result == -1)
3251 return NULL;
3252 else if (result) {
3253 Py_RETURN_TRUE;
3254 }
3255 }
3256 Py_RETURN_FALSE;
3257 }
3258 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003259 if (result == -1) {
3260 if (PyErr_ExceptionMatches(PyExc_TypeError))
3261 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3262 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003263 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003264 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003265 else
3266 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003267}
3268
3269
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003270/*[clinic input]
3271bytes.decode
3272
3273 encoding: str(c_default="NULL") = 'utf-8'
3274 The encoding with which to decode the bytes.
3275 errors: str(c_default="NULL") = 'strict'
3276 The error handling scheme to use for the handling of decoding errors.
3277 The default is 'strict' meaning that decoding errors raise a
3278 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3279 as well as any other name registered with codecs.register_error that
3280 can handle UnicodeDecodeErrors.
3281
3282Decode the bytes using the codec registered for encoding.
3283[clinic start generated code]*/
3284
3285PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003286"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
3287"--\n"
3288"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003289"Decode the bytes using the codec registered for encoding.\n"
3290"\n"
3291" encoding\n"
3292" The encoding with which to decode the bytes.\n"
3293" errors\n"
3294" The error handling scheme to use for the handling of decoding errors.\n"
3295" The default is \'strict\' meaning that decoding errors raise a\n"
3296" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
3297" as well as any other name registered with codecs.register_error that\n"
3298" can handle UnicodeDecodeErrors.");
3299
3300#define BYTES_DECODE_METHODDEF \
3301 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
3302
3303static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003304bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003305
3306static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003307bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00003308{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003309 PyObject *return_value = NULL;
3310 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003311 const char *encoding = NULL;
3312 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00003313
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003314 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3315 "|ss:decode", _keywords,
3316 &encoding, &errors))
3317 goto exit;
3318 return_value = bytes_decode_impl(self, encoding, errors);
3319
3320exit:
3321 return return_value;
3322}
3323
3324static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003325bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
3326/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003327{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003328 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003329}
3330
Guido van Rossum20188312006-05-05 15:15:40 +00003331
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003332/*[clinic input]
3333bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003334
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003335 keepends: int(py_default="False") = 0
3336
3337Return a list of the lines in the bytes, breaking at line boundaries.
3338
3339Line breaks are not included in the resulting list unless keepends is given and
3340true.
3341[clinic start generated code]*/
3342
3343PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003344"splitlines($self, /, keepends=False)\n"
3345"--\n"
3346"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003347"Return a list of the lines in the bytes, breaking at line boundaries.\n"
3348"\n"
3349"Line breaks are not included in the resulting list unless keepends is given and\n"
3350"true.");
3351
3352#define BYTES_SPLITLINES_METHODDEF \
3353 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
3354
3355static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003356bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003357
3358static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003359bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003360{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003361 PyObject *return_value = NULL;
3362 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003363 int keepends = 0;
3364
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003365 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3366 "|i:splitlines", _keywords,
3367 &keepends))
3368 goto exit;
3369 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003370
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003371exit:
3372 return return_value;
3373}
3374
3375static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003376bytes_splitlines_impl(PyBytesObject*self, int keepends)
3377/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003378{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003379 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003380 (PyObject*) self, PyBytes_AS_STRING(self),
3381 PyBytes_GET_SIZE(self), keepends
3382 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003383}
3384
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003385static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003386hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003388 if (c >= 128)
3389 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003390 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003391 return c - '0';
3392 else {
David Malcolm96960882010-11-05 17:23:41 +00003393 if (Py_ISUPPER(c))
3394 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003395 if (c >= 'a' && c <= 'f')
3396 return c - 'a' + 10;
3397 }
3398 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003399}
3400
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003401/*[clinic input]
3402@classmethod
3403bytes.fromhex
3404
3405 string: unicode
3406 /
3407
3408Create a bytes object from a string of hexadecimal numbers.
3409
3410Spaces between two numbers are accepted.
3411Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3412[clinic start generated code]*/
3413
3414PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003415"fromhex($type, string, /)\n"
3416"--\n"
3417"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003418"Create a bytes object from a string of hexadecimal numbers.\n"
3419"\n"
3420"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02003421"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003422
3423#define BYTES_FROMHEX_METHODDEF \
3424 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
3425
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003426static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003427bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003428
3429static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003430bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003431{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003432 PyObject *return_value = NULL;
3433 PyObject *string;
3434
3435 if (!PyArg_ParseTuple(args,
3436 "U:fromhex",
3437 &string))
3438 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02003439 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003440
3441exit:
3442 return return_value;
3443}
3444
3445static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003446bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
3447/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003448{
3449 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003450 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003451 Py_ssize_t hexlen, byteslen, i, j;
3452 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003453 void *data;
3454 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003455
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003456 assert(PyUnicode_Check(string));
3457 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003458 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003459 kind = PyUnicode_KIND(string);
3460 data = PyUnicode_DATA(string);
3461 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003462
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003463 byteslen = hexlen/2; /* This overestimates if there are spaces */
3464 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3465 if (!newstring)
3466 return NULL;
3467 buf = PyBytes_AS_STRING(newstring);
3468 for (i = j = 0; i < hexlen; i += 2) {
3469 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003470 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003471 i++;
3472 if (i >= hexlen)
3473 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003474 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3475 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003476 if (top == -1 || bot == -1) {
3477 PyErr_Format(PyExc_ValueError,
3478 "non-hexadecimal number found in "
3479 "fromhex() arg at position %zd", i);
3480 goto error;
3481 }
3482 buf[j++] = (top << 4) + bot;
3483 }
3484 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3485 goto error;
3486 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003487
3488 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003489 Py_XDECREF(newstring);
3490 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003491}
3492
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003493static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003494bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003496 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003497}
3498
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003499
3500static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003501bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003502 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3503 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3504 _Py_capitalize__doc__},
3505 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3506 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003507 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003508 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3509 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003510 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003511 expandtabs__doc__},
3512 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003513 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003514 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3515 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3516 _Py_isalnum__doc__},
3517 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3518 _Py_isalpha__doc__},
3519 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3520 _Py_isdigit__doc__},
3521 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3522 _Py_islower__doc__},
3523 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3524 _Py_isspace__doc__},
3525 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3526 _Py_istitle__doc__},
3527 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3528 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003529 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003530 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3531 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003532 BYTES_LSTRIP_METHODDEF
3533 BYTES_MAKETRANS_METHODDEF
3534 BYTES_PARTITION_METHODDEF
3535 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003536 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3537 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3538 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003539 BYTES_RPARTITION_METHODDEF
3540 BYTES_RSPLIT_METHODDEF
3541 BYTES_RSTRIP_METHODDEF
3542 BYTES_SPLIT_METHODDEF
3543 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003544 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3545 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003546 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003547 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3548 _Py_swapcase__doc__},
3549 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003550 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003551 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3552 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003553 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003554};
3555
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003556static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003557bytes_mod(PyObject *v, PyObject *w)
3558{
3559 if (!PyBytes_Check(v))
3560 Py_RETURN_NOTIMPLEMENTED;
3561 return _PyBytes_Format(v, w);
3562}
3563
3564static PyNumberMethods bytes_as_number = {
3565 0, /*nb_add*/
3566 0, /*nb_subtract*/
3567 0, /*nb_multiply*/
3568 bytes_mod, /*nb_remainder*/
3569};
3570
3571static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003572str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3573
3574static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003575bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003576{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003577 PyObject *x = NULL;
3578 const char *encoding = NULL;
3579 const char *errors = NULL;
3580 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003581 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003582 Py_ssize_t size;
3583 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003584 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003586 if (type != &PyBytes_Type)
3587 return str_subtype_new(type, args, kwds);
3588 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3589 &encoding, &errors))
3590 return NULL;
3591 if (x == NULL) {
3592 if (encoding != NULL || errors != NULL) {
3593 PyErr_SetString(PyExc_TypeError,
3594 "encoding or errors without sequence "
3595 "argument");
3596 return NULL;
3597 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003598 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003599 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003601 if (PyUnicode_Check(x)) {
3602 /* Encode via the codec registry */
3603 if (encoding == NULL) {
3604 PyErr_SetString(PyExc_TypeError,
3605 "string argument without an encoding");
3606 return NULL;
3607 }
3608 new = PyUnicode_AsEncodedString(x, encoding, errors);
3609 if (new == NULL)
3610 return NULL;
3611 assert(PyBytes_Check(new));
3612 return new;
3613 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003614
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003615 /* If it's not unicode, there can't be encoding or errors */
3616 if (encoding != NULL || errors != NULL) {
3617 PyErr_SetString(PyExc_TypeError,
3618 "encoding or errors without a string argument");
3619 return NULL;
3620 }
3621
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003622 /* We'd like to call PyObject_Bytes here, but we need to check for an
3623 integer argument before deferring to PyBytes_FromObject, something
3624 PyObject_Bytes doesn't do. */
3625 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3626 if (func != NULL) {
3627 new = PyObject_CallFunctionObjArgs(func, NULL);
3628 Py_DECREF(func);
3629 if (new == NULL)
3630 return NULL;
3631 if (!PyBytes_Check(new)) {
3632 PyErr_Format(PyExc_TypeError,
3633 "__bytes__ returned non-bytes (type %.200s)",
3634 Py_TYPE(new)->tp_name);
3635 Py_DECREF(new);
3636 return NULL;
3637 }
3638 return new;
3639 }
3640 else if (PyErr_Occurred())
3641 return NULL;
3642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003643 /* Is it an integer? */
3644 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3645 if (size == -1 && PyErr_Occurred()) {
3646 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3647 return NULL;
3648 PyErr_Clear();
3649 }
3650 else if (size < 0) {
3651 PyErr_SetString(PyExc_ValueError, "negative count");
3652 return NULL;
3653 }
3654 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003655 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003656 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003657 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003658 return new;
3659 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003660
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003661 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003662}
3663
3664PyObject *
3665PyBytes_FromObject(PyObject *x)
3666{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003667 PyObject *new, *it;
3668 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003670 if (x == NULL) {
3671 PyErr_BadInternalCall();
3672 return NULL;
3673 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003674
3675 if (PyBytes_CheckExact(x)) {
3676 Py_INCREF(x);
3677 return x;
3678 }
3679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003680 /* Use the modern buffer interface */
3681 if (PyObject_CheckBuffer(x)) {
3682 Py_buffer view;
3683 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3684 return NULL;
3685 new = PyBytes_FromStringAndSize(NULL, view.len);
3686 if (!new)
3687 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003688 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3689 &view, view.len, 'C') < 0)
3690 goto fail;
3691 PyBuffer_Release(&view);
3692 return new;
3693 fail:
3694 Py_XDECREF(new);
3695 PyBuffer_Release(&view);
3696 return NULL;
3697 }
3698 if (PyUnicode_Check(x)) {
3699 PyErr_SetString(PyExc_TypeError,
3700 "cannot convert unicode object to bytes");
3701 return NULL;
3702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003704 if (PyList_CheckExact(x)) {
3705 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3706 if (new == NULL)
3707 return NULL;
3708 for (i = 0; i < Py_SIZE(x); i++) {
3709 Py_ssize_t value = PyNumber_AsSsize_t(
3710 PyList_GET_ITEM(x, i), PyExc_ValueError);
3711 if (value == -1 && PyErr_Occurred()) {
3712 Py_DECREF(new);
3713 return NULL;
3714 }
3715 if (value < 0 || value >= 256) {
3716 PyErr_SetString(PyExc_ValueError,
3717 "bytes must be in range(0, 256)");
3718 Py_DECREF(new);
3719 return NULL;
3720 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003721 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003722 }
3723 return new;
3724 }
3725 if (PyTuple_CheckExact(x)) {
3726 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3727 if (new == NULL)
3728 return NULL;
3729 for (i = 0; i < Py_SIZE(x); i++) {
3730 Py_ssize_t value = PyNumber_AsSsize_t(
3731 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3732 if (value == -1 && PyErr_Occurred()) {
3733 Py_DECREF(new);
3734 return NULL;
3735 }
3736 if (value < 0 || value >= 256) {
3737 PyErr_SetString(PyExc_ValueError,
3738 "bytes must be in range(0, 256)");
3739 Py_DECREF(new);
3740 return NULL;
3741 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003742 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003743 }
3744 return new;
3745 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003747 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003748 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003749 if (size == -1 && PyErr_Occurred())
3750 return NULL;
3751 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3752 returning a shared empty bytes string. This required because we
3753 want to call _PyBytes_Resize() the returned object, which we can
3754 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003755 if (size == 0)
3756 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003757 new = PyBytes_FromStringAndSize(NULL, size);
3758 if (new == NULL)
3759 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003760 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003762 /* Get the iterator */
3763 it = PyObject_GetIter(x);
3764 if (it == NULL)
3765 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003767 /* Run the iterator to exhaustion */
3768 for (i = 0; ; i++) {
3769 PyObject *item;
3770 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003771
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003772 /* Get the next item */
3773 item = PyIter_Next(it);
3774 if (item == NULL) {
3775 if (PyErr_Occurred())
3776 goto error;
3777 break;
3778 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003780 /* Interpret it as an int (__index__) */
3781 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3782 Py_DECREF(item);
3783 if (value == -1 && PyErr_Occurred())
3784 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003786 /* Range check */
3787 if (value < 0 || value >= 256) {
3788 PyErr_SetString(PyExc_ValueError,
3789 "bytes must be in range(0, 256)");
3790 goto error;
3791 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003793 /* Append the byte */
3794 if (i >= size) {
3795 size = 2 * size + 1;
3796 if (_PyBytes_Resize(&new, size) < 0)
3797 goto error;
3798 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003799 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003800 }
3801 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003803 /* Clean up and return success */
3804 Py_DECREF(it);
3805 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003806
3807 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003808 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003809 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003810 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003811}
3812
3813static PyObject *
3814str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3815{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003816 PyObject *tmp, *pnew;
3817 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003819 assert(PyType_IsSubtype(type, &PyBytes_Type));
3820 tmp = bytes_new(&PyBytes_Type, args, kwds);
3821 if (tmp == NULL)
3822 return NULL;
3823 assert(PyBytes_CheckExact(tmp));
3824 n = PyBytes_GET_SIZE(tmp);
3825 pnew = type->tp_alloc(type, n);
3826 if (pnew != NULL) {
3827 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3828 PyBytes_AS_STRING(tmp), n+1);
3829 ((PyBytesObject *)pnew)->ob_shash =
3830 ((PyBytesObject *)tmp)->ob_shash;
3831 }
3832 Py_DECREF(tmp);
3833 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003834}
3835
/* Docstring of the bytes type; referenced from the tp_doc slot of
   PyBytes_Type.
   NOTE(review): the leading whitespace of the "- ..." list items is part of
   the string literal (backslash-continued lines); confirm the exact
   indentation against the upstream file before applying. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - any object implementing the buffer API.\n\
 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003848
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003849static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003850
/* Type object for bytes.  The initializer is positional: each entry fills
   the slot named in the comment to its right, so entries must not be
   reordered. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                              /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003893
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003894void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003895PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003896{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003897 assert(pv != NULL);
3898 if (*pv == NULL)
3899 return;
3900 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003901 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003902 return;
3903 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003904
3905 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3906 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003907 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003908 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003909
Antoine Pitrou161d6952014-05-01 14:36:20 +02003910 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003911 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003912 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3913 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3914 Py_CLEAR(*pv);
3915 return;
3916 }
3917
3918 oldsize = PyBytes_GET_SIZE(*pv);
3919 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3920 PyErr_NoMemory();
3921 goto error;
3922 }
3923 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3924 goto error;
3925
3926 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3927 PyBuffer_Release(&wb);
3928 return;
3929
3930 error:
3931 PyBuffer_Release(&wb);
3932 Py_CLEAR(*pv);
3933 return;
3934 }
3935
3936 else {
3937 /* Multiple references, need to create new object */
3938 PyObject *v;
3939 v = bytes_concat(*pv, w);
3940 Py_DECREF(*pv);
3941 *pv = v;
3942 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003943}
3944
/* Same as PyBytes_Concat(pv, w), but additionally releases one reference to
   w afterwards.  w may be NULL (Py_XDECREF is used). */
void
PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
{
    PyBytes_Concat(pv, w);
    Py_XDECREF(w);
}
3951
3952
Ethan Furmanb95b5612015-01-23 20:05:18 -08003953/* The following function breaks the notion that bytes are immutable:
3954 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003955 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003956 as creating a new bytes object and destroying the old one, only
3957 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003958 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003959 Note that if there's not enough memory to resize the bytes object, the
3960 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003961 memory" exception is set, and -1 is returned. Else (on success) 0 is
3962 returned, and the value in *pv may or may not be the same as on input.
3963 As always, an extra byte is allocated for a trailing \0 byte (newsize
3964 does *not* include that), and a trailing \0 byte is stored.
3965*/
3966
3967int
3968_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3969{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003970 PyObject *v;
3971 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003972 v = *pv;
3973 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3974 *pv = 0;
3975 Py_DECREF(v);
3976 PyErr_BadInternalCall();
3977 return -1;
3978 }
3979 /* XXX UNREF/NEWREF interface should be more symmetrical */
3980 _Py_DEC_REFTOTAL;
3981 _Py_ForgetReference(v);
3982 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003983 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003984 if (*pv == NULL) {
3985 PyObject_Del(v);
3986 PyErr_NoMemory();
3987 return -1;
3988 }
3989 _Py_NewReference(*pv);
3990 sv = (PyBytesObject *) *pv;
3991 Py_SIZE(sv) = newsize;
3992 sv->ob_sval[newsize] = '\0';
3993 sv->ob_shash = -1; /* invalidate cached hash value */
3994 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003995}
3996
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003997void
3998PyBytes_Fini(void)
3999{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004000 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02004001 for (i = 0; i < UCHAR_MAX + 1; i++)
4002 Py_CLEAR(characters[i]);
4003 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004004}
4005
Benjamin Peterson4116f362008-05-27 00:36:20 +00004006/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004007
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* Index of the next byte striter_next() yields */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004013
/* tp_dealloc for the bytes iterator: stop GC tracking, release the reference
   to the underlying bytes object (it_seq may already be NULL), then free the
   iterator itself. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
4021
/* tp_traverse for the bytes iterator: the only object reference it holds is
   it_seq. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
4028
4029static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004030striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004031{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004032 PyBytesObject *seq;
4033 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004035 assert(it != NULL);
4036 seq = it->it_seq;
4037 if (seq == NULL)
4038 return NULL;
4039 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004041 if (it->it_index < PyBytes_GET_SIZE(seq)) {
4042 item = PyLong_FromLong(
4043 (unsigned char)seq->ob_sval[it->it_index]);
4044 if (item != NULL)
4045 ++it->it_index;
4046 return item;
4047 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004049 Py_DECREF(seq);
4050 it->it_seq = NULL;
4051 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004052}
4053
4054static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004055striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004056{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004057 Py_ssize_t len = 0;
4058 if (it->it_seq)
4059 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
4060 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004061}
4062
4063PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004064 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004065
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004066static PyObject *
4067striter_reduce(striterobject *it)
4068{
4069 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02004070 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004071 it->it_seq, it->it_index);
4072 } else {
4073 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
4074 if (u == NULL)
4075 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02004076 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004077 }
4078}
4079
4080PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
4081
4082static PyObject *
4083striter_setstate(striterobject *it, PyObject *state)
4084{
4085 Py_ssize_t index = PyLong_AsSsize_t(state);
4086 if (index == -1 && PyErr_Occurred())
4087 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00004088 if (it->it_seq != NULL) {
4089 if (index < 0)
4090 index = 0;
4091 else if (index > PyBytes_GET_SIZE(it->it_seq))
4092 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
4093 it->it_index = index;
4094 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004095 Py_RETURN_NONE;
4096}
4097
4098PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
4099
/* Method table of the bytes iterator (installed as tp_methods of
   PyBytesIter_Type).  Each entry is {name, C function, calling convention,
   docstring}. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
4109
/* Type object for the bytes iterator created by bytes_iter().  The
   initializer is positional: each entry fills the slot named in the comment
   to its right, so entries must not be reordered. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
4142
4143static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00004144bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004145{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004146 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004148 if (!PyBytes_Check(seq)) {
4149 PyErr_BadInternalCall();
4150 return NULL;
4151 }
4152 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
4153 if (it == NULL)
4154 return NULL;
4155 it->it_index = 0;
4156 Py_INCREF(seq);
4157 it->it_seq = (PyBytesObject *)seq;
4158 _PyObject_GC_TRACK(it);
4159 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004160}