blob: 5a2d41c5a8f5ea2e6b0f26169610f0a2e993c371 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Christian Heimes2c9c7a52008-05-26 13:42:13 +000015#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000016Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000017#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000018
Christian Heimes2c9c7a52008-05-26 13:42:13 +000019static PyBytesObject *characters[UCHAR_MAX + 1];
20static PyBytesObject *nullstring;
21
/* Base allocation size of a bytes object.

   An object holding n bytes of data is allocated with
   PyBytesObject_SIZE + n bytes: everything up to the start of ob_sval,
   plus one byte for the trailing null terminator, plus the n data bytes.

   Computing the size from offsetof() rather than sizeof(PyBytesObject)
   avoids paying for the struct's tail padding (the original authors
   quote a saving of 3 bytes per allocation on a typical system).
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
29
Christian Heimes2c9c7a52008-05-26 13:42:13 +000030/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000031 For PyBytes_FromString(), the parameter `str' points to a null-terminated
32 string containing exactly `size' bytes.
33
 For PyBytes_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000042 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000043 alter the data yourself, since the strings may be shared.
44
45 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020047 allocated for string data, not counting the null terminating character.
48 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000049 PyBytes_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyBytes_FromString()).
51*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020052static PyObject *
53_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000054{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020055 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020056 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 Py_INCREF(op);
63 return (PyObject *)op;
64 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065
Victor Stinner049e5092014-08-17 22:20:00 +020066 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 PyErr_SetString(PyExc_OverflowError,
68 "byte string is too large");
69 return NULL;
70 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020073 if (use_calloc)
74 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
75 else
76 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 if (op == NULL)
78 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010079 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020081 if (!use_calloc)
82 op->ob_sval[size] = '\0';
83 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (size == 0) {
85 nullstring = op;
86 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 }
88 return (PyObject *) op;
89}
90
91PyObject *
92PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
93{
94 PyBytesObject *op;
95 if (size < 0) {
96 PyErr_SetString(PyExc_SystemError,
97 "Negative size passed to PyBytes_FromStringAndSize");
98 return NULL;
99 }
100 if (size == 1 && str != NULL &&
101 (op = characters[*str & UCHAR_MAX]) != NULL)
102 {
103#ifdef COUNT_ALLOCS
104 one_strings++;
105#endif
106 Py_INCREF(op);
107 return (PyObject *)op;
108 }
109
110 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
111 if (op == NULL)
112 return NULL;
113 if (str == NULL)
114 return (PyObject *) op;
115
116 Py_MEMCPY(op->ob_sval, str, size);
117 /* share short strings */
118 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 characters[*str & UCHAR_MAX] = op;
120 Py_INCREF(op);
121 }
122 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000123}
124
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000125PyObject *
126PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200128 size_t size;
129 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 assert(str != NULL);
132 size = strlen(str);
133 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
134 PyErr_SetString(PyExc_OverflowError,
135 "byte string is too long");
136 return NULL;
137 }
138 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000139#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000141#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 /* Inline PyObject_NewVar */
154 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
155 if (op == NULL)
156 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100157 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 op->ob_shash = -1;
159 Py_MEMCPY(op->ob_sval, str, size+1);
160 /* share short strings */
161 if (size == 0) {
162 nullstring = op;
163 Py_INCREF(op);
164 } else if (size == 1) {
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000169}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000170
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000171PyObject *
172PyBytes_FromFormatV(const char *format, va_list vargs)
173{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000180 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 /* step 1: figure out how large a buffer we need */
182 for (f = format; *f; f++) {
183 if (*f == '%') {
184 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000185 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
189 * they don't affect the amount of space we reserve.
190 */
191 if ((*f == 'l' || *f == 'z') &&
192 (f[1] == 'd' || f[1] == 'u'))
193 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 switch (*f) {
196 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100197 {
198 int c = va_arg(count, int);
199 if (c < 0 || c > 255) {
200 PyErr_SetString(PyExc_OverflowError,
201 "PyBytes_FromFormatV(): %c format "
202 "expects an integer in range [0; 255]");
203 return NULL;
204 }
205 n++;
206 break;
207 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 case '%':
209 n++;
210 break;
211 case 'd': case 'u': case 'i': case 'x':
212 (void) va_arg(count, int);
213 /* 20 bytes is enough to hold a 64-bit
214 integer. Decimal takes the most space.
215 This isn't enough for octal. */
216 n += 20;
217 break;
218 case 's':
219 s = va_arg(count, char*);
220 n += strlen(s);
221 break;
222 case 'p':
223 (void) va_arg(count, int);
224 /* maximum 64-bit pointer representation:
225 * 0xffffffffffffffff
226 * so 19 characters is enough.
227 * XXX I count 18 -- what's the extra for?
228 */
229 n += 19;
230 break;
231 default:
232 /* if we stumble upon an unknown
233 formatting code, copy the rest of
234 the format string to the output
235 string. (we cannot just skip the
236 code, since there's no way to know
237 what's in the argument list) */
238 n += strlen(p);
239 goto expand;
240 }
241 } else
242 n++;
243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000244 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 /* step 2: fill the buffer */
246 /* Since we've analyzed how much space we need for the worst case,
247 use sprintf directly instead of the slower PyOS_snprintf. */
248 string = PyBytes_FromStringAndSize(NULL, n);
249 if (!string)
250 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 for (f = format; *f; f++) {
255 if (*f == '%') {
256 const char* p = f++;
257 Py_ssize_t i;
258 int longflag = 0;
259 int size_tflag = 0;
260 /* parse the width.precision part (we're only
261 interested in the precision value, if any) */
262 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000263 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 n = (n*10) + *f++ - '0';
265 if (*f == '.') {
266 f++;
267 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000268 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 n = (n*10) + *f++ - '0';
270 }
David Malcolm96960882010-11-05 17:23:41 +0000271 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 f++;
273 /* handle the long flag, but only for %ld and %lu.
274 others can be added when necessary. */
275 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
276 longflag = 1;
277 ++f;
278 }
279 /* handle the size_t flag. */
280 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
281 size_tflag = 1;
282 ++f;
283 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 switch (*f) {
286 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100287 {
288 int c = va_arg(vargs, int);
289 /* c has been checked for overflow in the first step */
290 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100292 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 case 'd':
294 if (longflag)
295 sprintf(s, "%ld", va_arg(vargs, long));
296 else if (size_tflag)
297 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
298 va_arg(vargs, Py_ssize_t));
299 else
300 sprintf(s, "%d", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'u':
304 if (longflag)
305 sprintf(s, "%lu",
306 va_arg(vargs, unsigned long));
307 else if (size_tflag)
308 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
309 va_arg(vargs, size_t));
310 else
311 sprintf(s, "%u",
312 va_arg(vargs, unsigned int));
313 s += strlen(s);
314 break;
315 case 'i':
316 sprintf(s, "%i", va_arg(vargs, int));
317 s += strlen(s);
318 break;
319 case 'x':
320 sprintf(s, "%x", va_arg(vargs, int));
321 s += strlen(s);
322 break;
323 case 's':
324 p = va_arg(vargs, char*);
325 i = strlen(p);
326 if (n > 0 && i > n)
327 i = n;
328 Py_MEMCPY(s, p, i);
329 s += i;
330 break;
331 case 'p':
332 sprintf(s, "%p", va_arg(vargs, void*));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (s[1] == 'X')
335 s[1] = 'x';
336 else if (s[1] != 'x') {
337 memmove(s+2, s, strlen(s)+1);
338 s[0] = '0';
339 s[1] = 'x';
340 }
341 s += strlen(s);
342 break;
343 case '%':
344 *s++ = '%';
345 break;
346 default:
347 strcpy(s, p);
348 s += strlen(s);
349 goto end;
350 }
351 } else
352 *s++ = *f;
353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354
355 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
357 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358}
359
360PyObject *
361PyBytes_FromFormat(const char *format, ...)
362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyObject* ret;
364 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365
366#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 ret = PyBytes_FromFormatV(format, vargs);
372 va_end(vargs);
373 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000374}
375
Ethan Furmanb95b5612015-01-23 20:05:18 -0800376/* Helpers for formatstring */
377
378Py_LOCAL_INLINE(PyObject *)
379getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
380{
381 Py_ssize_t argidx = *p_argidx;
382 if (argidx < arglen) {
383 (*p_argidx)++;
384 if (arglen < 0)
385 return args;
386 else
387 return PyTuple_GetItem(args, argidx);
388 }
389 PyErr_SetString(PyExc_TypeError,
390 "not enough arguments for format string");
391 return NULL;
392}
393
/* Bit flags recording which flag characters appeared in a format
 * specifier:
 *
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)
406
407/* Returns a new reference to a PyBytes object, or NULL on failure. */
408
409static PyObject *
410formatfloat(PyObject *v, int flags, int prec, int type)
411{
412 char *p;
413 PyObject *result;
414 double x;
415
416 x = PyFloat_AsDouble(v);
417 if (x == -1.0 && PyErr_Occurred()) {
418 PyErr_Format(PyExc_TypeError, "float argument required, "
419 "not %.200s", Py_TYPE(v)->tp_name);
420 return NULL;
421 }
422
423 if (prec < 0)
424 prec = 6;
425
426 p = PyOS_double_to_string(x, type, prec,
427 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
428
429 if (p == NULL)
430 return NULL;
431 result = PyBytes_FromStringAndSize(p, strlen(p));
432 PyMem_Free(p);
433 return result;
434}
435
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300436static PyObject *
437formatlong(PyObject *v, int flags, int prec, int type)
438{
439 PyObject *result, *iobj;
440 if (type == 'i')
441 type = 'd';
442 if (PyLong_Check(v))
443 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
444 if (PyNumber_Check(v)) {
445 /* make sure number is a type of integer for o, x, and X */
446 if (type == 'o' || type == 'x' || type == 'X')
447 iobj = PyNumber_Index(v);
448 else
449 iobj = PyNumber_Long(v);
450 if (iobj == NULL) {
451 if (!PyErr_ExceptionMatches(PyExc_TypeError))
452 return NULL;
453 }
454 else if (!PyLong_Check(iobj))
455 Py_CLEAR(iobj);
456 if (iobj != NULL) {
457 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
458 Py_DECREF(iobj);
459 return result;
460 }
461 }
462 PyErr_Format(PyExc_TypeError,
463 "%%%c format: %s is required, not %.200s", type,
464 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
465 : "a number",
466 Py_TYPE(v)->tp_name);
467 return NULL;
468}
469
470static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200471byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800472{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200473 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
474 *p = PyBytes_AS_STRING(arg)[0];
475 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800476 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200477 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
478 *p = PyByteArray_AS_STRING(arg)[0];
479 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800480 }
481 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300482 PyObject *iobj;
483 long ival;
484 int overflow;
485 /* make sure number is a type of integer */
486 if (PyLong_Check(arg)) {
487 ival = PyLong_AsLongAndOverflow(arg, &overflow);
488 }
489 else {
490 iobj = PyNumber_Index(arg);
491 if (iobj == NULL) {
492 if (!PyErr_ExceptionMatches(PyExc_TypeError))
493 return 0;
494 goto onError;
495 }
496 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
497 Py_DECREF(iobj);
498 }
499 if (!overflow && 0 <= ival && ival <= 255) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200500 *p = (char)ival;
501 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800502 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800503 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300504 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200505 PyErr_SetString(PyExc_TypeError,
506 "%c requires an integer in range(256) or a single byte");
507 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800508}
509
510static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200511format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800512{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200513 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800514 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800515 /* is it a bytes object? */
516 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200517 *pbuf = PyBytes_AS_STRING(v);
518 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200520 return v;
521 }
522 if (PyByteArray_Check(v)) {
523 *pbuf = PyByteArray_AS_STRING(v);
524 *plen = PyByteArray_GET_SIZE(v);
525 Py_INCREF(v);
526 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800527 }
528 /* does it support __bytes__? */
529 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
530 if (func != NULL) {
531 result = PyObject_CallFunctionObjArgs(func, NULL);
532 Py_DECREF(func);
533 if (result == NULL)
534 return NULL;
535 if (!PyBytes_Check(result)) {
536 PyErr_Format(PyExc_TypeError,
537 "__bytes__ returned non-bytes (type %.200s)",
538 Py_TYPE(result)->tp_name);
539 Py_DECREF(result);
540 return NULL;
541 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 *pbuf = PyBytes_AS_STRING(result);
543 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 return result;
545 }
546 PyErr_Format(PyExc_TypeError,
547 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
548 Py_TYPE(v)->tp_name);
549 return NULL;
550}
551
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is described by the original authors as the length of the
   buffer in which ints and chars are formatted, and as a magic number: a
   better solution may be to malloc a buffer of appropriate size for each
   format.
   NOTE(review): no use of FORMATBUFLEN is visible in the part of the file
   reviewed here -- confirm whether it is still needed.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (for example after `sizeof`).
*/
#define FORMATBUFLEN ((size_t)120)
561
562PyObject *
563_PyBytes_Format(PyObject *format, PyObject *args)
564{
565 char *fmt, *res;
566 Py_ssize_t arglen, argidx;
567 Py_ssize_t reslen, rescnt, fmtcnt;
568 int args_owned = 0;
569 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800570 PyObject *dict = NULL;
571 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
572 PyErr_BadInternalCall();
573 return NULL;
574 }
575 fmt = PyBytes_AS_STRING(format);
576 fmtcnt = PyBytes_GET_SIZE(format);
577 reslen = rescnt = fmtcnt + 100;
578 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
579 if (result == NULL)
580 return NULL;
581 res = PyBytes_AsString(result);
582 if (PyTuple_Check(args)) {
583 arglen = PyTuple_GET_SIZE(args);
584 argidx = 0;
585 }
586 else {
587 arglen = -1;
588 argidx = -2;
589 }
590 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
591 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
592 !PyByteArray_Check(args)) {
593 dict = args;
594 }
595 while (--fmtcnt >= 0) {
596 if (*fmt != '%') {
597 if (--rescnt < 0) {
598 rescnt = fmtcnt + 100;
599 reslen += rescnt;
600 if (_PyBytes_Resize(&result, reslen))
601 return NULL;
602 res = PyBytes_AS_STRING(result)
603 + reslen - rescnt;
604 --rescnt;
605 }
606 *res++ = *fmt++;
607 }
608 else {
609 /* Got a format specifier */
610 int flags = 0;
611 Py_ssize_t width = -1;
612 int prec = -1;
613 int c = '\0';
614 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800615 PyObject *v = NULL;
616 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200617 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800618 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200619 Py_ssize_t len = 0;
620 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800621
Ethan Furmanb95b5612015-01-23 20:05:18 -0800622 fmt++;
623 if (*fmt == '(') {
624 char *keystart;
625 Py_ssize_t keylen;
626 PyObject *key;
627 int pcount = 1;
628
629 if (dict == NULL) {
630 PyErr_SetString(PyExc_TypeError,
631 "format requires a mapping");
632 goto error;
633 }
634 ++fmt;
635 --fmtcnt;
636 keystart = fmt;
637 /* Skip over balanced parentheses */
638 while (pcount > 0 && --fmtcnt >= 0) {
639 if (*fmt == ')')
640 --pcount;
641 else if (*fmt == '(')
642 ++pcount;
643 fmt++;
644 }
645 keylen = fmt - keystart - 1;
646 if (fmtcnt < 0 || pcount > 0) {
647 PyErr_SetString(PyExc_ValueError,
648 "incomplete format key");
649 goto error;
650 }
651 key = PyBytes_FromStringAndSize(keystart,
652 keylen);
653 if (key == NULL)
654 goto error;
655 if (args_owned) {
656 Py_DECREF(args);
657 args_owned = 0;
658 }
659 args = PyObject_GetItem(dict, key);
660 Py_DECREF(key);
661 if (args == NULL) {
662 goto error;
663 }
664 args_owned = 1;
665 arglen = -1;
666 argidx = -2;
667 }
668 while (--fmtcnt >= 0) {
669 switch (c = *fmt++) {
670 case '-': flags |= F_LJUST; continue;
671 case '+': flags |= F_SIGN; continue;
672 case ' ': flags |= F_BLANK; continue;
673 case '#': flags |= F_ALT; continue;
674 case '0': flags |= F_ZERO; continue;
675 }
676 break;
677 }
678 if (c == '*') {
679 v = getnextarg(args, arglen, &argidx);
680 if (v == NULL)
681 goto error;
682 if (!PyLong_Check(v)) {
683 PyErr_SetString(PyExc_TypeError,
684 "* wants int");
685 goto error;
686 }
687 width = PyLong_AsSsize_t(v);
688 if (width == -1 && PyErr_Occurred())
689 goto error;
690 if (width < 0) {
691 flags |= F_LJUST;
692 width = -width;
693 }
694 if (--fmtcnt >= 0)
695 c = *fmt++;
696 }
697 else if (c >= 0 && isdigit(c)) {
698 width = c - '0';
699 while (--fmtcnt >= 0) {
700 c = Py_CHARMASK(*fmt++);
701 if (!isdigit(c))
702 break;
703 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
704 PyErr_SetString(
705 PyExc_ValueError,
706 "width too big");
707 goto error;
708 }
709 width = width*10 + (c - '0');
710 }
711 }
712 if (c == '.') {
713 prec = 0;
714 if (--fmtcnt >= 0)
715 c = *fmt++;
716 if (c == '*') {
717 v = getnextarg(args, arglen, &argidx);
718 if (v == NULL)
719 goto error;
720 if (!PyLong_Check(v)) {
721 PyErr_SetString(
722 PyExc_TypeError,
723 "* wants int");
724 goto error;
725 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200726 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800727 if (prec == -1 && PyErr_Occurred())
728 goto error;
729 if (prec < 0)
730 prec = 0;
731 if (--fmtcnt >= 0)
732 c = *fmt++;
733 }
734 else if (c >= 0 && isdigit(c)) {
735 prec = c - '0';
736 while (--fmtcnt >= 0) {
737 c = Py_CHARMASK(*fmt++);
738 if (!isdigit(c))
739 break;
740 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
741 PyErr_SetString(
742 PyExc_ValueError,
743 "prec too big");
744 goto error;
745 }
746 prec = prec*10 + (c - '0');
747 }
748 }
749 } /* prec */
750 if (fmtcnt >= 0) {
751 if (c == 'h' || c == 'l' || c == 'L') {
752 if (--fmtcnt >= 0)
753 c = *fmt++;
754 }
755 }
756 if (fmtcnt < 0) {
757 PyErr_SetString(PyExc_ValueError,
758 "incomplete format");
759 goto error;
760 }
761 if (c != '%') {
762 v = getnextarg(args, arglen, &argidx);
763 if (v == NULL)
764 goto error;
765 }
766 sign = 0;
767 fill = ' ';
768 switch (c) {
769 case '%':
770 pbuf = "%";
771 len = 1;
772 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700773 case 'r':
774 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800775 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200776 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800777 if (temp == NULL)
778 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200779 assert(PyUnicode_IS_ASCII(temp));
780 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
781 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800782 if (prec >= 0 && len > prec)
783 len = prec;
784 break;
785 case 's':
786 // %s is only for 2/3 code; 3 only code should use %b
787 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200788 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800789 if (temp == NULL)
790 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800791 if (prec >= 0 && len > prec)
792 len = prec;
793 break;
794 case 'i':
795 case 'd':
796 case 'u':
797 case 'o':
798 case 'x':
799 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300800 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200801 if (!temp)
802 goto error;
803 assert(PyUnicode_IS_ASCII(temp));
804 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
805 len = PyUnicode_GET_LENGTH(temp);
806 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 if (flags & F_ZERO)
808 fill = '0';
809 break;
810 case 'e':
811 case 'E':
812 case 'f':
813 case 'F':
814 case 'g':
815 case 'G':
816 temp = formatfloat(v, flags, prec, c);
817 if (temp == NULL)
818 goto error;
819 pbuf = PyBytes_AS_STRING(temp);
820 len = PyBytes_GET_SIZE(temp);
821 sign = 1;
822 if (flags & F_ZERO)
823 fill = '0';
824 break;
825 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200826 pbuf = &onechar;
827 len = byte_converter(v, &onechar);
828 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800829 goto error;
830 break;
831 default:
832 PyErr_Format(PyExc_ValueError,
833 "unsupported format character '%c' (0x%x) "
834 "at index %zd",
835 c, c,
836 (Py_ssize_t)(fmt - 1 -
837 PyBytes_AsString(format)));
838 goto error;
839 }
840 if (sign) {
841 if (*pbuf == '-' || *pbuf == '+') {
842 sign = *pbuf++;
843 len--;
844 }
845 else if (flags & F_SIGN)
846 sign = '+';
847 else if (flags & F_BLANK)
848 sign = ' ';
849 else
850 sign = 0;
851 }
852 if (width < len)
853 width = len;
854 if (rescnt - (sign != 0) < width) {
855 reslen -= rescnt;
856 rescnt = width + fmtcnt + 100;
857 reslen += rescnt;
858 if (reslen < 0) {
859 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800860 Py_XDECREF(temp);
861 return PyErr_NoMemory();
862 }
863 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800864 Py_XDECREF(temp);
865 return NULL;
866 }
867 res = PyBytes_AS_STRING(result)
868 + reslen - rescnt;
869 }
870 if (sign) {
871 if (fill != ' ')
872 *res++ = sign;
873 rescnt--;
874 if (width > len)
875 width--;
876 }
877 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
878 assert(pbuf[0] == '0');
879 assert(pbuf[1] == c);
880 if (fill != ' ') {
881 *res++ = *pbuf++;
882 *res++ = *pbuf++;
883 }
884 rescnt -= 2;
885 width -= 2;
886 if (width < 0)
887 width = 0;
888 len -= 2;
889 }
890 if (width > len && !(flags & F_LJUST)) {
891 do {
892 --rescnt;
893 *res++ = fill;
894 } while (--width > len);
895 }
896 if (fill == ' ') {
897 if (sign)
898 *res++ = sign;
899 if ((flags & F_ALT) &&
900 (c == 'x' || c == 'X')) {
901 assert(pbuf[0] == '0');
902 assert(pbuf[1] == c);
903 *res++ = *pbuf++;
904 *res++ = *pbuf++;
905 }
906 }
907 Py_MEMCPY(res, pbuf, len);
908 res += len;
909 rescnt -= len;
910 while (--width >= len) {
911 --rescnt;
912 *res++ = ' ';
913 }
914 if (dict && (argidx < arglen) && c != '%') {
915 PyErr_SetString(PyExc_TypeError,
916 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800917 Py_XDECREF(temp);
918 goto error;
919 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800920 Py_XDECREF(temp);
921 } /* '%' */
922 } /* until end */
923 if (argidx < arglen && !dict) {
924 PyErr_SetString(PyExc_TypeError,
925 "not all arguments converted during bytes formatting");
926 goto error;
927 }
928 if (args_owned) {
929 Py_DECREF(args);
930 }
931 if (_PyBytes_Resize(&result, reslen - rescnt))
932 return NULL;
933 return result;
934
935 error:
936 Py_DECREF(result);
937 if (args_owned) {
938 Py_DECREF(args);
939 }
940 return NULL;
941}
942
943/* =-= */
944
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000946bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000947{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000949}
950
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000951/* Unescape a backslash-escaped string. If unicode is non-zero,
952 the string is a u-literal. If recode_encoding is non-zero,
953 the string is UTF-8 encoded and should be re-encoded in the
954 specified encoding. */
955
956PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000957 Py_ssize_t len,
958 const char *errors,
959 Py_ssize_t unicode,
960 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000961{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000962 int c;
963 char *p, *buf;
964 const char *end;
965 PyObject *v;
966 Py_ssize_t newlen = recode_encoding ? 4*len:len;
967 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
968 if (v == NULL)
969 return NULL;
970 p = buf = PyBytes_AsString(v);
971 end = s + len;
972 while (s < end) {
973 if (*s != '\\') {
974 non_esc:
975 if (recode_encoding && (*s & 0x80)) {
976 PyObject *u, *w;
977 char *r;
978 const char* t;
979 Py_ssize_t rn;
980 t = s;
981 /* Decode non-ASCII bytes as UTF-8. */
982 while (t < end && (*t & 0x80)) t++;
983 u = PyUnicode_DecodeUTF8(s, t - s, errors);
984 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 /* Recode them in target encoding. */
987 w = PyUnicode_AsEncodedString(
988 u, recode_encoding, errors);
989 Py_DECREF(u);
990 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 /* Append bytes to output buffer. */
993 assert(PyBytes_Check(w));
994 r = PyBytes_AS_STRING(w);
995 rn = PyBytes_GET_SIZE(w);
996 Py_MEMCPY(p, r, rn);
997 p += rn;
998 Py_DECREF(w);
999 s = t;
1000 } else {
1001 *p++ = *s++;
1002 }
1003 continue;
1004 }
1005 s++;
1006 if (s==end) {
1007 PyErr_SetString(PyExc_ValueError,
1008 "Trailing \\ in string");
1009 goto failed;
1010 }
1011 switch (*s++) {
1012 /* XXX This assumes ASCII! */
1013 case '\n': break;
1014 case '\\': *p++ = '\\'; break;
1015 case '\'': *p++ = '\''; break;
1016 case '\"': *p++ = '\"'; break;
1017 case 'b': *p++ = '\b'; break;
1018 case 'f': *p++ = '\014'; break; /* FF */
1019 case 't': *p++ = '\t'; break;
1020 case 'n': *p++ = '\n'; break;
1021 case 'r': *p++ = '\r'; break;
1022 case 'v': *p++ = '\013'; break; /* VT */
1023 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1024 case '0': case '1': case '2': case '3':
1025 case '4': case '5': case '6': case '7':
1026 c = s[-1] - '0';
1027 if (s < end && '0' <= *s && *s <= '7') {
1028 c = (c<<3) + *s++ - '0';
1029 if (s < end && '0' <= *s && *s <= '7')
1030 c = (c<<3) + *s++ - '0';
1031 }
1032 *p++ = c;
1033 break;
1034 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001035 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 unsigned int x = 0;
1037 c = Py_CHARMASK(*s);
1038 s++;
David Malcolm96960882010-11-05 17:23:41 +00001039 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001041 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 x = 10 + c - 'a';
1043 else
1044 x = 10 + c - 'A';
1045 x = x << 4;
1046 c = Py_CHARMASK(*s);
1047 s++;
David Malcolm96960882010-11-05 17:23:41 +00001048 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001050 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 x += 10 + c - 'a';
1052 else
1053 x += 10 + c - 'A';
1054 *p++ = x;
1055 break;
1056 }
1057 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001058 PyErr_Format(PyExc_ValueError,
1059 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001060 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 goto failed;
1062 }
1063 if (strcmp(errors, "replace") == 0) {
1064 *p++ = '?';
1065 } else if (strcmp(errors, "ignore") == 0)
1066 /* do nothing */;
1067 else {
1068 PyErr_Format(PyExc_ValueError,
1069 "decoding error; unknown "
1070 "error handling code: %.400s",
1071 errors);
1072 goto failed;
1073 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001074 /* skip \x */
1075 if (s < end && Py_ISXDIGIT(s[0]))
1076 s++; /* and a hexdigit */
1077 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 default:
1079 *p++ = '\\';
1080 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001081 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 UTF-8 bytes may follow. */
1083 }
1084 }
1085 if (p-buf < newlen)
1086 _PyBytes_Resize(&v, p - buf);
1087 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001088 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 Py_DECREF(v);
1090 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001091}
1092
1093/* -------------------------------------------------------------------- */
1094/* object api */
1095
1096Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001097PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001098{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 if (!PyBytes_Check(op)) {
1100 PyErr_Format(PyExc_TypeError,
1101 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1102 return -1;
1103 }
1104 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001105}
1106
1107char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001108PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001109{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 if (!PyBytes_Check(op)) {
1111 PyErr_Format(PyExc_TypeError,
1112 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1113 return NULL;
1114 }
1115 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001116}
1117
1118int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001119PyBytes_AsStringAndSize(PyObject *obj,
1120 char **s,
1121 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001122{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 if (s == NULL) {
1124 PyErr_BadInternalCall();
1125 return -1;
1126 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 if (!PyBytes_Check(obj)) {
1129 PyErr_Format(PyExc_TypeError,
1130 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1131 return -1;
1132 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 *s = PyBytes_AS_STRING(obj);
1135 if (len != NULL)
1136 *len = PyBytes_GET_SIZE(obj);
1137 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001138 PyErr_SetString(PyExc_ValueError,
1139 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 return -1;
1141 }
1142 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001143}
Neal Norwitz6968b052007-02-27 19:02:19 +00001144
1145/* -------------------------------------------------------------------- */
1146/* Methods */
1147
Eric Smith0923d1d2009-04-16 20:16:10 +00001148#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001149
1150#include "stringlib/fastsearch.h"
1151#include "stringlib/count.h"
1152#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001153#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001154#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001155#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001156#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001157
Eric Smith0f78bff2009-11-30 01:01:42 +00001158#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001159
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001160PyObject *
1161PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001162{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001163 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001164 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001165 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001167 unsigned char quote, *s, *p;
1168
1169 /* Compute size of output string */
1170 squotes = dquotes = 0;
1171 newsize = 3; /* b'' */
1172 s = (unsigned char*)op->ob_sval;
1173 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001174 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001175 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001176 case '\'': squotes++; break;
1177 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001178 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001179 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001180 default:
1181 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001182 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001184 if (newsize > PY_SSIZE_T_MAX - incr)
1185 goto overflow;
1186 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001187 }
1188 quote = '\'';
1189 if (smartquotes && squotes && !dquotes)
1190 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001191 if (squotes && quote == '\'') {
1192 if (newsize > PY_SSIZE_T_MAX - squotes)
1193 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001194 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001196
1197 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 if (v == NULL) {
1199 return NULL;
1200 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001201 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001203 *p++ = 'b', *p++ = quote;
1204 for (i = 0; i < length; i++) {
1205 unsigned char c = op->ob_sval[i];
1206 if (c == quote || c == '\\')
1207 *p++ = '\\', *p++ = c;
1208 else if (c == '\t')
1209 *p++ = '\\', *p++ = 't';
1210 else if (c == '\n')
1211 *p++ = '\\', *p++ = 'n';
1212 else if (c == '\r')
1213 *p++ = '\\', *p++ = 'r';
1214 else if (c < ' ' || c >= 0x7f) {
1215 *p++ = '\\';
1216 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001217 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1218 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001220 else
1221 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001223 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001224 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001225 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001226
1227 overflow:
1228 PyErr_SetString(PyExc_OverflowError,
1229 "bytes object is too large to make repr");
1230 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001231}
1232
Neal Norwitz6968b052007-02-27 19:02:19 +00001233static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001234bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001237}
1238
Neal Norwitz6968b052007-02-27 19:02:19 +00001239static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001240bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001241{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 if (Py_BytesWarningFlag) {
1243 if (PyErr_WarnEx(PyExc_BytesWarning,
1244 "str() on a bytes instance", 1))
1245 return NULL;
1246 }
1247 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001248}
1249
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001250static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001251bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254}
Neal Norwitz6968b052007-02-27 19:02:19 +00001255
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001256/* This is also used by PyBytes_Concat() */
1257static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001258bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 Py_ssize_t size;
1261 Py_buffer va, vb;
1262 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 va.len = -1;
1265 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001266 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1267 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1269 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1270 goto done;
1271 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 /* Optimize end cases */
1274 if (va.len == 0 && PyBytes_CheckExact(b)) {
1275 result = b;
1276 Py_INCREF(result);
1277 goto done;
1278 }
1279 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1280 result = a;
1281 Py_INCREF(result);
1282 goto done;
1283 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 size = va.len + vb.len;
1286 if (size < 0) {
1287 PyErr_NoMemory();
1288 goto done;
1289 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 result = PyBytes_FromStringAndSize(NULL, size);
1292 if (result != NULL) {
1293 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1294 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1295 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001296
1297 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (va.len != -1)
1299 PyBuffer_Release(&va);
1300 if (vb.len != -1)
1301 PyBuffer_Release(&vb);
1302 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303}
Neal Norwitz6968b052007-02-27 19:02:19 +00001304
1305static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001306bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001307{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001308 Py_ssize_t i;
1309 Py_ssize_t j;
1310 Py_ssize_t size;
1311 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 size_t nbytes;
1313 if (n < 0)
1314 n = 0;
1315 /* watch out for overflows: the size can overflow int,
1316 * and the # of bytes needed can overflow size_t
1317 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001318 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 PyErr_SetString(PyExc_OverflowError,
1320 "repeated bytes are too long");
1321 return NULL;
1322 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001323 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1325 Py_INCREF(a);
1326 return (PyObject *)a;
1327 }
1328 nbytes = (size_t)size;
1329 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1330 PyErr_SetString(PyExc_OverflowError,
1331 "repeated bytes are too long");
1332 return NULL;
1333 }
1334 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1335 if (op == NULL)
1336 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001337 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 op->ob_shash = -1;
1339 op->ob_sval[size] = '\0';
1340 if (Py_SIZE(a) == 1 && n > 0) {
1341 memset(op->ob_sval, a->ob_sval[0] , n);
1342 return (PyObject *) op;
1343 }
1344 i = 0;
1345 if (i < size) {
1346 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1347 i = Py_SIZE(a);
1348 }
1349 while (i < size) {
1350 j = (i <= size-i) ? i : size-i;
1351 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1352 i += j;
1353 }
1354 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001355}
1356
Guido van Rossum98297ee2007-11-06 21:34:58 +00001357static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001358bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001359{
1360 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1361 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001362 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001363 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001364 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001365 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001366 return -1;
1367 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1368 varg.buf, varg.len, 0);
1369 PyBuffer_Release(&varg);
1370 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001371 }
1372 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001373 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1374 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001375 }
1376
Antoine Pitrou0010d372010-08-15 17:12:55 +00001377 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001378}
1379
Neal Norwitz6968b052007-02-27 19:02:19 +00001380static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001381bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 if (i < 0 || i >= Py_SIZE(a)) {
1384 PyErr_SetString(PyExc_IndexError, "index out of range");
1385 return NULL;
1386 }
1387 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001388}
1389
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001390Py_LOCAL(int)
1391bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1392{
1393 int cmp;
1394 Py_ssize_t len;
1395
1396 len = Py_SIZE(a);
1397 if (Py_SIZE(b) != len)
1398 return 0;
1399
1400 if (a->ob_sval[0] != b->ob_sval[0])
1401 return 0;
1402
1403 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1404 return (cmp == 0);
1405}
1406
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001408bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 int c;
1411 Py_ssize_t len_a, len_b;
1412 Py_ssize_t min_len;
1413 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 /* Make sure both arguments are strings. */
1416 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001417 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1418 if (PyObject_IsInstance((PyObject*)a,
1419 (PyObject*)&PyUnicode_Type) ||
1420 PyObject_IsInstance((PyObject*)b,
1421 (PyObject*)&PyUnicode_Type)) {
1422 if (PyErr_WarnEx(PyExc_BytesWarning,
1423 "Comparison between bytes and string", 1))
1424 return NULL;
1425 }
1426 else if (PyObject_IsInstance((PyObject*)a,
1427 (PyObject*)&PyLong_Type) ||
1428 PyObject_IsInstance((PyObject*)b,
1429 (PyObject*)&PyLong_Type)) {
1430 if (PyErr_WarnEx(PyExc_BytesWarning,
1431 "Comparison between bytes and int", 1))
1432 return NULL;
1433 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 }
1435 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001437 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001439 case Py_EQ:
1440 case Py_LE:
1441 case Py_GE:
1442 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001444 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001445 case Py_NE:
1446 case Py_LT:
1447 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001449 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001450 default:
1451 PyErr_BadArgument();
1452 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 }
1454 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001455 else if (op == Py_EQ || op == Py_NE) {
1456 int eq = bytes_compare_eq(a, b);
1457 eq ^= (op == Py_NE);
1458 result = eq ? Py_True : Py_False;
1459 }
1460 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001461 len_a = Py_SIZE(a);
1462 len_b = Py_SIZE(b);
1463 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001464 if (min_len > 0) {
1465 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001466 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001467 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001469 else
1470 c = 0;
1471 if (c == 0)
1472 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1473 switch (op) {
1474 case Py_LT: c = c < 0; break;
1475 case Py_LE: c = c <= 0; break;
1476 case Py_GT: c = c > 0; break;
1477 case Py_GE: c = c >= 0; break;
1478 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001479 PyErr_BadArgument();
1480 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001481 }
1482 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 Py_INCREF(result);
1486 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001487}
1488
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001489static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001490bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001491{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001492 if (a->ob_shash == -1) {
1493 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001494 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001495 }
1496 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001497}
1498
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001499static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001500bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001501{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 if (PyIndex_Check(item)) {
1503 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1504 if (i == -1 && PyErr_Occurred())
1505 return NULL;
1506 if (i < 0)
1507 i += PyBytes_GET_SIZE(self);
1508 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1509 PyErr_SetString(PyExc_IndexError,
1510 "index out of range");
1511 return NULL;
1512 }
1513 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1514 }
1515 else if (PySlice_Check(item)) {
1516 Py_ssize_t start, stop, step, slicelength, cur, i;
1517 char* source_buf;
1518 char* result_buf;
1519 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001520
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001521 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 PyBytes_GET_SIZE(self),
1523 &start, &stop, &step, &slicelength) < 0) {
1524 return NULL;
1525 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 if (slicelength <= 0) {
1528 return PyBytes_FromStringAndSize("", 0);
1529 }
1530 else if (start == 0 && step == 1 &&
1531 slicelength == PyBytes_GET_SIZE(self) &&
1532 PyBytes_CheckExact(self)) {
1533 Py_INCREF(self);
1534 return (PyObject *)self;
1535 }
1536 else if (step == 1) {
1537 return PyBytes_FromStringAndSize(
1538 PyBytes_AS_STRING(self) + start,
1539 slicelength);
1540 }
1541 else {
1542 source_buf = PyBytes_AS_STRING(self);
1543 result = PyBytes_FromStringAndSize(NULL, slicelength);
1544 if (result == NULL)
1545 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 result_buf = PyBytes_AS_STRING(result);
1548 for (cur = start, i = 0; i < slicelength;
1549 cur += step, i++) {
1550 result_buf[i] = source_buf[cur];
1551 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 return result;
1554 }
1555 }
1556 else {
1557 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001558 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 Py_TYPE(item)->tp_name);
1560 return NULL;
1561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001562}
1563
1564static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001565bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1568 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001569}
1570
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001571static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 (lenfunc)bytes_length, /*sq_length*/
1573 (binaryfunc)bytes_concat, /*sq_concat*/
1574 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1575 (ssizeargfunc)bytes_item, /*sq_item*/
1576 0, /*sq_slice*/
1577 0, /*sq_ass_item*/
1578 0, /*sq_ass_slice*/
1579 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001580};
1581
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001582static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 (lenfunc)bytes_length,
1584 (binaryfunc)bytes_subscript,
1585 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586};
1587
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001588static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 (getbufferproc)bytes_buffer_getbuffer,
1590 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001591};
1592
1593
/* Strip-mode selectors.  NOTE(review): their users lie outside this part of
   the file; presumably they pick which end(s) a strip operation trims. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
1597
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001598/*[clinic input]
1599bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001600
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001601 sep: object = None
1602 The delimiter according which to split the bytes.
1603 None (the default value) means split on ASCII whitespace characters
1604 (space, tab, return, newline, formfeed, vertical tab).
1605 maxsplit: Py_ssize_t = -1
1606 Maximum number of splits to do.
1607 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001609Return a list of the sections in the bytes, using sep as the delimiter.
1610[clinic start generated code]*/
1611
1612PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001613"split($self, /, sep=None, maxsplit=-1)\n"
1614"--\n"
1615"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001616"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1617"\n"
1618" sep\n"
1619" The delimiter according which to split the bytes.\n"
1620" None (the default value) means split on ASCII whitespace characters\n"
1621" (space, tab, return, newline, formfeed, vertical tab).\n"
1622" maxsplit\n"
1623" Maximum number of splits to do.\n"
1624" -1 (the default value) means no limit.");
1625
1626#define BYTES_SPLIT_METHODDEF \
1627 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001628
1629static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001630bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001631
1632static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001633bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001634{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001635 PyObject *return_value = NULL;
1636 static char *_keywords[] = {"sep", "maxsplit", NULL};
1637 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001639
1640 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1641 "|On:split", _keywords,
1642 &sep, &maxsplit))
1643 goto exit;
1644 return_value = bytes_split_impl(self, sep, maxsplit);
1645
1646exit:
1647 return return_value;
1648}
1649
1650static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001651bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1652/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001653{
1654 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 const char *s = PyBytes_AS_STRING(self), *sub;
1656 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001657 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 if (maxsplit < 0)
1660 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001661 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001663 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 return NULL;
1665 sub = vsub.buf;
1666 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1669 PyBuffer_Release(&vsub);
1670 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001671}
1672
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001673/*[clinic input]
1674bytes.partition
1675
1676 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001677 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001678 /
1679
1680Partition the bytes into three parts using the given separator.
1681
1682This will search for the separator sep in the bytes. If the separator is found,
1683returns a 3-tuple containing the part before the separator, the separator
1684itself, and the part after it.
1685
1686If the separator is not found, returns a 3-tuple containing the original bytes
1687object and two empty bytes objects.
1688[clinic start generated code]*/
1689
1690PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001691"partition($self, sep, /)\n"
1692"--\n"
1693"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001694"Partition the bytes into three parts using the given separator.\n"
1695"\n"
1696"This will search for the separator sep in the bytes. If the separator is found,\n"
1697"returns a 3-tuple containing the part before the separator, the separator\n"
1698"itself, and the part after it.\n"
1699"\n"
1700"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1701"object and two empty bytes objects.");
1702
1703#define BYTES_PARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001704 {"partition", (PyCFunction)bytes_partition, METH_VARARGS, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001705
1706static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001707bytes_partition_impl(PyBytesObject *self, Py_buffer *sep);
1708
1709static PyObject *
1710bytes_partition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001711{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001712 PyObject *return_value = NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001713 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001714
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001715 if (!PyArg_ParseTuple(args,
1716 "y*:partition",
1717 &sep))
1718 goto exit;
1719 return_value = bytes_partition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001720
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001721exit:
1722 /* Cleanup for sep */
1723 if (sep.obj)
1724 PyBuffer_Release(&sep);
1725
1726 return return_value;
Neal Norwitz6968b052007-02-27 19:02:19 +00001727}
1728
Neal Norwitz6968b052007-02-27 19:02:19 +00001729static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001730bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1731/*[clinic end generated code: output=3006727cfbf83aa4 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001732{
Neal Norwitz6968b052007-02-27 19:02:19 +00001733 return stringlib_partition(
1734 (PyObject*) self,
1735 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001736 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001737 );
1738}
1739
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001740/*[clinic input]
1741bytes.rpartition
1742
1743 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001744 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001745 /
1746
1747Partition the bytes into three parts using the given separator.
1748
This will search for the separator sep in the bytes, starting at the end. If
1750the separator is found, returns a 3-tuple containing the part before the
1751separator, the separator itself, and the part after it.
1752
1753If the separator is not found, returns a 3-tuple containing two empty bytes
1754objects and the original bytes object.
1755[clinic start generated code]*/
1756
1757PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001758"rpartition($self, sep, /)\n"
1759"--\n"
1760"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001761"Partition the bytes into three parts using the given separator.\n"
1762"\n"
1763"This will search for the separator sep in the bytes, starting and the end. If\n"
1764"the separator is found, returns a 3-tuple containing the part before the\n"
1765"separator, the separator itself, and the part after it.\n"
1766"\n"
1767"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1768"objects and the original bytes object.");
1769
1770#define BYTES_RPARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001771 {"rpartition", (PyCFunction)bytes_rpartition, METH_VARARGS, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001772
1773static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001774bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep);
1775
1776static PyObject *
1777bytes_rpartition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001778{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001779 PyObject *return_value = NULL;
1780 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001781
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001782 if (!PyArg_ParseTuple(args,
1783 "y*:rpartition",
1784 &sep))
1785 goto exit;
1786 return_value = bytes_rpartition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001787
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001788exit:
1789 /* Cleanup for sep */
1790 if (sep.obj)
1791 PyBuffer_Release(&sep);
1792
1793 return return_value;
1794}
1795
1796static PyObject *
1797bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1798/*[clinic end generated code: output=57b169dc47fa90e8 input=6588fff262a9170e]*/
1799{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001800 return stringlib_rpartition(
1801 (PyObject*) self,
1802 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001803 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001804 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001805}
1806
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001807/*[clinic input]
1808bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001809
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001810Return a list of the sections in the bytes, using sep as the delimiter.
1811
1812Splitting is done starting at the end of the bytes and working to the front.
1813[clinic start generated code]*/
1814
1815PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001816"rsplit($self, /, sep=None, maxsplit=-1)\n"
1817"--\n"
1818"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001819"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1820"\n"
1821" sep\n"
1822" The delimiter according which to split the bytes.\n"
1823" None (the default value) means split on ASCII whitespace characters\n"
1824" (space, tab, return, newline, formfeed, vertical tab).\n"
1825" maxsplit\n"
1826" Maximum number of splits to do.\n"
1827" -1 (the default value) means no limit.\n"
1828"\n"
1829"Splitting is done starting at the end of the bytes and working to the front.");
1830
1831#define BYTES_RSPLIT_METHODDEF \
1832 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833
Neal Norwitz6968b052007-02-27 19:02:19 +00001834static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001835bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001836
1837static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001838bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001839{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001840 PyObject *return_value = NULL;
1841 static char *_keywords[] = {"sep", "maxsplit", NULL};
1842 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001844
1845 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1846 "|On:rsplit", _keywords,
1847 &sep, &maxsplit))
1848 goto exit;
1849 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1850
1851exit:
1852 return return_value;
1853}
1854
1855static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001856bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1857/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001858{
1859 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 const char *s = PyBytes_AS_STRING(self), *sub;
1861 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001862 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001863
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 if (maxsplit < 0)
1865 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001866 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001868 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 return NULL;
1870 sub = vsub.buf;
1871 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1874 PyBuffer_Release(&vsub);
1875 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001876}
1877
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001879/*[clinic input]
1880bytes.join
1881
1882 iterable_of_bytes: object
1883 /
1884
1885Concatenate any number of bytes objects.
1886
1887The bytes whose method is called is inserted in between each pair.
1888
1889The result is returned as a new bytes object.
1890
1891Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1892[clinic start generated code]*/
1893
1894PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001895"join($self, iterable_of_bytes, /)\n"
1896"--\n"
1897"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001898"Concatenate any number of bytes objects.\n"
1899"\n"
1900"The bytes whose method is called is inserted in between each pair.\n"
1901"\n"
1902"The result is returned as a new bytes object.\n"
1903"\n"
1904"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1905
1906#define BYTES_JOIN_METHODDEF \
1907 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
Neal Norwitz6968b052007-02-27 19:02:19 +00001909static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001910bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1911/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001912{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001913 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001914}
1915
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001916PyObject *
1917_PyBytes_Join(PyObject *sep, PyObject *x)
1918{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 assert(sep != NULL && PyBytes_Check(sep));
1920 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001921 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922}
1923
/* Helper macro to fix up start/end slice values in place.

   start and end must be modifiable lvalues; len is only read.
     - end greater than len is clamped to len;
     - a negative end has len added to it and is then clamped at 0;
     - a negative start has len added to it and is then clamped at 0.
   A start that is larger than len is left untouched.

   The body is wrapped in do { ... } while (0) so that the expansion is a
   single statement: it is safe as the body of an unbraced if/else/loop and
   must be followed by a semicolon.  The arguments are parenthesized so
   that expressions can be passed for len. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938
1939Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001940bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001943 char byte;
1944 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001946 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001948 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949
Antoine Pitrouac65d962011-10-20 23:54:17 +02001950 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1951 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953
Antoine Pitrouac65d962011-10-20 23:54:17 +02001954 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001955 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001956 return -2;
1957
1958 sub = subbuf.buf;
1959 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001961 else {
1962 sub = &byte;
1963 sub_len = 1;
1964 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001965 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001966
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001967 ADJUST_INDICES(start, end, len);
1968 if (end - start < sub_len)
1969 res = -1;
Victor Stinnerdabbfe72015-03-25 03:16:32 +01001970 /* Issue #23573: FIXME, windows has no memrchr() */
1971 else if (sub_len == 1 && dir > 0) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001972 unsigned char needle = *sub;
1973 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1974 res = stringlib_fastsearch_memchr_1char(
1975 PyBytes_AS_STRING(self) + start, end - start,
1976 needle, needle, mode);
1977 if (res >= 0)
1978 res += start;
1979 }
1980 else {
1981 if (dir > 0)
1982 res = stringlib_find_slice(
1983 PyBytes_AS_STRING(self), len,
1984 sub, sub_len, start, end);
1985 else
1986 res = stringlib_rfind_slice(
1987 PyBytes_AS_STRING(self), len,
1988 sub, sub_len, start, end);
1989 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001990
1991 if (subobj)
1992 PyBuffer_Release(&subbuf);
1993
1994 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995}
1996
1997
1998PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001999"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002000\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002001Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002002such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002004\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005Return -1 on failure.");
2006
Neal Norwitz6968b052007-02-27 19:02:19 +00002007static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002008bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002009{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 Py_ssize_t result = bytes_find_internal(self, args, +1);
2011 if (result == -2)
2012 return NULL;
2013 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002014}
2015
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016
2017PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002018"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002019\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002020Like B.find() but raise ValueError when the substring is not found.");
2021
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002022static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002023bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002024{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002025 Py_ssize_t result = bytes_find_internal(self, args, +1);
2026 if (result == -2)
2027 return NULL;
2028 if (result == -1) {
2029 PyErr_SetString(PyExc_ValueError,
2030 "substring not found");
2031 return NULL;
2032 }
2033 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002034}
2035
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
2037PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002038"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002039\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002041such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002042arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002043\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044Return -1 on failure.");
2045
Neal Norwitz6968b052007-02-27 19:02:19 +00002046static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002047bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002049 Py_ssize_t result = bytes_find_internal(self, args, -1);
2050 if (result == -2)
2051 return NULL;
2052 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002053}
2054
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002055
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002057"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002058\n\
2059Like B.rfind() but raise ValueError when the substring is not found.");
2060
2061static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002062bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 Py_ssize_t result = bytes_find_internal(self, args, -1);
2065 if (result == -2)
2066 return NULL;
2067 if (result == -1) {
2068 PyErr_SetString(PyExc_ValueError,
2069 "substring not found");
2070 return NULL;
2071 }
2072 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002073}
2074
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
2076Py_LOCAL_INLINE(PyObject *)
2077do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002078{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 Py_buffer vsep;
2080 char *s = PyBytes_AS_STRING(self);
2081 Py_ssize_t len = PyBytes_GET_SIZE(self);
2082 char *sep;
2083 Py_ssize_t seplen;
2084 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002086 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 return NULL;
2088 sep = vsep.buf;
2089 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 i = 0;
2092 if (striptype != RIGHTSTRIP) {
2093 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2094 i++;
2095 }
2096 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 j = len;
2099 if (striptype != LEFTSTRIP) {
2100 do {
2101 j--;
2102 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2103 j++;
2104 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2109 Py_INCREF(self);
2110 return (PyObject*)self;
2111 }
2112 else
2113 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002114}
2115
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116
2117Py_LOCAL_INLINE(PyObject *)
2118do_strip(PyBytesObject *self, int striptype)
2119{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002120 char *s = PyBytes_AS_STRING(self);
2121 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 i = 0;
2124 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002125 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 i++;
2127 }
2128 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 j = len;
2131 if (striptype != LEFTSTRIP) {
2132 do {
2133 j--;
David Malcolm96960882010-11-05 17:23:41 +00002134 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002135 j++;
2136 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2139 Py_INCREF(self);
2140 return (PyObject*)self;
2141 }
2142 else
2143 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144}
2145
2146
2147Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002148do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002150 if (bytes != NULL && bytes != Py_None) {
2151 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 }
2153 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154}
2155
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002156/*[clinic input]
2157bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002159 self: self(type="PyBytesObject *")
2160 bytes: object = None
2161 /
2162
2163Strip leading and trailing bytes contained in the argument.
2164
2165If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2166[clinic start generated code]*/
2167
2168PyDoc_STRVAR(bytes_strip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002169"strip($self, bytes=None, /)\n"
2170"--\n"
2171"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002172"Strip leading and trailing bytes contained in the argument.\n"
2173"\n"
2174"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
2175
2176#define BYTES_STRIP_METHODDEF \
2177 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
2178
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002179static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002180bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
2181
2182static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002183bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002184{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002185 PyObject *return_value = NULL;
2186 PyObject *bytes = Py_None;
2187
2188 if (!PyArg_UnpackTuple(args, "strip",
2189 0, 1,
2190 &bytes))
2191 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002192 return_value = bytes_strip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002193
2194exit:
2195 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002196}
2197
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002198static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002200/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002201{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002202 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002203}
2204
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002205/*[clinic input]
2206bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002208 self: self(type="PyBytesObject *")
2209 bytes: object = None
2210 /
2211
2212Strip leading bytes contained in the argument.
2213
2214If the argument is omitted or None, strip leading ASCII whitespace.
2215[clinic start generated code]*/
2216
2217PyDoc_STRVAR(bytes_lstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002218"lstrip($self, bytes=None, /)\n"
2219"--\n"
2220"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221"Strip leading bytes contained in the argument.\n"
2222"\n"
2223"If the argument is omitted or None, strip leading ASCII whitespace.");
2224
2225#define BYTES_LSTRIP_METHODDEF \
2226 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
2227
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002228static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002229bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
2230
2231static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002232bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002233{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234 PyObject *return_value = NULL;
2235 PyObject *bytes = Py_None;
2236
2237 if (!PyArg_UnpackTuple(args, "lstrip",
2238 0, 1,
2239 &bytes))
2240 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002241 return_value = bytes_lstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002242
2243exit:
2244 return return_value;
2245}
2246
2247static PyObject *
2248bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002249/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002250{
2251 return do_argstrip(self, LEFTSTRIP, bytes);
2252}
2253
2254/*[clinic input]
2255bytes.rstrip
2256
2257 self: self(type="PyBytesObject *")
2258 bytes: object = None
2259 /
2260
2261Strip trailing bytes contained in the argument.
2262
2263If the argument is omitted or None, strip trailing ASCII whitespace.
2264[clinic start generated code]*/
2265
2266PyDoc_STRVAR(bytes_rstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002267"rstrip($self, bytes=None, /)\n"
2268"--\n"
2269"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002270"Strip trailing bytes contained in the argument.\n"
2271"\n"
2272"If the argument is omitted or None, strip trailing ASCII whitespace.");
2273
2274#define BYTES_RSTRIP_METHODDEF \
2275 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
2276
2277static PyObject *
2278bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
2279
2280static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002281bytes_rstrip(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002282{
2283 PyObject *return_value = NULL;
2284 PyObject *bytes = Py_None;
2285
2286 if (!PyArg_UnpackTuple(args, "rstrip",
2287 0, 1,
2288 &bytes))
2289 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002290 return_value = bytes_rstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002291
2292exit:
2293 return return_value;
2294}
2295
2296static PyObject *
2297bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002298/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002299{
2300 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002301}
Neal Norwitz6968b052007-02-27 19:02:19 +00002302
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002303
2304PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002305"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002306\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002308string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002309as in slice notation.");
2310
2311static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002312bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 PyObject *sub_obj;
2315 const char *str = PyBytes_AS_STRING(self), *sub;
2316 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002317 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002319
Antoine Pitrouac65d962011-10-20 23:54:17 +02002320 Py_buffer vsub;
2321 PyObject *count_obj;
2322
2323 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2324 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002325 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002326
Antoine Pitrouac65d962011-10-20 23:54:17 +02002327 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002328 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002329 return NULL;
2330
2331 sub = vsub.buf;
2332 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002334 else {
2335 sub = &byte;
2336 sub_len = 1;
2337 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002339 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002340
Antoine Pitrouac65d962011-10-20 23:54:17 +02002341 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2343 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002344
2345 if (sub_obj)
2346 PyBuffer_Release(&vsub);
2347
2348 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002349}
2350
2351
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002352/*[clinic input]
2353bytes.translate
2354
2355 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002356 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002357 Translation table, which must be a bytes object of length 256.
2358 [
2359 deletechars: object
2360 ]
2361 /
2362
2363Return a copy with each character mapped by the given translation table.
2364
2365All characters occurring in the optional argument deletechars are removed.
2366The remaining characters are mapped through the given translation table.
2367[clinic start generated code]*/
2368
2369PyDoc_STRVAR(bytes_translate__doc__,
2370"translate(table, [deletechars])\n"
2371"Return a copy with each character mapped by the given translation table.\n"
2372"\n"
2373" table\n"
2374" Translation table, which must be a bytes object of length 256.\n"
2375"\n"
2376"All characters occurring in the optional argument deletechars are removed.\n"
2377"The remaining characters are mapped through the given translation table.");
2378
2379#define BYTES_TRANSLATE_METHODDEF \
2380 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002381
2382static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002383bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
2384
2385static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002386bytes_translate(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002387{
2388 PyObject *return_value = NULL;
2389 PyObject *table;
2390 int group_right_1 = 0;
2391 PyObject *deletechars = NULL;
2392
2393 switch (PyTuple_GET_SIZE(args)) {
2394 case 1:
2395 if (!PyArg_ParseTuple(args, "O:translate", &table))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002396 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002397 break;
2398 case 2:
2399 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002400 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002401 group_right_1 = 1;
2402 break;
2403 default:
2404 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
Martin v. Löwis0efea322014-07-27 17:29:17 +02002405 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002406 }
Martin v. Löwis0efea322014-07-27 17:29:17 +02002407 return_value = bytes_translate_impl(self, table, group_right_1, deletechars);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002408
Martin v. Löwis0efea322014-07-27 17:29:17 +02002409exit:
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002410 return return_value;
2411}
2412
2413static PyObject *
2414bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Larry Hastingsdfbeb162014-10-13 10:39:41 +01002415/*[clinic end generated code: output=f0f29a57f41df5d8 input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002417 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002418 Py_buffer table_view = {NULL, NULL};
2419 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002420 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002421 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002423 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 Py_ssize_t inlen, tablen, dellen = 0;
2425 PyObject *result;
2426 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002427
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002428 if (PyBytes_Check(table)) {
2429 table_chars = PyBytes_AS_STRING(table);
2430 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002432 else if (table == Py_None) {
2433 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002434 tablen = 256;
2435 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002436 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002437 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002438 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002439 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002440 tablen = table_view.len;
2441 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002443 if (tablen != 256) {
2444 PyErr_SetString(PyExc_ValueError,
2445 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002446 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002447 return NULL;
2448 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002450 if (deletechars != NULL) {
2451 if (PyBytes_Check(deletechars)) {
2452 del_table_chars = PyBytes_AS_STRING(deletechars);
2453 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002455 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002456 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002457 PyBuffer_Release(&table_view);
2458 return NULL;
2459 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002460 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002461 dellen = del_table_view.len;
2462 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 }
2464 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002465 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 dellen = 0;
2467 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 inlen = PyBytes_GET_SIZE(input_obj);
2470 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002471 if (result == NULL) {
2472 PyBuffer_Release(&del_table_view);
2473 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002475 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 output_start = output = PyBytes_AsString(result);
2477 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002479 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 /* If no deletions are required, use faster code */
2481 for (i = inlen; --i >= 0; ) {
2482 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002483 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 changed = 1;
2485 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002486 if (!changed && PyBytes_CheckExact(input_obj)) {
2487 Py_INCREF(input_obj);
2488 Py_DECREF(result);
2489 result = input_obj;
2490 }
2491 PyBuffer_Release(&del_table_view);
2492 PyBuffer_Release(&table_view);
2493 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002496 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002497 for (i = 0; i < 256; i++)
2498 trans_table[i] = Py_CHARMASK(i);
2499 } else {
2500 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002501 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002503 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002505 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002506 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002507 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 for (i = inlen; --i >= 0; ) {
2510 c = Py_CHARMASK(*input++);
2511 if (trans_table[c] != -1)
2512 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2513 continue;
2514 changed = 1;
2515 }
2516 if (!changed && PyBytes_CheckExact(input_obj)) {
2517 Py_DECREF(result);
2518 Py_INCREF(input_obj);
2519 return input_obj;
2520 }
2521 /* Fix the size of the resulting string */
2522 if (inlen > 0)
2523 _PyBytes_Resize(&result, output - output_start);
2524 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525}
2526
2527
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002528/*[clinic input]
2529
2530@staticmethod
2531bytes.maketrans
2532
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002533 frm: Py_buffer
2534 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002535 /
2536
2537Return a translation table useable for the bytes or bytearray translate method.
2538
2539The returned table will be one where each byte in frm is mapped to the byte at
2540the same position in to.
2541
2542The bytes objects frm and to must be of the same length.
2543[clinic start generated code]*/
2544
2545PyDoc_STRVAR(bytes_maketrans__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002546"maketrans(frm, to, /)\n"
2547"--\n"
2548"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002549"Return a translation table useable for the bytes or bytearray translate method.\n"
2550"\n"
2551"The returned table will be one where each byte in frm is mapped to the byte at\n"
2552"the same position in to.\n"
2553"\n"
2554"The bytes objects frm and to must be of the same length.");
2555
2556#define BYTES_MAKETRANS_METHODDEF \
2557 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},
2558
Georg Brandlabc38772009-04-12 15:51:51 +00002559static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002560bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002561
2562static PyObject *
2563bytes_maketrans(void *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00002564{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002565 PyObject *return_value = NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002566 Py_buffer frm = {NULL, NULL};
2567 Py_buffer to = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002568
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002569 if (!PyArg_ParseTuple(args,
2570 "y*y*:maketrans",
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002571 &frm, &to))
2572 goto exit;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002573 return_value = bytes_maketrans_impl(&frm, &to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002574
2575exit:
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002576 /* Cleanup for frm */
2577 if (frm.obj)
2578 PyBuffer_Release(&frm);
2579 /* Cleanup for to */
2580 if (to.obj)
2581 PyBuffer_Release(&to);
2582
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002583 return return_value;
2584}
2585
2586static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002587bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2588/*[clinic end generated code: output=7df47390c476ac60 input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002589{
2590 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002591}
2592
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002593/* find and count characters and substrings */
2594
/* findchar(target, target_len, c): thin wrapper around memchr().
   Evaluates to a char * pointing at the first byte equal to c within the
   first target_len bytes of target, or NULL when there is none.  Every
   macro argument is parenthesized so that arbitrary expressions can be
   passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2597
2598/* String ops must return a string. */
2599/* If the object is subclass of string, create a copy */
2600Py_LOCAL(PyBytesObject *)
2601return_self(PyBytesObject *self)
2602{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 if (PyBytes_CheckExact(self)) {
2604 Py_INCREF(self);
2605 return self;
2606 }
2607 return (PyBytesObject *)PyBytes_FromStringAndSize(
2608 PyBytes_AS_STRING(self),
2609 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002610}
2611
2612Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002613countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 Py_ssize_t count=0;
2616 const char *start=target;
2617 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 while ( (start=findchar(start, end-start, c)) != NULL ) {
2620 count++;
2621 if (count >= maxcount)
2622 break;
2623 start += 1;
2624 }
2625 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002626}
2627
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628
2629/* Algorithms for different cases of string replacement */
2630
2631/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2632Py_LOCAL(PyBytesObject *)
2633replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002634 const char *to_s, Py_ssize_t to_len,
2635 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002636{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002637 char *self_s, *result_s;
2638 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002639 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002643
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002644 /* 1 at the end plus 1 after every character;
2645 count = min(maxcount, self_len + 1) */
2646 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002648 else
2649 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2650 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002652 /* Check for overflow */
2653 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002654 assert(count > 0);
2655 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 PyErr_SetString(PyExc_OverflowError,
2657 "replacement bytes are too long");
2658 return NULL;
2659 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002660 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 if (! (result = (PyBytesObject *)
2663 PyBytes_FromStringAndSize(NULL, result_len)) )
2664 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002666 self_s = PyBytes_AS_STRING(self);
2667 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 /* Lay the first one down (guaranteed this will occur) */
2672 Py_MEMCPY(result_s, to_s, to_len);
2673 result_s += to_len;
2674 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 for (i=0; i<count; i++) {
2677 *result_s++ = *self_s++;
2678 Py_MEMCPY(result_s, to_s, to_len);
2679 result_s += to_len;
2680 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 /* Copy the rest of the original string */
2683 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686}
2687
2688/* Special case for deleting a single character */
2689/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2690Py_LOCAL(PyBytesObject *)
2691replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 char *self_s, *result_s;
2695 char *start, *next, *end;
2696 Py_ssize_t self_len, result_len;
2697 Py_ssize_t count;
2698 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 self_len = PyBytes_GET_SIZE(self);
2701 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 count = countchar(self_s, self_len, from_c, maxcount);
2704 if (count == 0) {
2705 return return_self(self);
2706 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 result_len = self_len - count; /* from_len == 1 */
2709 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 if ( (result = (PyBytesObject *)
2712 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2713 return NULL;
2714 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 start = self_s;
2717 end = self_s + self_len;
2718 while (count-- > 0) {
2719 next = findchar(start, end-start, from_c);
2720 if (next == NULL)
2721 break;
2722 Py_MEMCPY(result_s, start, next-start);
2723 result_s += (next-start);
2724 start = next+1;
2725 }
2726 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002728 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002729}
2730
2731/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2732
2733Py_LOCAL(PyBytesObject *)
2734replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 const char *from_s, Py_ssize_t from_len,
2736 Py_ssize_t maxcount) {
2737 char *self_s, *result_s;
2738 char *start, *next, *end;
2739 Py_ssize_t self_len, result_len;
2740 Py_ssize_t count, offset;
2741 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002743 self_len = PyBytes_GET_SIZE(self);
2744 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002746 count = stringlib_count(self_s, self_len,
2747 from_s, from_len,
2748 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002750 if (count == 0) {
2751 /* no matches */
2752 return return_self(self);
2753 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002755 result_len = self_len - (count * from_len);
2756 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002758 if ( (result = (PyBytesObject *)
2759 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2760 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002764 start = self_s;
2765 end = self_s + self_len;
2766 while (count-- > 0) {
2767 offset = stringlib_find(start, end-start,
2768 from_s, from_len,
2769 0);
2770 if (offset == -1)
2771 break;
2772 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002774 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 result_s += (next-start);
2777 start = next+from_len;
2778 }
2779 Py_MEMCPY(result_s, start, end-start);
2780 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002781}
2782
2783/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2784Py_LOCAL(PyBytesObject *)
2785replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002786 char from_c, char to_c,
2787 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 char *self_s, *result_s, *start, *end, *next;
2790 Py_ssize_t self_len;
2791 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 /* The result string will be the same size */
2794 self_s = PyBytes_AS_STRING(self);
2795 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002797 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 if (next == NULL) {
2800 /* No matches; return the original string */
2801 return return_self(self);
2802 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 /* Need to make a new string */
2805 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2806 if (result == NULL)
2807 return NULL;
2808 result_s = PyBytes_AS_STRING(result);
2809 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 /* change everything in-place, starting with this one */
2812 start = result_s + (next-self_s);
2813 *start = to_c;
2814 start++;
2815 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002817 while (--maxcount > 0) {
2818 next = findchar(start, end-start, from_c);
2819 if (next == NULL)
2820 break;
2821 *next = to_c;
2822 start = next+1;
2823 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002824
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002825 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002826}
2827
2828/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2829Py_LOCAL(PyBytesObject *)
2830replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 const char *from_s, Py_ssize_t from_len,
2832 const char *to_s, Py_ssize_t to_len,
2833 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002834{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002835 char *result_s, *start, *end;
2836 char *self_s;
2837 Py_ssize_t self_len, offset;
2838 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002840 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002842 self_s = PyBytes_AS_STRING(self);
2843 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 offset = stringlib_find(self_s, self_len,
2846 from_s, from_len,
2847 0);
2848 if (offset == -1) {
2849 /* No matches; return the original string */
2850 return return_self(self);
2851 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002852
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 /* Need to make a new string */
2854 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2855 if (result == NULL)
2856 return NULL;
2857 result_s = PyBytes_AS_STRING(result);
2858 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002860 /* change everything in-place, starting with this one */
2861 start = result_s + offset;
2862 Py_MEMCPY(start, to_s, from_len);
2863 start += from_len;
2864 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 while ( --maxcount > 0) {
2867 offset = stringlib_find(start, end-start,
2868 from_s, from_len,
2869 0);
2870 if (offset==-1)
2871 break;
2872 Py_MEMCPY(start+offset, to_s, from_len);
2873 start += offset+from_len;
2874 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002877}
2878
2879/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2880Py_LOCAL(PyBytesObject *)
2881replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 char from_c,
2883 const char *to_s, Py_ssize_t to_len,
2884 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002885{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002886 char *self_s, *result_s;
2887 char *start, *next, *end;
2888 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002889 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002891
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002892 self_s = PyBytes_AS_STRING(self);
2893 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002895 count = countchar(self_s, self_len, from_c, maxcount);
2896 if (count == 0) {
2897 /* no matches, return unchanged */
2898 return return_self(self);
2899 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002901 /* use the difference between current and new, hence the "-1" */
2902 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002903 assert(count > 0);
2904 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 PyErr_SetString(PyExc_OverflowError,
2906 "replacement bytes are too long");
2907 return NULL;
2908 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002909 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002910
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 if ( (result = (PyBytesObject *)
2912 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2913 return NULL;
2914 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002916 start = self_s;
2917 end = self_s + self_len;
2918 while (count-- > 0) {
2919 next = findchar(start, end-start, from_c);
2920 if (next == NULL)
2921 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 if (next == start) {
2924 /* replace with the 'to' */
2925 Py_MEMCPY(result_s, to_s, to_len);
2926 result_s += to_len;
2927 start += 1;
2928 } else {
2929 /* copy the unchanged old then the 'to' */
2930 Py_MEMCPY(result_s, start, next-start);
2931 result_s += (next-start);
2932 Py_MEMCPY(result_s, to_s, to_len);
2933 result_s += to_len;
2934 start = next+1;
2935 }
2936 }
2937 /* Copy the remainder of the remaining string */
2938 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002940 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002941}
2942
2943/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2944Py_LOCAL(PyBytesObject *)
2945replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002946 const char *from_s, Py_ssize_t from_len,
2947 const char *to_s, Py_ssize_t to_len,
2948 Py_ssize_t maxcount) {
2949 char *self_s, *result_s;
2950 char *start, *next, *end;
2951 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002952 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002953 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002955 self_s = PyBytes_AS_STRING(self);
2956 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002958 count = stringlib_count(self_s, self_len,
2959 from_s, from_len,
2960 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 if (count == 0) {
2963 /* no matches, return unchanged */
2964 return return_self(self);
2965 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 /* Check for overflow */
2968 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002969 assert(count > 0);
2970 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002971 PyErr_SetString(PyExc_OverflowError,
2972 "replacement bytes are too long");
2973 return NULL;
2974 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002975 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002977 if ( (result = (PyBytesObject *)
2978 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2979 return NULL;
2980 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002981
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002982 start = self_s;
2983 end = self_s + self_len;
2984 while (count-- > 0) {
2985 offset = stringlib_find(start, end-start,
2986 from_s, from_len,
2987 0);
2988 if (offset == -1)
2989 break;
2990 next = start+offset;
2991 if (next == start) {
2992 /* replace with the 'to' */
2993 Py_MEMCPY(result_s, to_s, to_len);
2994 result_s += to_len;
2995 start += from_len;
2996 } else {
2997 /* copy the unchanged old then the 'to' */
2998 Py_MEMCPY(result_s, start, next-start);
2999 result_s += (next-start);
3000 Py_MEMCPY(result_s, to_s, to_len);
3001 result_s += to_len;
3002 start = next+from_len;
3003 }
3004 }
3005 /* Copy the remainder of the remaining string */
3006 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003009}
3010
3011
3012Py_LOCAL(PyBytesObject *)
3013replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003014 const char *from_s, Py_ssize_t from_len,
3015 const char *to_s, Py_ssize_t to_len,
3016 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 if (maxcount < 0) {
3019 maxcount = PY_SSIZE_T_MAX;
3020 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
3021 /* nothing to do; return the original string */
3022 return return_self(self);
3023 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003025 if (maxcount == 0 ||
3026 (from_len == 0 && to_len == 0)) {
3027 /* nothing to do; return the original string */
3028 return return_self(self);
3029 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 if (from_len == 0) {
3034 /* insert the 'to' string everywhere. */
3035 /* >>> "Python".replace("", ".") */
3036 /* '.P.y.t.h.o.n.' */
3037 return replace_interleave(self, to_s, to_len, maxcount);
3038 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3041 /* point for an empty self string to generate a non-empty string */
3042 /* Special case so the remaining code always gets a non-empty string */
3043 if (PyBytes_GET_SIZE(self) == 0) {
3044 return return_self(self);
3045 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 if (to_len == 0) {
3048 /* delete all occurrences of 'from' string */
3049 if (from_len == 1) {
3050 return replace_delete_single_character(
3051 self, from_s[0], maxcount);
3052 } else {
3053 return replace_delete_substring(self, from_s,
3054 from_len, maxcount);
3055 }
3056 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003060 if (from_len == to_len) {
3061 if (from_len == 1) {
3062 return replace_single_character_in_place(
3063 self,
3064 from_s[0],
3065 to_s[0],
3066 maxcount);
3067 } else {
3068 return replace_substring_in_place(
3069 self, from_s, from_len, to_s, to_len,
3070 maxcount);
3071 }
3072 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 /* Otherwise use the more generic algorithms */
3075 if (from_len == 1) {
3076 return replace_single_character(self, from_s[0],
3077 to_s, to_len, maxcount);
3078 } else {
3079 /* len('from')>=2, len('to')>=1 */
3080 return replace_substring(self, from_s, from_len, to_s, to_len,
3081 maxcount);
3082 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003083}
3084
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003085
3086/*[clinic input]
3087bytes.replace
3088
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003089 old: Py_buffer
3090 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003091 count: Py_ssize_t = -1
3092 Maximum number of occurrences to replace.
3093 -1 (the default value) means replace all occurrences.
3094 /
3095
3096Return a copy with all occurrences of substring old replaced by new.
3097
3098If the optional argument count is given, only the first count occurrences are
3099replaced.
3100[clinic start generated code]*/
3101
3102PyDoc_STRVAR(bytes_replace__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003103"replace($self, old, new, count=-1, /)\n"
3104"--\n"
3105"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003106"Return a copy with all occurrences of substring old replaced by new.\n"
3107"\n"
3108" count\n"
3109" Maximum number of occurrences to replace.\n"
3110" -1 (the default value) means replace all occurrences.\n"
3111"\n"
3112"If the optional argument count is given, only the first count occurrences are\n"
3113"replaced.");
3114
3115#define BYTES_REPLACE_METHODDEF \
3116 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003117
3118static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003119bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003120
3121static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003122bytes_replace(PyBytesObject*self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003123{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003124 PyObject *return_value = NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003125 Py_buffer old = {NULL, NULL};
3126 Py_buffer new = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003128
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003129 if (!PyArg_ParseTuple(args,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003130 "y*y*|n:replace",
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003131 &old, &new, &count))
3132 goto exit;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003133 return_value = bytes_replace_impl(self, &old, &new, count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003134
3135exit:
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003136 /* Cleanup for old */
3137 if (old.obj)
3138 PyBuffer_Release(&old);
3139 /* Cleanup for new */
3140 if (new.obj)
3141 PyBuffer_Release(&new);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003142
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003143 return return_value;
3144}
3145
3146static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003147bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count)
3148/*[clinic end generated code: output=f07bd9ecf29ee8d8 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003149{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003150 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003151 (const char *)old->buf, old->len,
3152 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003153}
3154
3155/** End DALKE **/
3156
3157/* Matches the end (direction >= 0) or start (direction < 0) of self
3158 * against substr, using the start and end arguments. Returns
3159 * -1 on error, 0 if not found and 1 if found.
3160 */
3161Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003162_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003165 Py_ssize_t len = PyBytes_GET_SIZE(self);
3166 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003167 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003168 const char* sub;
3169 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003171 if (PyBytes_Check(substr)) {
3172 sub = PyBytes_AS_STRING(substr);
3173 slen = PyBytes_GET_SIZE(substr);
3174 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003175 else {
3176 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
3177 return -1;
3178 sub = sub_view.buf;
3179 slen = sub_view.len;
3180 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003181 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003183 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003185 if (direction < 0) {
3186 /* startswith */
3187 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003188 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003189 } else {
3190 /* endswith */
3191 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003192 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 if (end-slen > start)
3195 start = end - slen;
3196 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003197 if (end-start < slen)
3198 goto notfound;
3199 if (memcmp(str+start, sub, slen) != 0)
3200 goto notfound;
3201
3202 PyBuffer_Release(&sub_view);
3203 return 1;
3204
3205notfound:
3206 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003207 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003208}
3209
3210
3211PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003212"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003213\n\
3214Return True if B starts with the specified prefix, False otherwise.\n\
3215With optional start, test B beginning at that position.\n\
3216With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003217prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003218
3219static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003220bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003221{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003222 Py_ssize_t start = 0;
3223 Py_ssize_t end = PY_SSIZE_T_MAX;
3224 PyObject *subobj;
3225 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003226
Jesus Ceaac451502011-04-20 17:09:23 +02003227 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003228 return NULL;
3229 if (PyTuple_Check(subobj)) {
3230 Py_ssize_t i;
3231 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3232 result = _bytes_tailmatch(self,
3233 PyTuple_GET_ITEM(subobj, i),
3234 start, end, -1);
3235 if (result == -1)
3236 return NULL;
3237 else if (result) {
3238 Py_RETURN_TRUE;
3239 }
3240 }
3241 Py_RETURN_FALSE;
3242 }
3243 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003244 if (result == -1) {
3245 if (PyErr_ExceptionMatches(PyExc_TypeError))
3246 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3247 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003248 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003249 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003250 else
3251 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003252}
3253
3254
3255PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003256"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003257\n\
3258Return True if B ends with the specified suffix, False otherwise.\n\
3259With optional start, test B beginning at that position.\n\
3260With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003261suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003262
3263static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003264bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003266 Py_ssize_t start = 0;
3267 Py_ssize_t end = PY_SSIZE_T_MAX;
3268 PyObject *subobj;
3269 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003270
Jesus Ceaac451502011-04-20 17:09:23 +02003271 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003272 return NULL;
3273 if (PyTuple_Check(subobj)) {
3274 Py_ssize_t i;
3275 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3276 result = _bytes_tailmatch(self,
3277 PyTuple_GET_ITEM(subobj, i),
3278 start, end, +1);
3279 if (result == -1)
3280 return NULL;
3281 else if (result) {
3282 Py_RETURN_TRUE;
3283 }
3284 }
3285 Py_RETURN_FALSE;
3286 }
3287 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003288 if (result == -1) {
3289 if (PyErr_ExceptionMatches(PyExc_TypeError))
3290 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3291 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003292 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003293 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003294 else
3295 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003296}
3297
3298
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003299/*[clinic input]
3300bytes.decode
3301
3302 encoding: str(c_default="NULL") = 'utf-8'
3303 The encoding with which to decode the bytes.
3304 errors: str(c_default="NULL") = 'strict'
3305 The error handling scheme to use for the handling of decoding errors.
3306 The default is 'strict' meaning that decoding errors raise a
3307 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3308 as well as any other name registered with codecs.register_error that
3309 can handle UnicodeDecodeErrors.
3310
3311Decode the bytes using the codec registered for encoding.
3312[clinic start generated code]*/
3313
3314PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003315"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
3316"--\n"
3317"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003318"Decode the bytes using the codec registered for encoding.\n"
3319"\n"
3320" encoding\n"
3321" The encoding with which to decode the bytes.\n"
3322" errors\n"
3323" The error handling scheme to use for the handling of decoding errors.\n"
3324" The default is \'strict\' meaning that decoding errors raise a\n"
3325" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
3326" as well as any other name registered with codecs.register_error that\n"
3327" can handle UnicodeDecodeErrors.");
3328
3329#define BYTES_DECODE_METHODDEF \
3330 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
3331
3332static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003333bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003334
3335static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003336bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00003337{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003338 PyObject *return_value = NULL;
3339 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003340 const char *encoding = NULL;
3341 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00003342
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003343 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3344 "|ss:decode", _keywords,
3345 &encoding, &errors))
3346 goto exit;
3347 return_value = bytes_decode_impl(self, encoding, errors);
3348
3349exit:
3350 return return_value;
3351}
3352
3353static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003354bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
3355/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003356{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003357 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003358}
3359
Guido van Rossum20188312006-05-05 15:15:40 +00003360
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003361/*[clinic input]
3362bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003363
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003364 keepends: int(py_default="False") = 0
3365
3366Return a list of the lines in the bytes, breaking at line boundaries.
3367
3368Line breaks are not included in the resulting list unless keepends is given and
3369true.
3370[clinic start generated code]*/
3371
3372PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003373"splitlines($self, /, keepends=False)\n"
3374"--\n"
3375"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003376"Return a list of the lines in the bytes, breaking at line boundaries.\n"
3377"\n"
3378"Line breaks are not included in the resulting list unless keepends is given and\n"
3379"true.");
3380
3381#define BYTES_SPLITLINES_METHODDEF \
3382 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
3383
3384static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003385bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003386
3387static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003388bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003389{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003390 PyObject *return_value = NULL;
3391 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003392 int keepends = 0;
3393
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003394 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3395 "|i:splitlines", _keywords,
3396 &keepends))
3397 goto exit;
3398 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003399
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003400exit:
3401 return return_value;
3402}
3403
3404static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003405bytes_splitlines_impl(PyBytesObject*self, int keepends)
3406/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003407{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003408 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003409 (PyObject*) self, PyBytes_AS_STRING(self),
3410 PyBytes_GET_SIZE(self), keepends
3411 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003412}
3413
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003414static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003415hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003417 if (c >= 128)
3418 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003419 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003420 return c - '0';
3421 else {
David Malcolm96960882010-11-05 17:23:41 +00003422 if (Py_ISUPPER(c))
3423 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003424 if (c >= 'a' && c <= 'f')
3425 return c - 'a' + 10;
3426 }
3427 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003428}
3429
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003430/*[clinic input]
3431@classmethod
3432bytes.fromhex
3433
3434 string: unicode
3435 /
3436
3437Create a bytes object from a string of hexadecimal numbers.
3438
3439Spaces between two numbers are accepted.
3440Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3441[clinic start generated code]*/
3442
3443PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003444"fromhex($type, string, /)\n"
3445"--\n"
3446"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003447"Create a bytes object from a string of hexadecimal numbers.\n"
3448"\n"
3449"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02003450"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003451
3452#define BYTES_FROMHEX_METHODDEF \
3453 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
3454
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003455static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003456bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003457
3458static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003459bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003460{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003461 PyObject *return_value = NULL;
3462 PyObject *string;
3463
3464 if (!PyArg_ParseTuple(args,
3465 "U:fromhex",
3466 &string))
3467 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02003468 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003469
3470exit:
3471 return return_value;
3472}
3473
3474static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003475bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
3476/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003477{
3478 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003479 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003480 Py_ssize_t hexlen, byteslen, i, j;
3481 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003482 void *data;
3483 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003484
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003485 assert(PyUnicode_Check(string));
3486 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003487 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003488 kind = PyUnicode_KIND(string);
3489 data = PyUnicode_DATA(string);
3490 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003492 byteslen = hexlen/2; /* This overestimates if there are spaces */
3493 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3494 if (!newstring)
3495 return NULL;
3496 buf = PyBytes_AS_STRING(newstring);
3497 for (i = j = 0; i < hexlen; i += 2) {
3498 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003499 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003500 i++;
3501 if (i >= hexlen)
3502 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003503 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3504 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003505 if (top == -1 || bot == -1) {
3506 PyErr_Format(PyExc_ValueError,
3507 "non-hexadecimal number found in "
3508 "fromhex() arg at position %zd", i);
3509 goto error;
3510 }
3511 buf[j++] = (top << 4) + bot;
3512 }
3513 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3514 goto error;
3515 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003516
3517 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003518 Py_XDECREF(newstring);
3519 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003520}
3521
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003522static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003523bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003524{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003525 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003526}
3527
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003528
/* Method table for the bytes type, kept in alphabetical order by method
   name.  Entries written as BYTES_<NAME>_METHODDEF are macros emitted next
   to the corresponding Argument Clinic function; each one expands to a
   complete initializer including its trailing comma, which is why no comma
   follows them here. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
     expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    BYTES_FROMHEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL,     NULL}                         /* sentinel */
};
3584
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003585static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003586bytes_mod(PyObject *v, PyObject *w)
3587{
3588 if (!PyBytes_Check(v))
3589 Py_RETURN_NOTIMPLEMENTED;
3590 return _PyBytes_Format(v, w);
3591}
3592
/* Number protocol for bytes.  Only the remainder slot is filled in, so
   that the % operator reaches bytes_mod; the initializer is positional and
   every slot after nb_remainder is left zero-initialized. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
3599
3600static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003601str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3602
3603static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003604bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003605{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003606 PyObject *x = NULL;
3607 const char *encoding = NULL;
3608 const char *errors = NULL;
3609 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003610 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003611 Py_ssize_t size;
3612 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003613 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003615 if (type != &PyBytes_Type)
3616 return str_subtype_new(type, args, kwds);
3617 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3618 &encoding, &errors))
3619 return NULL;
3620 if (x == NULL) {
3621 if (encoding != NULL || errors != NULL) {
3622 PyErr_SetString(PyExc_TypeError,
3623 "encoding or errors without sequence "
3624 "argument");
3625 return NULL;
3626 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003627 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003628 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003630 if (PyUnicode_Check(x)) {
3631 /* Encode via the codec registry */
3632 if (encoding == NULL) {
3633 PyErr_SetString(PyExc_TypeError,
3634 "string argument without an encoding");
3635 return NULL;
3636 }
3637 new = PyUnicode_AsEncodedString(x, encoding, errors);
3638 if (new == NULL)
3639 return NULL;
3640 assert(PyBytes_Check(new));
3641 return new;
3642 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003643
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003644 /* If it's not unicode, there can't be encoding or errors */
3645 if (encoding != NULL || errors != NULL) {
3646 PyErr_SetString(PyExc_TypeError,
3647 "encoding or errors without a string argument");
3648 return NULL;
3649 }
3650
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003651 /* We'd like to call PyObject_Bytes here, but we need to check for an
3652 integer argument before deferring to PyBytes_FromObject, something
3653 PyObject_Bytes doesn't do. */
3654 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3655 if (func != NULL) {
3656 new = PyObject_CallFunctionObjArgs(func, NULL);
3657 Py_DECREF(func);
3658 if (new == NULL)
3659 return NULL;
3660 if (!PyBytes_Check(new)) {
3661 PyErr_Format(PyExc_TypeError,
3662 "__bytes__ returned non-bytes (type %.200s)",
3663 Py_TYPE(new)->tp_name);
3664 Py_DECREF(new);
3665 return NULL;
3666 }
3667 return new;
3668 }
3669 else if (PyErr_Occurred())
3670 return NULL;
3671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003672 /* Is it an integer? */
3673 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3674 if (size == -1 && PyErr_Occurred()) {
3675 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3676 return NULL;
3677 PyErr_Clear();
3678 }
3679 else if (size < 0) {
3680 PyErr_SetString(PyExc_ValueError, "negative count");
3681 return NULL;
3682 }
3683 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003684 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003685 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003686 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003687 return new;
3688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003689
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003690 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003691}
3692
3693PyObject *
3694PyBytes_FromObject(PyObject *x)
3695{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003696 PyObject *new, *it;
3697 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003699 if (x == NULL) {
3700 PyErr_BadInternalCall();
3701 return NULL;
3702 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003703
3704 if (PyBytes_CheckExact(x)) {
3705 Py_INCREF(x);
3706 return x;
3707 }
3708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003709 /* Use the modern buffer interface */
3710 if (PyObject_CheckBuffer(x)) {
3711 Py_buffer view;
3712 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3713 return NULL;
3714 new = PyBytes_FromStringAndSize(NULL, view.len);
3715 if (!new)
3716 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003717 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3718 &view, view.len, 'C') < 0)
3719 goto fail;
3720 PyBuffer_Release(&view);
3721 return new;
3722 fail:
3723 Py_XDECREF(new);
3724 PyBuffer_Release(&view);
3725 return NULL;
3726 }
3727 if (PyUnicode_Check(x)) {
3728 PyErr_SetString(PyExc_TypeError,
3729 "cannot convert unicode object to bytes");
3730 return NULL;
3731 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003733 if (PyList_CheckExact(x)) {
3734 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3735 if (new == NULL)
3736 return NULL;
3737 for (i = 0; i < Py_SIZE(x); i++) {
3738 Py_ssize_t value = PyNumber_AsSsize_t(
3739 PyList_GET_ITEM(x, i), PyExc_ValueError);
3740 if (value == -1 && PyErr_Occurred()) {
3741 Py_DECREF(new);
3742 return NULL;
3743 }
3744 if (value < 0 || value >= 256) {
3745 PyErr_SetString(PyExc_ValueError,
3746 "bytes must be in range(0, 256)");
3747 Py_DECREF(new);
3748 return NULL;
3749 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003750 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003751 }
3752 return new;
3753 }
3754 if (PyTuple_CheckExact(x)) {
3755 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3756 if (new == NULL)
3757 return NULL;
3758 for (i = 0; i < Py_SIZE(x); i++) {
3759 Py_ssize_t value = PyNumber_AsSsize_t(
3760 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3761 if (value == -1 && PyErr_Occurred()) {
3762 Py_DECREF(new);
3763 return NULL;
3764 }
3765 if (value < 0 || value >= 256) {
3766 PyErr_SetString(PyExc_ValueError,
3767 "bytes must be in range(0, 256)");
3768 Py_DECREF(new);
3769 return NULL;
3770 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003771 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003772 }
3773 return new;
3774 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003776 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003777 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003778 if (size == -1 && PyErr_Occurred())
3779 return NULL;
3780 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3781 returning a shared empty bytes string. This required because we
3782 want to call _PyBytes_Resize() the returned object, which we can
3783 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003784 if (size == 0)
3785 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003786 new = PyBytes_FromStringAndSize(NULL, size);
3787 if (new == NULL)
3788 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003789 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003791 /* Get the iterator */
3792 it = PyObject_GetIter(x);
3793 if (it == NULL)
3794 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003796 /* Run the iterator to exhaustion */
3797 for (i = 0; ; i++) {
3798 PyObject *item;
3799 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003801 /* Get the next item */
3802 item = PyIter_Next(it);
3803 if (item == NULL) {
3804 if (PyErr_Occurred())
3805 goto error;
3806 break;
3807 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003808
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003809 /* Interpret it as an int (__index__) */
3810 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3811 Py_DECREF(item);
3812 if (value == -1 && PyErr_Occurred())
3813 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003815 /* Range check */
3816 if (value < 0 || value >= 256) {
3817 PyErr_SetString(PyExc_ValueError,
3818 "bytes must be in range(0, 256)");
3819 goto error;
3820 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003821
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003822 /* Append the byte */
3823 if (i >= size) {
3824 size = 2 * size + 1;
3825 if (_PyBytes_Resize(&new, size) < 0)
3826 goto error;
3827 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003828 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003829 }
3830 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003832 /* Clean up and return success */
3833 Py_DECREF(it);
3834 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003835
3836 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003837 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003838 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003839 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003840}
3841
3842static PyObject *
3843str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3844{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003845 PyObject *tmp, *pnew;
3846 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003847
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003848 assert(PyType_IsSubtype(type, &PyBytes_Type));
3849 tmp = bytes_new(&PyBytes_Type, args, kwds);
3850 if (tmp == NULL)
3851 return NULL;
3852 assert(PyBytes_CheckExact(tmp));
3853 n = PyBytes_GET_SIZE(tmp);
3854 pnew = type->tp_alloc(type, n);
3855 if (pnew != NULL) {
3856 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3857 PyBytes_AS_STRING(tmp), n+1);
3858 ((PyBytesObject *)pnew)->ob_shash =
3859 ((PyBytesObject *)tmp)->ob_shash;
3860 }
3861 Py_DECREF(tmp);
3862 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003863}
3864
/* Docstring of the bytes type; referenced as tp_doc in PyBytes_Type. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003865PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003866"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003867bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003868bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003869bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3870bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003871\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003872Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003873 - an iterable yielding integers in range(256)\n\
3874 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003875 - any object implementing the buffer API.\n\
3876 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003877
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003878static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003879
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003880PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003881 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3882 "bytes",
3883 PyBytesObject_SIZE,
3884 sizeof(char),
3885 bytes_dealloc, /* tp_dealloc */
3886 0, /* tp_print */
3887 0, /* tp_getattr */
3888 0, /* tp_setattr */
3889 0, /* tp_reserved */
3890 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003891 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003892 &bytes_as_sequence, /* tp_as_sequence */
3893 &bytes_as_mapping, /* tp_as_mapping */
3894 (hashfunc)bytes_hash, /* tp_hash */
3895 0, /* tp_call */
3896 bytes_str, /* tp_str */
3897 PyObject_GenericGetAttr, /* tp_getattro */
3898 0, /* tp_setattro */
3899 &bytes_as_buffer, /* tp_as_buffer */
3900 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3901 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3902 bytes_doc, /* tp_doc */
3903 0, /* tp_traverse */
3904 0, /* tp_clear */
3905 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3906 0, /* tp_weaklistoffset */
3907 bytes_iter, /* tp_iter */
3908 0, /* tp_iternext */
3909 bytes_methods, /* tp_methods */
3910 0, /* tp_members */
3911 0, /* tp_getset */
3912 &PyBaseObject_Type, /* tp_base */
3913 0, /* tp_dict */
3914 0, /* tp_descr_get */
3915 0, /* tp_descr_set */
3916 0, /* tp_dictoffset */
3917 0, /* tp_init */
3918 0, /* tp_alloc */
3919 bytes_new, /* tp_new */
3920 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003921};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003922
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003923void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003924PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003925{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003926 assert(pv != NULL);
3927 if (*pv == NULL)
3928 return;
3929 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003930 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003931 return;
3932 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003933
3934 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3935 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003936 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003937 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003938
Antoine Pitrou161d6952014-05-01 14:36:20 +02003939 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003940 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003941 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3942 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3943 Py_CLEAR(*pv);
3944 return;
3945 }
3946
3947 oldsize = PyBytes_GET_SIZE(*pv);
3948 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3949 PyErr_NoMemory();
3950 goto error;
3951 }
3952 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3953 goto error;
3954
3955 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3956 PyBuffer_Release(&wb);
3957 return;
3958
3959 error:
3960 PyBuffer_Release(&wb);
3961 Py_CLEAR(*pv);
3962 return;
3963 }
3964
3965 else {
3966 /* Multiple references, need to create new object */
3967 PyObject *v;
3968 v = bytes_concat(*pv, w);
3969 Py_DECREF(*pv);
3970 *pv = v;
3971 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003972}
3973
3974void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003975PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003976{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003977 PyBytes_Concat(pv, w);
3978 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003979}
3980
3981
Ethan Furmanb95b5612015-01-23 20:05:18 -08003982/* The following function breaks the notion that bytes are immutable:
3983 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003984 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003985 as creating a new bytes object and destroying the old one, only
3986 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003987 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003988 Note that if there's not enough memory to resize the bytes object, the
3989 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003990 memory" exception is set, and -1 is returned. Else (on success) 0 is
3991 returned, and the value in *pv may or may not be the same as on input.
3992 As always, an extra byte is allocated for a trailing \0 byte (newsize
3993 does *not* include that), and a trailing \0 byte is stored.
3994*/
3995
3996int
3997_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3998{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003999 PyObject *v;
4000 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004001 v = *pv;
4002 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
4003 *pv = 0;
4004 Py_DECREF(v);
4005 PyErr_BadInternalCall();
4006 return -1;
4007 }
4008 /* XXX UNREF/NEWREF interface should be more symmetrical */
4009 _Py_DEC_REFTOTAL;
4010 _Py_ForgetReference(v);
4011 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03004012 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004013 if (*pv == NULL) {
4014 PyObject_Del(v);
4015 PyErr_NoMemory();
4016 return -1;
4017 }
4018 _Py_NewReference(*pv);
4019 sv = (PyBytesObject *) *pv;
4020 Py_SIZE(sv) = newsize;
4021 sv->ob_sval[newsize] = '\0';
4022 sv->ob_shash = -1; /* invalidate cached hash value */
4023 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004024}
4025
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004026void
4027PyBytes_Fini(void)
4028{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004029 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02004030 for (i = 0; i < UCHAR_MAX + 1; i++)
4031 Py_CLEAR(characters[i]);
4032 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004033}
4034
Benjamin Peterson4116f362008-05-27 00:36:20 +00004035/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004036
4037typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004038 PyObject_HEAD
4039 Py_ssize_t it_index;
4040 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004041} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004042
4043static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004044striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004045{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004046 _PyObject_GC_UNTRACK(it);
4047 Py_XDECREF(it->it_seq);
4048 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004049}
4050
4051static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004052striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004053{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004054 Py_VISIT(it->it_seq);
4055 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004056}
4057
4058static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004059striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004060{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004061 PyBytesObject *seq;
4062 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004064 assert(it != NULL);
4065 seq = it->it_seq;
4066 if (seq == NULL)
4067 return NULL;
4068 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004070 if (it->it_index < PyBytes_GET_SIZE(seq)) {
4071 item = PyLong_FromLong(
4072 (unsigned char)seq->ob_sval[it->it_index]);
4073 if (item != NULL)
4074 ++it->it_index;
4075 return item;
4076 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004078 Py_DECREF(seq);
4079 it->it_seq = NULL;
4080 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004081}
4082
4083static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004084striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004085{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004086 Py_ssize_t len = 0;
4087 if (it->it_seq)
4088 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
4089 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004090}
4091
4092PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004093 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004094
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004095static PyObject *
4096striter_reduce(striterobject *it)
4097{
4098 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02004099 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004100 it->it_seq, it->it_index);
4101 } else {
4102 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
4103 if (u == NULL)
4104 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02004105 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004106 }
4107}
4108
4109PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
4110
4111static PyObject *
4112striter_setstate(striterobject *it, PyObject *state)
4113{
4114 Py_ssize_t index = PyLong_AsSsize_t(state);
4115 if (index == -1 && PyErr_Occurred())
4116 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00004117 if (it->it_seq != NULL) {
4118 if (index < 0)
4119 index = 0;
4120 else if (index > PyBytes_GET_SIZE(it->it_seq))
4121 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
4122 it->it_index = index;
4123 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004124 Py_RETURN_NONE;
4125}
4126
4127PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
4128
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004129static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004130 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4131 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004132 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
4133 reduce_doc},
4134 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
4135 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004136 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004137};
4138
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004139PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004140 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4141 "bytes_iterator", /* tp_name */
4142 sizeof(striterobject), /* tp_basicsize */
4143 0, /* tp_itemsize */
4144 /* methods */
4145 (destructor)striter_dealloc, /* tp_dealloc */
4146 0, /* tp_print */
4147 0, /* tp_getattr */
4148 0, /* tp_setattr */
4149 0, /* tp_reserved */
4150 0, /* tp_repr */
4151 0, /* tp_as_number */
4152 0, /* tp_as_sequence */
4153 0, /* tp_as_mapping */
4154 0, /* tp_hash */
4155 0, /* tp_call */
4156 0, /* tp_str */
4157 PyObject_GenericGetAttr, /* tp_getattro */
4158 0, /* tp_setattro */
4159 0, /* tp_as_buffer */
4160 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4161 0, /* tp_doc */
4162 (traverseproc)striter_traverse, /* tp_traverse */
4163 0, /* tp_clear */
4164 0, /* tp_richcompare */
4165 0, /* tp_weaklistoffset */
4166 PyObject_SelfIter, /* tp_iter */
4167 (iternextfunc)striter_next, /* tp_iternext */
4168 striter_methods, /* tp_methods */
4169 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004170};
4171
4172static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00004173bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004175 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004177 if (!PyBytes_Check(seq)) {
4178 PyErr_BadInternalCall();
4179 return NULL;
4180 }
4181 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
4182 if (it == NULL)
4183 return NULL;
4184 it->it_index = 0;
4185 Py_INCREF(seq);
4186 it->it_seq = (PyBytesObject *)seq;
4187 _PyObject_GC_TRACK(it);
4188 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004189}