blob: 673bb00b984370dadf8cd5f7367b1619ab97d96b [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
/* Cache of one-byte bytes objects, indexed by byte value.  Slots start out
   NULL and are filled in by the constructors below the first time a
   one-byte object with that value is created. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty object is created. */
23static PyBytesObject *nullstring;
24
Mark Dickinsonfd24b322008-12-06 15:33:31 +000025/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26 for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Christian Heimes2c9c7a52008-05-26 13:42:13 +000033/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000034 For PyBytes_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
36
Martin Pantera90a4a92016-05-30 04:04:50 +000037 For PyBytes_FromStringAndSize(), the parameter `str' is
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038 either NULL or else points to a string containing at least `size' bytes.
39 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000045 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046 alter the data yourself, since the strings may be shared.
47
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020050 allocated for string data, not counting the null terminating character.
51 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 PyBytes_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyBytes_FromString()).
54*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
397/* Format codes
398 * F_LJUST '-'
399 * F_SIGN '+'
400 * F_BLANK ' '
401 * F_ALT '#'
402 * F_ZERO '0'
403 */
404#define F_LJUST (1<<0)
405#define F_SIGN (1<<1)
406#define F_BLANK (1<<2)
407#define F_ALT (1<<3)
408#define F_ZERO (1<<4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
412static PyObject *
413formatfloat(PyObject *v, int flags, int prec, int type)
414{
415 char *p;
416 PyObject *result;
417 double x;
418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
434 result = PyBytes_FromStringAndSize(p, strlen(p));
435 PyMem_Free(p);
436 return result;
437}
438
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300439static PyObject *
440formatlong(PyObject *v, int flags, int prec, int type)
441{
442 PyObject *result, *iobj;
443 if (type == 'i')
444 type = 'd';
445 if (PyLong_Check(v))
446 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
447 if (PyNumber_Check(v)) {
448 /* make sure number is a type of integer for o, x, and X */
449 if (type == 'o' || type == 'x' || type == 'X')
450 iobj = PyNumber_Index(v);
451 else
452 iobj = PyNumber_Long(v);
453 if (iobj == NULL) {
454 if (!PyErr_ExceptionMatches(PyExc_TypeError))
455 return NULL;
456 }
457 else if (!PyLong_Check(iobj))
458 Py_CLEAR(iobj);
459 if (iobj != NULL) {
460 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
461 Py_DECREF(iobj);
462 return result;
463 }
464 }
465 PyErr_Format(PyExc_TypeError,
466 "%%%c format: %s is required, not %.200s", type,
467 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
468 : "a number",
469 Py_TYPE(v)->tp_name);
470 return NULL;
471}
472
473static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200474byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800475{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200476 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
477 *p = PyBytes_AS_STRING(arg)[0];
478 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800479 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200480 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
481 *p = PyByteArray_AS_STRING(arg)[0];
482 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483 }
484 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300485 PyObject *iobj;
486 long ival;
487 int overflow;
488 /* make sure number is a type of integer */
489 if (PyLong_Check(arg)) {
490 ival = PyLong_AsLongAndOverflow(arg, &overflow);
491 }
492 else {
493 iobj = PyNumber_Index(arg);
494 if (iobj == NULL) {
495 if (!PyErr_ExceptionMatches(PyExc_TypeError))
496 return 0;
497 goto onError;
498 }
499 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
500 Py_DECREF(iobj);
501 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300502 if (!overflow && ival == -1 && PyErr_Occurred())
503 goto onError;
504 if (overflow || !(0 <= ival && ival <= 255)) {
505 PyErr_SetString(PyExc_OverflowError,
506 "%c arg not in range(256)");
507 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800508 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300509 *p = (char)ival;
510 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800511 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300512 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200513 PyErr_SetString(PyExc_TypeError,
514 "%c requires an integer in range(256) or a single byte");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516}
517
518static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200519format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 /* is it a bytes object? */
524 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 *pbuf = PyBytes_AS_STRING(v);
526 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800527 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 return v;
529 }
530 if (PyByteArray_Check(v)) {
531 *pbuf = PyByteArray_AS_STRING(v);
532 *plen = PyByteArray_GET_SIZE(v);
533 Py_INCREF(v);
534 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 }
536 /* does it support __bytes__? */
537 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
538 if (func != NULL) {
539 result = PyObject_CallFunctionObjArgs(func, NULL);
540 Py_DECREF(func);
541 if (result == NULL)
542 return NULL;
543 if (!PyBytes_Check(result)) {
544 PyErr_Format(PyExc_TypeError,
545 "__bytes__ returned non-bytes (type %.200s)",
546 Py_TYPE(result)->tp_name);
547 Py_DECREF(result);
548 return NULL;
549 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200550 *pbuf = PyBytes_AS_STRING(result);
551 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 return result;
553 }
554 PyErr_Format(PyExc_TypeError,
555 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
556 Py_TYPE(v)->tp_name);
557 return NULL;
558}
559
560/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
561
562 FORMATBUFLEN is the length of the buffer in which the ints &
563 chars are formatted. XXX This is a magic number. Each formatting
564 routine does bounds checking to ensure no overflow, but a better
565 solution may be to malloc a buffer of appropriate size for each
566 format. For now, the current solution is sufficient.
567*/
568#define FORMATBUFLEN (size_t)120
569
570PyObject *
571_PyBytes_Format(PyObject *format, PyObject *args)
572{
573 char *fmt, *res;
574 Py_ssize_t arglen, argidx;
575 Py_ssize_t reslen, rescnt, fmtcnt;
576 int args_owned = 0;
577 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578 PyObject *dict = NULL;
579 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
580 PyErr_BadInternalCall();
581 return NULL;
582 }
583 fmt = PyBytes_AS_STRING(format);
584 fmtcnt = PyBytes_GET_SIZE(format);
585 reslen = rescnt = fmtcnt + 100;
586 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
587 if (result == NULL)
588 return NULL;
589 res = PyBytes_AsString(result);
590 if (PyTuple_Check(args)) {
591 arglen = PyTuple_GET_SIZE(args);
592 argidx = 0;
593 }
594 else {
595 arglen = -1;
596 argidx = -2;
597 }
598 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
599 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
600 !PyByteArray_Check(args)) {
601 dict = args;
602 }
603 while (--fmtcnt >= 0) {
604 if (*fmt != '%') {
605 if (--rescnt < 0) {
606 rescnt = fmtcnt + 100;
607 reslen += rescnt;
608 if (_PyBytes_Resize(&result, reslen))
609 return NULL;
610 res = PyBytes_AS_STRING(result)
611 + reslen - rescnt;
612 --rescnt;
613 }
614 *res++ = *fmt++;
615 }
616 else {
617 /* Got a format specifier */
618 int flags = 0;
619 Py_ssize_t width = -1;
620 int prec = -1;
621 int c = '\0';
622 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800623 PyObject *v = NULL;
624 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200625 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800626 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200627 Py_ssize_t len = 0;
628 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629
Ethan Furmanb95b5612015-01-23 20:05:18 -0800630 fmt++;
631 if (*fmt == '(') {
632 char *keystart;
633 Py_ssize_t keylen;
634 PyObject *key;
635 int pcount = 1;
636
637 if (dict == NULL) {
638 PyErr_SetString(PyExc_TypeError,
639 "format requires a mapping");
640 goto error;
641 }
642 ++fmt;
643 --fmtcnt;
644 keystart = fmt;
645 /* Skip over balanced parentheses */
646 while (pcount > 0 && --fmtcnt >= 0) {
647 if (*fmt == ')')
648 --pcount;
649 else if (*fmt == '(')
650 ++pcount;
651 fmt++;
652 }
653 keylen = fmt - keystart - 1;
654 if (fmtcnt < 0 || pcount > 0) {
655 PyErr_SetString(PyExc_ValueError,
656 "incomplete format key");
657 goto error;
658 }
659 key = PyBytes_FromStringAndSize(keystart,
660 keylen);
661 if (key == NULL)
662 goto error;
663 if (args_owned) {
664 Py_DECREF(args);
665 args_owned = 0;
666 }
667 args = PyObject_GetItem(dict, key);
668 Py_DECREF(key);
669 if (args == NULL) {
670 goto error;
671 }
672 args_owned = 1;
673 arglen = -1;
674 argidx = -2;
675 }
676 while (--fmtcnt >= 0) {
677 switch (c = *fmt++) {
678 case '-': flags |= F_LJUST; continue;
679 case '+': flags |= F_SIGN; continue;
680 case ' ': flags |= F_BLANK; continue;
681 case '#': flags |= F_ALT; continue;
682 case '0': flags |= F_ZERO; continue;
683 }
684 break;
685 }
686 if (c == '*') {
687 v = getnextarg(args, arglen, &argidx);
688 if (v == NULL)
689 goto error;
690 if (!PyLong_Check(v)) {
691 PyErr_SetString(PyExc_TypeError,
692 "* wants int");
693 goto error;
694 }
695 width = PyLong_AsSsize_t(v);
696 if (width == -1 && PyErr_Occurred())
697 goto error;
698 if (width < 0) {
699 flags |= F_LJUST;
700 width = -width;
701 }
702 if (--fmtcnt >= 0)
703 c = *fmt++;
704 }
705 else if (c >= 0 && isdigit(c)) {
706 width = c - '0';
707 while (--fmtcnt >= 0) {
708 c = Py_CHARMASK(*fmt++);
709 if (!isdigit(c))
710 break;
711 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
712 PyErr_SetString(
713 PyExc_ValueError,
714 "width too big");
715 goto error;
716 }
717 width = width*10 + (c - '0');
718 }
719 }
720 if (c == '.') {
721 prec = 0;
722 if (--fmtcnt >= 0)
723 c = *fmt++;
724 if (c == '*') {
725 v = getnextarg(args, arglen, &argidx);
726 if (v == NULL)
727 goto error;
728 if (!PyLong_Check(v)) {
729 PyErr_SetString(
730 PyExc_TypeError,
731 "* wants int");
732 goto error;
733 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200734 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800735 if (prec == -1 && PyErr_Occurred())
736 goto error;
737 if (prec < 0)
738 prec = 0;
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 prec = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "prec too big");
752 goto error;
753 }
754 prec = prec*10 + (c - '0');
755 }
756 }
757 } /* prec */
758 if (fmtcnt >= 0) {
759 if (c == 'h' || c == 'l' || c == 'L') {
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 }
763 }
764 if (fmtcnt < 0) {
765 PyErr_SetString(PyExc_ValueError,
766 "incomplete format");
767 goto error;
768 }
769 if (c != '%') {
770 v = getnextarg(args, arglen, &argidx);
771 if (v == NULL)
772 goto error;
773 }
774 sign = 0;
775 fill = ' ';
776 switch (c) {
777 case '%':
778 pbuf = "%";
779 len = 1;
780 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700781 case 'r':
782 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800783 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200784 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (temp == NULL)
786 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200787 assert(PyUnicode_IS_ASCII(temp));
788 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
789 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800790 if (prec >= 0 && len > prec)
791 len = prec;
792 break;
793 case 's':
794 // %s is only for 2/3 code; 3 only code should use %b
795 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200796 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800797 if (temp == NULL)
798 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800799 if (prec >= 0 && len > prec)
800 len = prec;
801 break;
802 case 'i':
803 case 'd':
804 case 'u':
805 case 'o':
806 case 'x':
807 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300808 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200809 if (!temp)
810 goto error;
811 assert(PyUnicode_IS_ASCII(temp));
812 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
813 len = PyUnicode_GET_LENGTH(temp);
814 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800815 if (flags & F_ZERO)
816 fill = '0';
817 break;
818 case 'e':
819 case 'E':
820 case 'f':
821 case 'F':
822 case 'g':
823 case 'G':
824 temp = formatfloat(v, flags, prec, c);
825 if (temp == NULL)
826 goto error;
827 pbuf = PyBytes_AS_STRING(temp);
828 len = PyBytes_GET_SIZE(temp);
829 sign = 1;
830 if (flags & F_ZERO)
831 fill = '0';
832 break;
833 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 pbuf = &onechar;
835 len = byte_converter(v, &onechar);
836 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 goto error;
838 break;
839 default:
840 PyErr_Format(PyExc_ValueError,
841 "unsupported format character '%c' (0x%x) "
842 "at index %zd",
843 c, c,
844 (Py_ssize_t)(fmt - 1 -
845 PyBytes_AsString(format)));
846 goto error;
847 }
848 if (sign) {
849 if (*pbuf == '-' || *pbuf == '+') {
850 sign = *pbuf++;
851 len--;
852 }
853 else if (flags & F_SIGN)
854 sign = '+';
855 else if (flags & F_BLANK)
856 sign = ' ';
857 else
858 sign = 0;
859 }
860 if (width < len)
861 width = len;
862 if (rescnt - (sign != 0) < width) {
863 reslen -= rescnt;
864 rescnt = width + fmtcnt + 100;
865 reslen += rescnt;
866 if (reslen < 0) {
867 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800868 Py_XDECREF(temp);
869 return PyErr_NoMemory();
870 }
871 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800872 Py_XDECREF(temp);
873 return NULL;
874 }
875 res = PyBytes_AS_STRING(result)
876 + reslen - rescnt;
877 }
878 if (sign) {
879 if (fill != ' ')
880 *res++ = sign;
881 rescnt--;
882 if (width > len)
883 width--;
884 }
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200885 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 assert(pbuf[0] == '0');
887 assert(pbuf[1] == c);
888 if (fill != ' ') {
889 *res++ = *pbuf++;
890 *res++ = *pbuf++;
891 }
892 rescnt -= 2;
893 width -= 2;
894 if (width < 0)
895 width = 0;
896 len -= 2;
897 }
898 if (width > len && !(flags & F_LJUST)) {
899 do {
900 --rescnt;
901 *res++ = fill;
902 } while (--width > len);
903 }
904 if (fill == ' ') {
905 if (sign)
906 *res++ = sign;
Serhiy Storchakab1a16192016-12-17 21:48:03 +0200907 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800908 assert(pbuf[0] == '0');
909 assert(pbuf[1] == c);
910 *res++ = *pbuf++;
911 *res++ = *pbuf++;
912 }
913 }
914 Py_MEMCPY(res, pbuf, len);
915 res += len;
916 rescnt -= len;
917 while (--width >= len) {
918 --rescnt;
919 *res++ = ' ';
920 }
921 if (dict && (argidx < arglen) && c != '%') {
922 PyErr_SetString(PyExc_TypeError,
923 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 Py_XDECREF(temp);
925 goto error;
926 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 Py_XDECREF(temp);
928 } /* '%' */
929 } /* until end */
930 if (argidx < arglen && !dict) {
931 PyErr_SetString(PyExc_TypeError,
932 "not all arguments converted during bytes formatting");
933 goto error;
934 }
935 if (args_owned) {
936 Py_DECREF(args);
937 }
938 if (_PyBytes_Resize(&result, reslen - rescnt))
939 return NULL;
940 return result;
941
942 error:
943 Py_DECREF(result);
944 if (args_owned) {
945 Py_DECREF(args);
946 }
947 return NULL;
948}
949
950/* =-= */
951
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000952static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000954{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000956}
957
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958/* Unescape a backslash-escaped string. If unicode is non-zero,
959 the string is a u-literal. If recode_encoding is non-zero,
960 the string is UTF-8 encoded and should be re-encoded in the
961 specified encoding. */
962
963PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 Py_ssize_t len,
965 const char *errors,
966 Py_ssize_t unicode,
967 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 int c;
970 char *p, *buf;
971 const char *end;
972 PyObject *v;
973 Py_ssize_t newlen = recode_encoding ? 4*len:len;
974 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
975 if (v == NULL)
976 return NULL;
977 p = buf = PyBytes_AsString(v);
978 end = s + len;
979 while (s < end) {
980 if (*s != '\\') {
981 non_esc:
982 if (recode_encoding && (*s & 0x80)) {
983 PyObject *u, *w;
984 char *r;
985 const char* t;
986 Py_ssize_t rn;
987 t = s;
988 /* Decode non-ASCII bytes as UTF-8. */
989 while (t < end && (*t & 0x80)) t++;
990 u = PyUnicode_DecodeUTF8(s, t - s, errors);
991 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 /* Recode them in target encoding. */
994 w = PyUnicode_AsEncodedString(
995 u, recode_encoding, errors);
996 Py_DECREF(u);
997 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 /* Append bytes to output buffer. */
1000 assert(PyBytes_Check(w));
1001 r = PyBytes_AS_STRING(w);
1002 rn = PyBytes_GET_SIZE(w);
1003 Py_MEMCPY(p, r, rn);
1004 p += rn;
1005 Py_DECREF(w);
1006 s = t;
1007 } else {
1008 *p++ = *s++;
1009 }
1010 continue;
1011 }
1012 s++;
1013 if (s==end) {
1014 PyErr_SetString(PyExc_ValueError,
1015 "Trailing \\ in string");
1016 goto failed;
1017 }
1018 switch (*s++) {
1019 /* XXX This assumes ASCII! */
1020 case '\n': break;
1021 case '\\': *p++ = '\\'; break;
1022 case '\'': *p++ = '\''; break;
1023 case '\"': *p++ = '\"'; break;
1024 case 'b': *p++ = '\b'; break;
1025 case 'f': *p++ = '\014'; break; /* FF */
1026 case 't': *p++ = '\t'; break;
1027 case 'n': *p++ = '\n'; break;
1028 case 'r': *p++ = '\r'; break;
1029 case 'v': *p++ = '\013'; break; /* VT */
1030 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1031 case '0': case '1': case '2': case '3':
1032 case '4': case '5': case '6': case '7':
1033 c = s[-1] - '0';
1034 if (s < end && '0' <= *s && *s <= '7') {
1035 c = (c<<3) + *s++ - '0';
1036 if (s < end && '0' <= *s && *s <= '7')
1037 c = (c<<3) + *s++ - '0';
1038 }
1039 *p++ = c;
1040 break;
1041 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001042 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 unsigned int x = 0;
1044 c = Py_CHARMASK(*s);
1045 s++;
David Malcolm96960882010-11-05 17:23:41 +00001046 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001048 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 x = 10 + c - 'a';
1050 else
1051 x = 10 + c - 'A';
1052 x = x << 4;
1053 c = Py_CHARMASK(*s);
1054 s++;
David Malcolm96960882010-11-05 17:23:41 +00001055 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001057 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 x += 10 + c - 'a';
1059 else
1060 x += 10 + c - 'A';
1061 *p++ = x;
1062 break;
1063 }
1064 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001065 PyErr_Format(PyExc_ValueError,
1066 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001067 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 goto failed;
1069 }
1070 if (strcmp(errors, "replace") == 0) {
1071 *p++ = '?';
1072 } else if (strcmp(errors, "ignore") == 0)
1073 /* do nothing */;
1074 else {
1075 PyErr_Format(PyExc_ValueError,
1076 "decoding error; unknown "
1077 "error handling code: %.400s",
1078 errors);
1079 goto failed;
1080 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001081 /* skip \x */
1082 if (s < end && Py_ISXDIGIT(s[0]))
1083 s++; /* and a hexdigit */
1084 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 default:
1086 *p++ = '\\';
1087 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001088 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 UTF-8 bytes may follow. */
1090 }
1091 }
1092 if (p-buf < newlen)
1093 _PyBytes_Resize(&v, p - buf);
1094 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001095 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 Py_DECREF(v);
1097 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001098}
1099
1100/* -------------------------------------------------------------------- */
1101/* object api */
1102
1103Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001104PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001105{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 if (!PyBytes_Check(op)) {
1107 PyErr_Format(PyExc_TypeError,
1108 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1109 return -1;
1110 }
1111 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112}
1113
1114char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001115PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 if (!PyBytes_Check(op)) {
1118 PyErr_Format(PyExc_TypeError,
1119 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1120 return NULL;
1121 }
1122 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001123}
1124
1125int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001126PyBytes_AsStringAndSize(PyObject *obj,
1127 char **s,
1128 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 if (s == NULL) {
1131 PyErr_BadInternalCall();
1132 return -1;
1133 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 if (!PyBytes_Check(obj)) {
1136 PyErr_Format(PyExc_TypeError,
1137 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1138 return -1;
1139 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 *s = PyBytes_AS_STRING(obj);
1142 if (len != NULL)
1143 *len = PyBytes_GET_SIZE(obj);
1144 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001145 PyErr_SetString(PyExc_ValueError,
1146 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 return -1;
1148 }
1149 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150}
Neal Norwitz6968b052007-02-27 19:02:19 +00001151
1152/* -------------------------------------------------------------------- */
1153/* Methods */
1154
Eric Smith0923d1d2009-04-16 20:16:10 +00001155#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001156
1157#include "stringlib/fastsearch.h"
1158#include "stringlib/count.h"
1159#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001160#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001161#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001162#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001163#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001164
Eric Smith0f78bff2009-11-30 01:01:42 +00001165#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167PyObject *
1168PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001169{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001170 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001171 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001172 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001174 unsigned char quote, *s, *p;
1175
1176 /* Compute size of output string */
1177 squotes = dquotes = 0;
1178 newsize = 3; /* b'' */
1179 s = (unsigned char*)op->ob_sval;
1180 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001181 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001182 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001183 case '\'': squotes++; break;
1184 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001185 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001186 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001187 default:
1188 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001189 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001190 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001191 if (newsize > PY_SSIZE_T_MAX - incr)
1192 goto overflow;
1193 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001194 }
1195 quote = '\'';
1196 if (smartquotes && squotes && !dquotes)
1197 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001198 if (squotes && quote == '\'') {
1199 if (newsize > PY_SSIZE_T_MAX - squotes)
1200 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001201 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001203
1204 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 if (v == NULL) {
1206 return NULL;
1207 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001208 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001210 *p++ = 'b', *p++ = quote;
1211 for (i = 0; i < length; i++) {
1212 unsigned char c = op->ob_sval[i];
1213 if (c == quote || c == '\\')
1214 *p++ = '\\', *p++ = c;
1215 else if (c == '\t')
1216 *p++ = '\\', *p++ = 't';
1217 else if (c == '\n')
1218 *p++ = '\\', *p++ = 'n';
1219 else if (c == '\r')
1220 *p++ = '\\', *p++ = 'r';
1221 else if (c < ' ' || c >= 0x7f) {
1222 *p++ = '\\';
1223 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001224 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1225 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001227 else
1228 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001230 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001231 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001232 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001233
1234 overflow:
1235 PyErr_SetString(PyExc_OverflowError,
1236 "bytes object is too large to make repr");
1237 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001238}
1239
Neal Norwitz6968b052007-02-27 19:02:19 +00001240static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001241bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001244}
1245
Neal Norwitz6968b052007-02-27 19:02:19 +00001246static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001247bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (Py_BytesWarningFlag) {
1250 if (PyErr_WarnEx(PyExc_BytesWarning,
1251 "str() on a bytes instance", 1))
1252 return NULL;
1253 }
1254 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001255}
1256
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001258bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261}
Neal Norwitz6968b052007-02-27 19:02:19 +00001262
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263/* This is also used by PyBytes_Concat() */
1264static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001265bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 Py_buffer va, vb;
1268 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 va.len = -1;
1271 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001272 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1273 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1275 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1276 goto done;
1277 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 /* Optimize end cases */
1280 if (va.len == 0 && PyBytes_CheckExact(b)) {
1281 result = b;
1282 Py_INCREF(result);
1283 goto done;
1284 }
1285 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1286 result = a;
1287 Py_INCREF(result);
1288 goto done;
1289 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001291 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 PyErr_NoMemory();
1293 goto done;
1294 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001295
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001296 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 if (result != NULL) {
1298 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1299 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1300 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001301
1302 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 if (va.len != -1)
1304 PyBuffer_Release(&va);
1305 if (vb.len != -1)
1306 PyBuffer_Release(&vb);
1307 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001308}
Neal Norwitz6968b052007-02-27 19:02:19 +00001309
1310static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001311bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001312{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001313 Py_ssize_t i;
1314 Py_ssize_t j;
1315 Py_ssize_t size;
1316 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 size_t nbytes;
1318 if (n < 0)
1319 n = 0;
1320 /* watch out for overflows: the size can overflow int,
1321 * and the # of bytes needed can overflow size_t
1322 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001323 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 PyErr_SetString(PyExc_OverflowError,
1325 "repeated bytes are too long");
1326 return NULL;
1327 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001328 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1330 Py_INCREF(a);
1331 return (PyObject *)a;
1332 }
1333 nbytes = (size_t)size;
1334 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1335 PyErr_SetString(PyExc_OverflowError,
1336 "repeated bytes are too long");
1337 return NULL;
1338 }
1339 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1340 if (op == NULL)
1341 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001342 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 op->ob_shash = -1;
1344 op->ob_sval[size] = '\0';
1345 if (Py_SIZE(a) == 1 && n > 0) {
1346 memset(op->ob_sval, a->ob_sval[0] , n);
1347 return (PyObject *) op;
1348 }
1349 i = 0;
1350 if (i < size) {
1351 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1352 i = Py_SIZE(a);
1353 }
1354 while (i < size) {
1355 j = (i <= size-i) ? i : size-i;
1356 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1357 i += j;
1358 }
1359 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001360}
1361
Guido van Rossum98297ee2007-11-06 21:34:58 +00001362static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001363bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001364{
1365 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1366 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001367 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001368 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001369 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001370 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001371 return -1;
1372 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1373 varg.buf, varg.len, 0);
1374 PyBuffer_Release(&varg);
1375 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001376 }
1377 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001378 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1379 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001380 }
1381
Antoine Pitrou0010d372010-08-15 17:12:55 +00001382 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001383}
1384
Neal Norwitz6968b052007-02-27 19:02:19 +00001385static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001386bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 if (i < 0 || i >= Py_SIZE(a)) {
1389 PyErr_SetString(PyExc_IndexError, "index out of range");
1390 return NULL;
1391 }
1392 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001393}
1394
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001395Py_LOCAL(int)
1396bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1397{
1398 int cmp;
1399 Py_ssize_t len;
1400
1401 len = Py_SIZE(a);
1402 if (Py_SIZE(b) != len)
1403 return 0;
1404
1405 if (a->ob_sval[0] != b->ob_sval[0])
1406 return 0;
1407
1408 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1409 return (cmp == 0);
1410}
1411
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001413bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001414{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 int c;
1416 Py_ssize_t len_a, len_b;
1417 Py_ssize_t min_len;
1418 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001419 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 /* Make sure both arguments are strings. */
1422 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001423 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001424 rc = PyObject_IsInstance((PyObject*)a,
1425 (PyObject*)&PyUnicode_Type);
1426 if (!rc)
1427 rc = PyObject_IsInstance((PyObject*)b,
1428 (PyObject*)&PyUnicode_Type);
1429 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001431 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001432 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001433 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001434 return NULL;
1435 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001436 else {
1437 rc = PyObject_IsInstance((PyObject*)a,
1438 (PyObject*)&PyLong_Type);
1439 if (!rc)
1440 rc = PyObject_IsInstance((PyObject*)b,
1441 (PyObject*)&PyLong_Type);
1442 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001443 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001444 if (rc) {
1445 if (PyErr_WarnEx(PyExc_BytesWarning,
1446 "Comparison between bytes and int", 1))
1447 return NULL;
1448 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001449 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 }
1451 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001453 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001455 case Py_EQ:
1456 case Py_LE:
1457 case Py_GE:
1458 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001460 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001461 case Py_NE:
1462 case Py_LT:
1463 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001465 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001466 default:
1467 PyErr_BadArgument();
1468 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 }
1470 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001471 else if (op == Py_EQ || op == Py_NE) {
1472 int eq = bytes_compare_eq(a, b);
1473 eq ^= (op == Py_NE);
1474 result = eq ? Py_True : Py_False;
1475 }
1476 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001477 len_a = Py_SIZE(a);
1478 len_b = Py_SIZE(b);
1479 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001480 if (min_len > 0) {
1481 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001482 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001483 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001485 else
1486 c = 0;
1487 if (c == 0)
1488 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1489 switch (op) {
1490 case Py_LT: c = c < 0; break;
1491 case Py_LE: c = c <= 0; break;
1492 case Py_GT: c = c > 0; break;
1493 case Py_GE: c = c >= 0; break;
1494 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001495 PyErr_BadArgument();
1496 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001497 }
1498 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 Py_INCREF(result);
1502 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001503}
1504
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001505static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001506bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001507{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001508 if (a->ob_shash == -1) {
1509 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001510 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001511 }
1512 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001513}
1514
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001515static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001516bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001517{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 if (PyIndex_Check(item)) {
1519 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1520 if (i == -1 && PyErr_Occurred())
1521 return NULL;
1522 if (i < 0)
1523 i += PyBytes_GET_SIZE(self);
1524 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1525 PyErr_SetString(PyExc_IndexError,
1526 "index out of range");
1527 return NULL;
1528 }
1529 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1530 }
1531 else if (PySlice_Check(item)) {
1532 Py_ssize_t start, stop, step, slicelength, cur, i;
1533 char* source_buf;
1534 char* result_buf;
1535 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001536
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001537 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 PyBytes_GET_SIZE(self),
1539 &start, &stop, &step, &slicelength) < 0) {
1540 return NULL;
1541 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001543 if (slicelength <= 0) {
1544 return PyBytes_FromStringAndSize("", 0);
1545 }
1546 else if (start == 0 && step == 1 &&
1547 slicelength == PyBytes_GET_SIZE(self) &&
1548 PyBytes_CheckExact(self)) {
1549 Py_INCREF(self);
1550 return (PyObject *)self;
1551 }
1552 else if (step == 1) {
1553 return PyBytes_FromStringAndSize(
1554 PyBytes_AS_STRING(self) + start,
1555 slicelength);
1556 }
1557 else {
1558 source_buf = PyBytes_AS_STRING(self);
1559 result = PyBytes_FromStringAndSize(NULL, slicelength);
1560 if (result == NULL)
1561 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 result_buf = PyBytes_AS_STRING(result);
1564 for (cur = start, i = 0; i < slicelength;
1565 cur += step, i++) {
1566 result_buf[i] = source_buf[cur];
1567 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 return result;
1570 }
1571 }
1572 else {
1573 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001574 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 Py_TYPE(item)->tp_name);
1576 return NULL;
1577 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001578}
1579
1580static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001581bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001582{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1584 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585}
1586
/* Sequence protocol slots for bytes.  Slots set to 0 are not implemented;
   in particular there is no item or slice assignment. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1597
/* Mapping protocol slots for bytes: length and subscription (index or
   slice).  The third slot (subscript assignment) is left empty. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,
    (binaryfunc)bytes_subscript,
    0,
};
1603
/* Buffer protocol slots for bytes: only the "get buffer" function is
   supplied; the second slot (release) is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,
    NULL,
};
1608
1609
/* Mode selectors for stripping.
   NOTE(review): presumably passed to the strip helpers defined later in
   this file to choose which end(s) are trimmed -- confirm against the
   callers. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1613
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001614/*[clinic input]
1615bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001616
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001617 sep: object = None
1618 The delimiter according which to split the bytes.
1619 None (the default value) means split on ASCII whitespace characters
1620 (space, tab, return, newline, formfeed, vertical tab).
1621 maxsplit: Py_ssize_t = -1
1622 Maximum number of splits to do.
1623 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001624
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001625Return a list of the sections in the bytes, using sep as the delimiter.
1626[clinic start generated code]*/
1627
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001628static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001629bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001630/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001631{
1632 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 const char *s = PyBytes_AS_STRING(self), *sub;
1634 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001635 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 if (maxsplit < 0)
1638 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001639 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001641 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 return NULL;
1643 sub = vsub.buf;
1644 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1647 PyBuffer_Release(&vsub);
1648 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001649}
1650
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001651/*[clinic input]
1652bytes.partition
1653
1654 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001655 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001656 /
1657
1658Partition the bytes into three parts using the given separator.
1659
1660This will search for the separator sep in the bytes. If the separator is found,
1661returns a 3-tuple containing the part before the separator, the separator
1662itself, and the part after it.
1663
1664If the separator is not found, returns a 3-tuple containing the original bytes
1665object and two empty bytes objects.
1666[clinic start generated code]*/
1667
Neal Norwitz6968b052007-02-27 19:02:19 +00001668static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001669bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001670/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001671{
Neal Norwitz6968b052007-02-27 19:02:19 +00001672 return stringlib_partition(
1673 (PyObject*) self,
1674 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001675 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001676 );
1677}
1678
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001679/*[clinic input]
1680bytes.rpartition
1681
1682 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001683 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001684 /
1685
1686Partition the bytes into three parts using the given separator.
1687
1688This will search for the separator sep in the bytes, starting and the end. If
1689the separator is found, returns a 3-tuple containing the part before the
1690separator, the separator itself, and the part after it.
1691
1692If the separator is not found, returns a 3-tuple containing two empty bytes
1693objects and the original bytes object.
1694[clinic start generated code]*/
1695
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001696static PyObject *
1697bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001698/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001699{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 return stringlib_rpartition(
1701 (PyObject*) self,
1702 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001703 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001705}
1706
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001707/*[clinic input]
1708bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001709
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001710Return a list of the sections in the bytes, using sep as the delimiter.
1711
1712Splitting is done starting at the end of the bytes and working to the front.
1713[clinic start generated code]*/
1714
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001715static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001716bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001717/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001718{
1719 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 const char *s = PyBytes_AS_STRING(self), *sub;
1721 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001722 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 if (maxsplit < 0)
1725 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001726 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001728 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 return NULL;
1730 sub = vsub.buf;
1731 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1734 PyBuffer_Release(&vsub);
1735 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001736}
1737
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001738
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739/*[clinic input]
1740bytes.join
1741
1742 iterable_of_bytes: object
1743 /
1744
1745Concatenate any number of bytes objects.
1746
1747The bytes whose method is called is inserted in between each pair.
1748
1749The result is returned as a new bytes object.
1750
1751Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1752[clinic start generated code]*/
1753
Neal Norwitz6968b052007-02-27 19:02:19 +00001754static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001755bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001756/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001757{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001758 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001759}
1760
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001761PyObject *
1762_PyBytes_Join(PyObject *sep, PyObject *x)
1763{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 assert(sep != NULL && PyBytes_Check(sep));
1765 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001766 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001767}
1768
/* Helper macro to fix up start/end slice values in place.

   - end greater than len is clamped to len; a negative end has len added
     and is then clamped to 0 if still negative.
   - a negative start has len added and is then clamped to 0 if still
     negative.  start is NOT clamped against len or end, so callers must
     cope with start > end themselves.

   start and end must be modifiable lvalues.  len may be evaluated more
   than once, so it must not have side effects.  The body is wrapped in
   do { ... } while (0) so that the macro behaves as one statement (for
   example as the unbraced body of an if/else); invoke it with a trailing
   semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001783
1784Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001785bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001788 char byte;
1789 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001791 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001793 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001794
Antoine Pitrouac65d962011-10-20 23:54:17 +02001795 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1796 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001798
Antoine Pitrouac65d962011-10-20 23:54:17 +02001799 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001800 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001801 return -2;
1802
1803 sub = subbuf.buf;
1804 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001806 else {
1807 sub = &byte;
1808 sub_len = 1;
1809 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001810 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001811
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001812 ADJUST_INDICES(start, end, len);
1813 if (end - start < sub_len)
1814 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001815 else if (sub_len == 1
1816#ifndef HAVE_MEMRCHR
1817 && dir > 0
1818#endif
1819 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001820 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001821 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001822 res = stringlib_fastsearch_memchr_1char(
1823 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001824 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001825 if (res >= 0)
1826 res += start;
1827 }
1828 else {
1829 if (dir > 0)
1830 res = stringlib_find_slice(
1831 PyBytes_AS_STRING(self), len,
1832 sub, sub_len, start, end);
1833 else
1834 res = stringlib_rfind_slice(
1835 PyBytes_AS_STRING(self), len,
1836 sub, sub_len, start, end);
1837 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001838
1839 if (subobj)
1840 PyBuffer_Release(&subbuf);
1841
1842 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001843}
1844
1845
1846PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001847"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001848\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001849Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001850such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001851arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001852\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001853Return -1 on failure.");
1854
Neal Norwitz6968b052007-02-27 19:02:19 +00001855static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001856bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001857{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 Py_ssize_t result = bytes_find_internal(self, args, +1);
1859 if (result == -2)
1860 return NULL;
1861 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001862}
1863
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001864
1865PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001866"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001867\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868Like B.find() but raise ValueError when the substring is not found.");
1869
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001870static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001871bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001872{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 Py_ssize_t result = bytes_find_internal(self, args, +1);
1874 if (result == -2)
1875 return NULL;
1876 if (result == -1) {
1877 PyErr_SetString(PyExc_ValueError,
1878 "substring not found");
1879 return NULL;
1880 }
1881 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001882}
1883
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001884
1885PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001886"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001887\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001888Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001889such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001890arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001891\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892Return -1 on failure.");
1893
Neal Norwitz6968b052007-02-27 19:02:19 +00001894static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001895bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001896{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 Py_ssize_t result = bytes_find_internal(self, args, -1);
1898 if (result == -2)
1899 return NULL;
1900 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001901}
1902
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001903
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001905"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001906\n\
1907Like B.rfind() but raise ValueError when the substring is not found.");
1908
1909static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001910bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 Py_ssize_t result = bytes_find_internal(self, args, -1);
1913 if (result == -2)
1914 return NULL;
1915 if (result == -1) {
1916 PyErr_SetString(PyExc_ValueError,
1917 "substring not found");
1918 return NULL;
1919 }
1920 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001921}
1922
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
1924Py_LOCAL_INLINE(PyObject *)
1925do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001926{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 Py_buffer vsep;
1928 char *s = PyBytes_AS_STRING(self);
1929 Py_ssize_t len = PyBytes_GET_SIZE(self);
1930 char *sep;
1931 Py_ssize_t seplen;
1932 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001934 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001935 return NULL;
1936 sep = vsep.buf;
1937 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 i = 0;
1940 if (striptype != RIGHTSTRIP) {
1941 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1942 i++;
1943 }
1944 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946 j = len;
1947 if (striptype != LEFTSTRIP) {
1948 do {
1949 j--;
1950 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1951 j++;
1952 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001954 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1957 Py_INCREF(self);
1958 return (PyObject*)self;
1959 }
1960 else
1961 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001962}
1963
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964
1965Py_LOCAL_INLINE(PyObject *)
1966do_strip(PyBytesObject *self, int striptype)
1967{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001968 char *s = PyBytes_AS_STRING(self);
1969 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 i = 0;
1972 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001973 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 i++;
1975 }
1976 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 j = len;
1979 if (striptype != LEFTSTRIP) {
1980 do {
1981 j--;
David Malcolm96960882010-11-05 17:23:41 +00001982 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 j++;
1984 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1987 Py_INCREF(self);
1988 return (PyObject*)self;
1989 }
1990 else
1991 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001992}
1993
1994
1995Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001996do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001997{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001998 if (bytes != NULL && bytes != Py_None) {
1999 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002000 }
2001 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002002}
2003
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002004/*[clinic input]
2005bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002006
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002007 self: self(type="PyBytesObject *")
2008 bytes: object = None
2009 /
2010
2011Strip leading and trailing bytes contained in the argument.
2012
2013If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2014[clinic start generated code]*/
2015
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002016static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002017bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002018/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002019{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002020 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002021}
2022
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023/*[clinic input]
2024bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026 self: self(type="PyBytesObject *")
2027 bytes: object = None
2028 /
2029
2030Strip leading bytes contained in the argument.
2031
2032If the argument is omitted or None, strip leading ASCII whitespace.
2033[clinic start generated code]*/
2034
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002035static PyObject *
2036bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002037/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038{
2039 return do_argstrip(self, LEFTSTRIP, bytes);
2040}
2041
2042/*[clinic input]
2043bytes.rstrip
2044
2045 self: self(type="PyBytesObject *")
2046 bytes: object = None
2047 /
2048
2049Strip trailing bytes contained in the argument.
2050
2051If the argument is omitted or None, strip trailing ASCII whitespace.
2052[clinic start generated code]*/
2053
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002054static PyObject *
2055bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002056/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002057{
2058 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002059}
Neal Norwitz6968b052007-02-27 19:02:19 +00002060
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
2062PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002063"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002064\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002066string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067as in slice notation.");
2068
2069static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002070bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 PyObject *sub_obj;
2073 const char *str = PyBytes_AS_STRING(self), *sub;
2074 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002075 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002076 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouac65d962011-10-20 23:54:17 +02002078 Py_buffer vsub;
2079 PyObject *count_obj;
2080
2081 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2082 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Antoine Pitrouac65d962011-10-20 23:54:17 +02002085 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002086 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002087 return NULL;
2088
2089 sub = vsub.buf;
2090 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002092 else {
2093 sub = &byte;
2094 sub_len = 1;
2095 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098
Antoine Pitrouac65d962011-10-20 23:54:17 +02002099 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2101 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002102
2103 if (sub_obj)
2104 PyBuffer_Release(&vsub);
2105
2106 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002107}
2108
2109
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110/*[clinic input]
2111bytes.translate
2112
2113 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002114 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115 Translation table, which must be a bytes object of length 256.
2116 [
2117 deletechars: object
2118 ]
2119 /
2120
2121Return a copy with each character mapped by the given translation table.
2122
2123All characters occurring in the optional argument deletechars are removed.
2124The remaining characters are mapped through the given translation table.
2125[clinic start generated code]*/
2126
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002127static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002128bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2129 PyObject *deletechars)
2130/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002132 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002133 Py_buffer table_view = {NULL, NULL};
2134 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002135 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002136 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002138 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002139 Py_ssize_t inlen, tablen, dellen = 0;
2140 PyObject *result;
2141 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002142
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002143 if (PyBytes_Check(table)) {
2144 table_chars = PyBytes_AS_STRING(table);
2145 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002147 else if (table == Py_None) {
2148 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 tablen = 256;
2150 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002151 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002152 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002153 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002154 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002155 tablen = table_view.len;
2156 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 if (tablen != 256) {
2159 PyErr_SetString(PyExc_ValueError,
2160 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002161 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002162 return NULL;
2163 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002164
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002165 if (deletechars != NULL) {
2166 if (PyBytes_Check(deletechars)) {
2167 del_table_chars = PyBytes_AS_STRING(deletechars);
2168 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002170 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002171 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002172 PyBuffer_Release(&table_view);
2173 return NULL;
2174 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002175 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002176 dellen = del_table_view.len;
2177 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 }
2179 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002180 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 dellen = 0;
2182 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 inlen = PyBytes_GET_SIZE(input_obj);
2185 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002186 if (result == NULL) {
2187 PyBuffer_Release(&del_table_view);
2188 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002190 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002191 output_start = output = PyBytes_AsString(result);
2192 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002193
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002194 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002195 /* If no deletions are required, use faster code */
2196 for (i = inlen; --i >= 0; ) {
2197 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002198 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002199 changed = 1;
2200 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002201 if (!changed && PyBytes_CheckExact(input_obj)) {
2202 Py_INCREF(input_obj);
2203 Py_DECREF(result);
2204 result = input_obj;
2205 }
2206 PyBuffer_Release(&del_table_view);
2207 PyBuffer_Release(&table_view);
2208 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002211 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002212 for (i = 0; i < 256; i++)
2213 trans_table[i] = Py_CHARMASK(i);
2214 } else {
2215 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002216 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002217 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002218 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002220 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002222 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 for (i = inlen; --i >= 0; ) {
2225 c = Py_CHARMASK(*input++);
2226 if (trans_table[c] != -1)
2227 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2228 continue;
2229 changed = 1;
2230 }
2231 if (!changed && PyBytes_CheckExact(input_obj)) {
2232 Py_DECREF(result);
2233 Py_INCREF(input_obj);
2234 return input_obj;
2235 }
2236 /* Fix the size of the resulting string */
2237 if (inlen > 0)
2238 _PyBytes_Resize(&result, output - output_start);
2239 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002240}
2241
2242
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002243/*[clinic input]
2244
2245@staticmethod
2246bytes.maketrans
2247
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002248 frm: Py_buffer
2249 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002250 /
2251
2252Return a translation table useable for the bytes or bytearray translate method.
2253
2254The returned table will be one where each byte in frm is mapped to the byte at
2255the same position in to.
2256
2257The bytes objects frm and to must be of the same length.
2258[clinic start generated code]*/
2259
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002261bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002262/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002263{
2264 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002265}
2266
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002267/* find and count characters and substrings */
2268
/* Locate the first occurrence of byte c within the first target_len bytes
   of target.  Evaluates to a char * pointing at the match, or NULL when
   there is none (a thin wrapper around memchr()).  All parameters are
   parenthesized so that expression arguments keep their meaning. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2271
2272/* String ops must return a string. */
2273/* If the object is subclass of string, create a copy */
2274Py_LOCAL(PyBytesObject *)
2275return_self(PyBytesObject *self)
2276{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 if (PyBytes_CheckExact(self)) {
2278 Py_INCREF(self);
2279 return self;
2280 }
2281 return (PyBytesObject *)PyBytes_FromStringAndSize(
2282 PyBytes_AS_STRING(self),
2283 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002284}
2285
2286Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002287countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 Py_ssize_t count=0;
2290 const char *start=target;
2291 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002293 while ( (start=findchar(start, end-start, c)) != NULL ) {
2294 count++;
2295 if (count >= maxcount)
2296 break;
2297 start += 1;
2298 }
2299 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002300}
2301
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302
2303/* Algorithms for different cases of string replacement */
2304
2305/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2306Py_LOCAL(PyBytesObject *)
2307replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 const char *to_s, Py_ssize_t to_len,
2309 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002310{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002311 char *self_s, *result_s;
2312 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002313 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002316 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002317
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002318 /* 1 at the end plus 1 after every character;
2319 count = min(maxcount, self_len + 1) */
2320 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002321 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002322 else
2323 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2324 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 /* Check for overflow */
2327 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002328 assert(count > 0);
2329 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 PyErr_SetString(PyExc_OverflowError,
2331 "replacement bytes are too long");
2332 return NULL;
2333 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002334 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 if (! (result = (PyBytesObject *)
2337 PyBytes_FromStringAndSize(NULL, result_len)) )
2338 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002340 self_s = PyBytes_AS_STRING(self);
2341 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002342
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002343 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002345 /* Lay the first one down (guaranteed this will occur) */
2346 Py_MEMCPY(result_s, to_s, to_len);
2347 result_s += to_len;
2348 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002350 for (i=0; i<count; i++) {
2351 *result_s++ = *self_s++;
2352 Py_MEMCPY(result_s, to_s, to_len);
2353 result_s += to_len;
2354 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 /* Copy the rest of the original string */
2357 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360}
2361
2362/* Special case for deleting a single character */
2363/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2364Py_LOCAL(PyBytesObject *)
2365replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002366 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002367{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002368 char *self_s, *result_s;
2369 char *start, *next, *end;
2370 Py_ssize_t self_len, result_len;
2371 Py_ssize_t count;
2372 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002374 self_len = PyBytes_GET_SIZE(self);
2375 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 count = countchar(self_s, self_len, from_c, maxcount);
2378 if (count == 0) {
2379 return return_self(self);
2380 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002382 result_len = self_len - count; /* from_len == 1 */
2383 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002385 if ( (result = (PyBytesObject *)
2386 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2387 return NULL;
2388 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 start = self_s;
2391 end = self_s + self_len;
2392 while (count-- > 0) {
2393 next = findchar(start, end-start, from_c);
2394 if (next == NULL)
2395 break;
2396 Py_MEMCPY(result_s, start, next-start);
2397 result_s += (next-start);
2398 start = next+1;
2399 }
2400 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002403}
2404
2405/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2406
2407Py_LOCAL(PyBytesObject *)
2408replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002409 const char *from_s, Py_ssize_t from_len,
2410 Py_ssize_t maxcount) {
2411 char *self_s, *result_s;
2412 char *start, *next, *end;
2413 Py_ssize_t self_len, result_len;
2414 Py_ssize_t count, offset;
2415 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 self_len = PyBytes_GET_SIZE(self);
2418 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 count = stringlib_count(self_s, self_len,
2421 from_s, from_len,
2422 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 if (count == 0) {
2425 /* no matches */
2426 return return_self(self);
2427 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 result_len = self_len - (count * from_len);
2430 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 if ( (result = (PyBytesObject *)
2433 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2434 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 start = self_s;
2439 end = self_s + self_len;
2440 while (count-- > 0) {
2441 offset = stringlib_find(start, end-start,
2442 from_s, from_len,
2443 0);
2444 if (offset == -1)
2445 break;
2446 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002448 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002450 result_s += (next-start);
2451 start = next+from_len;
2452 }
2453 Py_MEMCPY(result_s, start, end-start);
2454 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002455}
2456
2457/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2458Py_LOCAL(PyBytesObject *)
2459replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 char from_c, char to_c,
2461 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002462{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 char *self_s, *result_s, *start, *end, *next;
2464 Py_ssize_t self_len;
2465 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002467 /* The result string will be the same size */
2468 self_s = PyBytes_AS_STRING(self);
2469 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002471 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 if (next == NULL) {
2474 /* No matches; return the original string */
2475 return return_self(self);
2476 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 /* Need to make a new string */
2479 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2480 if (result == NULL)
2481 return NULL;
2482 result_s = PyBytes_AS_STRING(result);
2483 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 /* change everything in-place, starting with this one */
2486 start = result_s + (next-self_s);
2487 *start = to_c;
2488 start++;
2489 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 while (--maxcount > 0) {
2492 next = findchar(start, end-start, from_c);
2493 if (next == NULL)
2494 break;
2495 *next = to_c;
2496 start = next+1;
2497 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002499 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500}
2501
2502/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2503Py_LOCAL(PyBytesObject *)
2504replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002505 const char *from_s, Py_ssize_t from_len,
2506 const char *to_s, Py_ssize_t to_len,
2507 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 char *result_s, *start, *end;
2510 char *self_s;
2511 Py_ssize_t self_len, offset;
2512 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002516 self_s = PyBytes_AS_STRING(self);
2517 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 offset = stringlib_find(self_s, self_len,
2520 from_s, from_len,
2521 0);
2522 if (offset == -1) {
2523 /* No matches; return the original string */
2524 return return_self(self);
2525 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 /* Need to make a new string */
2528 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2529 if (result == NULL)
2530 return NULL;
2531 result_s = PyBytes_AS_STRING(result);
2532 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002534 /* change everything in-place, starting with this one */
2535 start = result_s + offset;
2536 Py_MEMCPY(start, to_s, from_len);
2537 start += from_len;
2538 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002540 while ( --maxcount > 0) {
2541 offset = stringlib_find(start, end-start,
2542 from_s, from_len,
2543 0);
2544 if (offset==-1)
2545 break;
2546 Py_MEMCPY(start+offset, to_s, from_len);
2547 start += offset+from_len;
2548 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002549
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002550 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002551}
2552
2553/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2554Py_LOCAL(PyBytesObject *)
2555replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 char from_c,
2557 const char *to_s, Py_ssize_t to_len,
2558 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002559{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 char *self_s, *result_s;
2561 char *start, *next, *end;
2562 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002563 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002564 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 self_s = PyBytes_AS_STRING(self);
2567 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 count = countchar(self_s, self_len, from_c, maxcount);
2570 if (count == 0) {
2571 /* no matches, return unchanged */
2572 return return_self(self);
2573 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 /* use the difference between current and new, hence the "-1" */
2576 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002577 assert(count > 0);
2578 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002579 PyErr_SetString(PyExc_OverflowError,
2580 "replacement bytes are too long");
2581 return NULL;
2582 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002583 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 if ( (result = (PyBytesObject *)
2586 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2587 return NULL;
2588 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002590 start = self_s;
2591 end = self_s + self_len;
2592 while (count-- > 0) {
2593 next = findchar(start, end-start, from_c);
2594 if (next == NULL)
2595 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 if (next == start) {
2598 /* replace with the 'to' */
2599 Py_MEMCPY(result_s, to_s, to_len);
2600 result_s += to_len;
2601 start += 1;
2602 } else {
2603 /* copy the unchanged old then the 'to' */
2604 Py_MEMCPY(result_s, start, next-start);
2605 result_s += (next-start);
2606 Py_MEMCPY(result_s, to_s, to_len);
2607 result_s += to_len;
2608 start = next+1;
2609 }
2610 }
2611 /* Copy the remainder of the remaining string */
2612 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615}
2616
2617/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2618Py_LOCAL(PyBytesObject *)
2619replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 const char *from_s, Py_ssize_t from_len,
2621 const char *to_s, Py_ssize_t to_len,
2622 Py_ssize_t maxcount) {
2623 char *self_s, *result_s;
2624 char *start, *next, *end;
2625 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002626 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002627 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 self_s = PyBytes_AS_STRING(self);
2630 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 count = stringlib_count(self_s, self_len,
2633 from_s, from_len,
2634 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 if (count == 0) {
2637 /* no matches, return unchanged */
2638 return return_self(self);
2639 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 /* Check for overflow */
2642 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002643 assert(count > 0);
2644 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002645 PyErr_SetString(PyExc_OverflowError,
2646 "replacement bytes are too long");
2647 return NULL;
2648 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002649 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 if ( (result = (PyBytesObject *)
2652 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2653 return NULL;
2654 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 start = self_s;
2657 end = self_s + self_len;
2658 while (count-- > 0) {
2659 offset = stringlib_find(start, end-start,
2660 from_s, from_len,
2661 0);
2662 if (offset == -1)
2663 break;
2664 next = start+offset;
2665 if (next == start) {
2666 /* replace with the 'to' */
2667 Py_MEMCPY(result_s, to_s, to_len);
2668 result_s += to_len;
2669 start += from_len;
2670 } else {
2671 /* copy the unchanged old then the 'to' */
2672 Py_MEMCPY(result_s, start, next-start);
2673 result_s += (next-start);
2674 Py_MEMCPY(result_s, to_s, to_len);
2675 result_s += to_len;
2676 start = next+from_len;
2677 }
2678 }
2679 /* Copy the remainder of the remaining string */
2680 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002682 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683}
2684
2685
2686Py_LOCAL(PyBytesObject *)
2687replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002688 const char *from_s, Py_ssize_t from_len,
2689 const char *to_s, Py_ssize_t to_len,
2690 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 if (maxcount < 0) {
2693 maxcount = PY_SSIZE_T_MAX;
2694 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2695 /* nothing to do; return the original string */
2696 return return_self(self);
2697 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 if (maxcount == 0 ||
2700 (from_len == 0 && to_len == 0)) {
2701 /* nothing to do; return the original string */
2702 return return_self(self);
2703 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002707 if (from_len == 0) {
2708 /* insert the 'to' string everywhere. */
2709 /* >>> "Python".replace("", ".") */
2710 /* '.P.y.t.h.o.n.' */
2711 return replace_interleave(self, to_s, to_len, maxcount);
2712 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2715 /* point for an empty self string to generate a non-empty string */
2716 /* Special case so the remaining code always gets a non-empty string */
2717 if (PyBytes_GET_SIZE(self) == 0) {
2718 return return_self(self);
2719 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 if (to_len == 0) {
2722 /* delete all occurrences of 'from' string */
2723 if (from_len == 1) {
2724 return replace_delete_single_character(
2725 self, from_s[0], maxcount);
2726 } else {
2727 return replace_delete_substring(self, from_s,
2728 from_len, maxcount);
2729 }
2730 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002732 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002734 if (from_len == to_len) {
2735 if (from_len == 1) {
2736 return replace_single_character_in_place(
2737 self,
2738 from_s[0],
2739 to_s[0],
2740 maxcount);
2741 } else {
2742 return replace_substring_in_place(
2743 self, from_s, from_len, to_s, to_len,
2744 maxcount);
2745 }
2746 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002748 /* Otherwise use the more generic algorithms */
2749 if (from_len == 1) {
2750 return replace_single_character(self, from_s[0],
2751 to_s, to_len, maxcount);
2752 } else {
2753 /* len('from')>=2, len('to')>=1 */
2754 return replace_substring(self, from_s, from_len, to_s, to_len,
2755 maxcount);
2756 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002757}
2758
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002759
2760/*[clinic input]
2761bytes.replace
2762
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002763 old: Py_buffer
2764 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002765 count: Py_ssize_t = -1
2766 Maximum number of occurrences to replace.
2767 -1 (the default value) means replace all occurrences.
2768 /
2769
2770Return a copy with all occurrences of substring old replaced by new.
2771
2772If the optional argument count is given, only the first count occurrences are
2773replaced.
2774[clinic start generated code]*/
2775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002776static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002777bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2778 Py_ssize_t count)
2779/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002780{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002782 (const char *)old->buf, old->len,
2783 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002784}
2785
2786/** End DALKE **/
2787
2788/* Matches the end (direction >= 0) or start (direction < 0) of self
2789 * against substr, using the start and end arguments. Returns
2790 * -1 on error, 0 if not found and 1 if found.
2791 */
2792Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002793_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 Py_ssize_t len = PyBytes_GET_SIZE(self);
2797 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002798 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 const char* sub;
2800 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 if (PyBytes_Check(substr)) {
2803 sub = PyBytes_AS_STRING(substr);
2804 slen = PyBytes_GET_SIZE(substr);
2805 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002806 else {
2807 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2808 return -1;
2809 sub = sub_view.buf;
2810 slen = sub_view.len;
2811 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002812 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002814 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 if (direction < 0) {
2817 /* startswith */
2818 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002819 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 } else {
2821 /* endswith */
2822 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002823 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002824
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002825 if (end-slen > start)
2826 start = end - slen;
2827 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002828 if (end-start < slen)
2829 goto notfound;
2830 if (memcmp(str+start, sub, slen) != 0)
2831 goto notfound;
2832
2833 PyBuffer_Release(&sub_view);
2834 return 1;
2835
2836notfound:
2837 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002839}
2840
2841
2842PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002843"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844\n\
2845Return True if B starts with the specified prefix, False otherwise.\n\
2846With optional start, test B beginning at that position.\n\
2847With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002848prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002849
2850static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002851bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 Py_ssize_t start = 0;
2854 Py_ssize_t end = PY_SSIZE_T_MAX;
2855 PyObject *subobj;
2856 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002857
Jesus Ceaac451502011-04-20 17:09:23 +02002858 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 return NULL;
2860 if (PyTuple_Check(subobj)) {
2861 Py_ssize_t i;
2862 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2863 result = _bytes_tailmatch(self,
2864 PyTuple_GET_ITEM(subobj, i),
2865 start, end, -1);
2866 if (result == -1)
2867 return NULL;
2868 else if (result) {
2869 Py_RETURN_TRUE;
2870 }
2871 }
2872 Py_RETURN_FALSE;
2873 }
2874 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002875 if (result == -1) {
2876 if (PyErr_ExceptionMatches(PyExc_TypeError))
2877 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2878 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002879 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002880 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002881 else
2882 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002883}
2884
2885
2886PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002887"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002888\n\
2889Return True if B ends with the specified suffix, False otherwise.\n\
2890With optional start, test B beginning at that position.\n\
2891With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002892suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002893
2894static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002895bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 Py_ssize_t start = 0;
2898 Py_ssize_t end = PY_SSIZE_T_MAX;
2899 PyObject *subobj;
2900 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901
Jesus Ceaac451502011-04-20 17:09:23 +02002902 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002903 return NULL;
2904 if (PyTuple_Check(subobj)) {
2905 Py_ssize_t i;
2906 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2907 result = _bytes_tailmatch(self,
2908 PyTuple_GET_ITEM(subobj, i),
2909 start, end, +1);
2910 if (result == -1)
2911 return NULL;
2912 else if (result) {
2913 Py_RETURN_TRUE;
2914 }
2915 }
2916 Py_RETURN_FALSE;
2917 }
2918 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002919 if (result == -1) {
2920 if (PyErr_ExceptionMatches(PyExc_TypeError))
2921 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2922 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002924 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 else
2926 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927}
2928
2929
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002930/*[clinic input]
2931bytes.decode
2932
2933 encoding: str(c_default="NULL") = 'utf-8'
2934 The encoding with which to decode the bytes.
2935 errors: str(c_default="NULL") = 'strict'
2936 The error handling scheme to use for the handling of decoding errors.
2937 The default is 'strict' meaning that decoding errors raise a
2938 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2939 as well as any other name registered with codecs.register_error that
2940 can handle UnicodeDecodeErrors.
2941
2942Decode the bytes using the codec registered for encoding.
2943[clinic start generated code]*/
2944
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002945static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002946bytes_decode_impl(PyBytesObject*self, const char *encoding,
2947 const char *errors)
2948/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002949{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002950 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002951}
2952
Guido van Rossum20188312006-05-05 15:15:40 +00002953
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002954/*[clinic input]
2955bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002956
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002957 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002958
2959Return a list of the lines in the bytes, breaking at line boundaries.
2960
2961Line breaks are not included in the resulting list unless keepends is given and
2962true.
2963[clinic start generated code]*/
2964
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002965static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002966bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002967/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002968{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002969 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002970 (PyObject*) self, PyBytes_AS_STRING(self),
2971 PyBytes_GET_SIZE(self), keepends
2972 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002973}
2974
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002975static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002976hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002977{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002978 if (c >= 128)
2979 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002980 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002981 return c - '0';
2982 else {
David Malcolm96960882010-11-05 17:23:41 +00002983 if (Py_ISUPPER(c))
2984 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002985 if (c >= 'a' && c <= 'f')
2986 return c - 'a' + 10;
2987 }
2988 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002989}
2990
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002991/*[clinic input]
2992@classmethod
2993bytes.fromhex
2994
2995 string: unicode
2996 /
2997
2998Create a bytes object from a string of hexadecimal numbers.
2999
3000Spaces between two numbers are accepted.
3001Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3002[clinic start generated code]*/
3003
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003004static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003005bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003006/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003007{
3008 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003010 Py_ssize_t hexlen, byteslen, i, j;
3011 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003012 void *data;
3013 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003015 assert(PyUnicode_Check(string));
3016 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003017 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003018 kind = PyUnicode_KIND(string);
3019 data = PyUnicode_DATA(string);
3020 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 byteslen = hexlen/2; /* This overestimates if there are spaces */
3023 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3024 if (!newstring)
3025 return NULL;
3026 buf = PyBytes_AS_STRING(newstring);
3027 for (i = j = 0; i < hexlen; i += 2) {
3028 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003029 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003030 i++;
3031 if (i >= hexlen)
3032 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003033 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3034 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003035 if (top == -1 || bot == -1) {
3036 PyErr_Format(PyExc_ValueError,
3037 "non-hexadecimal number found in "
3038 "fromhex() arg at position %zd", i);
3039 goto error;
3040 }
3041 buf[j++] = (top << 4) + bot;
3042 }
3043 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3044 goto error;
3045 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003046
3047 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 Py_XDECREF(newstring);
3049 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003050}
3051
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003052PyDoc_STRVAR(hex__doc__,
3053"B.hex() -> string\n\
3054\n\
3055Create a string of hexadecimal numbers from a bytes object.\n\
3056Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3057
3058static PyObject *
3059bytes_hex(PyBytesObject *self)
3060{
3061 char* argbuf = PyBytes_AS_STRING(self);
3062 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3063 return _Py_strhex(argbuf, arglen);
3064}
3065
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003066static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003067bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003068{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003069 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003070}
3071
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003072
3073static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003074bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003075 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3076 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3077 _Py_capitalize__doc__},
3078 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3079 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003080 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3082 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003083 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 expandtabs__doc__},
3085 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003086 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003087 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003088 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3089 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3090 _Py_isalnum__doc__},
3091 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3092 _Py_isalpha__doc__},
3093 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3094 _Py_isdigit__doc__},
3095 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3096 _Py_islower__doc__},
3097 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3098 _Py_isspace__doc__},
3099 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3100 _Py_istitle__doc__},
3101 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3102 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003103 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003104 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3105 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003106 BYTES_LSTRIP_METHODDEF
3107 BYTES_MAKETRANS_METHODDEF
3108 BYTES_PARTITION_METHODDEF
3109 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003110 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3111 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3112 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003113 BYTES_RPARTITION_METHODDEF
3114 BYTES_RSPLIT_METHODDEF
3115 BYTES_RSTRIP_METHODDEF
3116 BYTES_SPLIT_METHODDEF
3117 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003118 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3119 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003120 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003121 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3122 _Py_swapcase__doc__},
3123 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003124 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003125 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3126 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003127 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003128};
3129
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003130static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003131bytes_mod(PyObject *v, PyObject *w)
3132{
3133 if (!PyBytes_Check(v))
3134 Py_RETURN_NOTIMPLEMENTED;
3135 return _PyBytes_Format(v, w);
3136}
3137
3138static PyNumberMethods bytes_as_number = {
3139 0, /*nb_add*/
3140 0, /*nb_subtract*/
3141 0, /*nb_multiply*/
3142 bytes_mod, /*nb_remainder*/
3143};
3144
3145static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003146bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003147
3148static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003149bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003150{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003151 PyObject *x = NULL;
3152 const char *encoding = NULL;
3153 const char *errors = NULL;
3154 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003155 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003156 Py_ssize_t size;
3157 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003158 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003160 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02003161 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003162 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3163 &encoding, &errors))
3164 return NULL;
3165 if (x == NULL) {
3166 if (encoding != NULL || errors != NULL) {
3167 PyErr_SetString(PyExc_TypeError,
3168 "encoding or errors without sequence "
3169 "argument");
3170 return NULL;
3171 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003172 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003173 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003174
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003175 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003176 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003177 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003178 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003179 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003180 return NULL;
3181 }
3182 new = PyUnicode_AsEncodedString(x, encoding, errors);
3183 if (new == NULL)
3184 return NULL;
3185 assert(PyBytes_Check(new));
3186 return new;
3187 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003188
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003189 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003190 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003191 PyUnicode_Check(x) ?
3192 "string argument without an encoding" :
3193 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003194 return NULL;
3195 }
3196
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003197 /* We'd like to call PyObject_Bytes here, but we need to check for an
3198 integer argument before deferring to PyBytes_FromObject, something
3199 PyObject_Bytes doesn't do. */
3200 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3201 if (func != NULL) {
3202 new = PyObject_CallFunctionObjArgs(func, NULL);
3203 Py_DECREF(func);
3204 if (new == NULL)
3205 return NULL;
3206 if (!PyBytes_Check(new)) {
3207 PyErr_Format(PyExc_TypeError,
3208 "__bytes__ returned non-bytes (type %.200s)",
3209 Py_TYPE(new)->tp_name);
3210 Py_DECREF(new);
3211 return NULL;
3212 }
3213 return new;
3214 }
3215 else if (PyErr_Occurred())
3216 return NULL;
3217
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003218 if (PyUnicode_Check(x)) {
3219 PyErr_SetString(PyExc_TypeError,
3220 "string argument without an encoding");
3221 return NULL;
3222 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003223 /* Is it an integer? */
3224 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3225 if (size == -1 && PyErr_Occurred()) {
3226 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3227 return NULL;
3228 PyErr_Clear();
3229 }
3230 else if (size < 0) {
3231 PyErr_SetString(PyExc_ValueError, "negative count");
3232 return NULL;
3233 }
3234 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003235 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003236 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003237 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003238 return new;
3239 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003240
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003241 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003242}
3243
3244PyObject *
3245PyBytes_FromObject(PyObject *x)
3246{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003247 PyObject *new, *it;
3248 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003250 if (x == NULL) {
3251 PyErr_BadInternalCall();
3252 return NULL;
3253 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003254
3255 if (PyBytes_CheckExact(x)) {
3256 Py_INCREF(x);
3257 return x;
3258 }
3259
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003260 /* Use the modern buffer interface */
3261 if (PyObject_CheckBuffer(x)) {
3262 Py_buffer view;
3263 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3264 return NULL;
3265 new = PyBytes_FromStringAndSize(NULL, view.len);
3266 if (!new)
3267 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003268 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3269 &view, view.len, 'C') < 0)
3270 goto fail;
3271 PyBuffer_Release(&view);
3272 return new;
3273 fail:
3274 Py_XDECREF(new);
3275 PyBuffer_Release(&view);
3276 return NULL;
3277 }
3278 if (PyUnicode_Check(x)) {
3279 PyErr_SetString(PyExc_TypeError,
3280 "cannot convert unicode object to bytes");
3281 return NULL;
3282 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003284 if (PyList_CheckExact(x)) {
3285 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3286 if (new == NULL)
3287 return NULL;
3288 for (i = 0; i < Py_SIZE(x); i++) {
3289 Py_ssize_t value = PyNumber_AsSsize_t(
3290 PyList_GET_ITEM(x, i), PyExc_ValueError);
3291 if (value == -1 && PyErr_Occurred()) {
3292 Py_DECREF(new);
3293 return NULL;
3294 }
3295 if (value < 0 || value >= 256) {
3296 PyErr_SetString(PyExc_ValueError,
3297 "bytes must be in range(0, 256)");
3298 Py_DECREF(new);
3299 return NULL;
3300 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003301 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003302 }
3303 return new;
3304 }
3305 if (PyTuple_CheckExact(x)) {
3306 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3307 if (new == NULL)
3308 return NULL;
3309 for (i = 0; i < Py_SIZE(x); i++) {
3310 Py_ssize_t value = PyNumber_AsSsize_t(
3311 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3312 if (value == -1 && PyErr_Occurred()) {
3313 Py_DECREF(new);
3314 return NULL;
3315 }
3316 if (value < 0 || value >= 256) {
3317 PyErr_SetString(PyExc_ValueError,
3318 "bytes must be in range(0, 256)");
3319 Py_DECREF(new);
3320 return NULL;
3321 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003322 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003323 }
3324 return new;
3325 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003327 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003328 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003329 if (size == -1 && PyErr_Occurred())
3330 return NULL;
3331 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3332 returning a shared empty bytes string. This required because we
3333 want to call _PyBytes_Resize() the returned object, which we can
3334 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003335 if (size == 0)
3336 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003337 new = PyBytes_FromStringAndSize(NULL, size);
3338 if (new == NULL)
3339 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003340 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003342 /* Get the iterator */
3343 it = PyObject_GetIter(x);
3344 if (it == NULL)
3345 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003347 /* Run the iterator to exhaustion */
3348 for (i = 0; ; i++) {
3349 PyObject *item;
3350 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003352 /* Get the next item */
3353 item = PyIter_Next(it);
3354 if (item == NULL) {
3355 if (PyErr_Occurred())
3356 goto error;
3357 break;
3358 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003360 /* Interpret it as an int (__index__) */
3361 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3362 Py_DECREF(item);
3363 if (value == -1 && PyErr_Occurred())
3364 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003366 /* Range check */
3367 if (value < 0 || value >= 256) {
3368 PyErr_SetString(PyExc_ValueError,
3369 "bytes must be in range(0, 256)");
3370 goto error;
3371 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003373 /* Append the byte */
3374 if (i >= size) {
3375 size = 2 * size + 1;
3376 if (_PyBytes_Resize(&new, size) < 0)
3377 goto error;
3378 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003379 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003380 }
3381 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003383 /* Clean up and return success */
3384 Py_DECREF(it);
3385 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003386
3387 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003388 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003389 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003390 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003391}
3392
3393static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003394bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003395{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003396 PyObject *tmp, *pnew;
3397 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003399 assert(PyType_IsSubtype(type, &PyBytes_Type));
3400 tmp = bytes_new(&PyBytes_Type, args, kwds);
3401 if (tmp == NULL)
3402 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02003403 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003404 n = PyBytes_GET_SIZE(tmp);
3405 pnew = type->tp_alloc(type, n);
3406 if (pnew != NULL) {
3407 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3408 PyBytes_AS_STRING(tmp), n+1);
3409 ((PyBytesObject *)pnew)->ob_shash =
3410 ((PyBytesObject *)tmp)->ob_shash;
3411 }
3412 Py_DECREF(tmp);
3413 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003414}
3415
/* Docstring of the bytes type (installed as tp_doc of PyBytes_Type): lists
   the accepted constructor call forms and the kinds of source objects. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003416PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003417"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003418bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003419bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003420bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3421bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003422\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003423Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003424 - an iterable yielding integers in range(256)\n\
3425 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003426 - any object implementing the buffer API.\n\
3427 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003428
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003429static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003430
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003431PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003432 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3433 "bytes",
3434 PyBytesObject_SIZE,
3435 sizeof(char),
3436 bytes_dealloc, /* tp_dealloc */
3437 0, /* tp_print */
3438 0, /* tp_getattr */
3439 0, /* tp_setattr */
3440 0, /* tp_reserved */
3441 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003442 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003443 &bytes_as_sequence, /* tp_as_sequence */
3444 &bytes_as_mapping, /* tp_as_mapping */
3445 (hashfunc)bytes_hash, /* tp_hash */
3446 0, /* tp_call */
3447 bytes_str, /* tp_str */
3448 PyObject_GenericGetAttr, /* tp_getattro */
3449 0, /* tp_setattro */
3450 &bytes_as_buffer, /* tp_as_buffer */
3451 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3452 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3453 bytes_doc, /* tp_doc */
3454 0, /* tp_traverse */
3455 0, /* tp_clear */
3456 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3457 0, /* tp_weaklistoffset */
3458 bytes_iter, /* tp_iter */
3459 0, /* tp_iternext */
3460 bytes_methods, /* tp_methods */
3461 0, /* tp_members */
3462 0, /* tp_getset */
3463 &PyBaseObject_Type, /* tp_base */
3464 0, /* tp_dict */
3465 0, /* tp_descr_get */
3466 0, /* tp_descr_set */
3467 0, /* tp_dictoffset */
3468 0, /* tp_init */
3469 0, /* tp_alloc */
3470 bytes_new, /* tp_new */
3471 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003472};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003473
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003474void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003475PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003476{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003477 assert(pv != NULL);
3478 if (*pv == NULL)
3479 return;
3480 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003481 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003482 return;
3483 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003484
3485 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3486 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003487 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003488 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003489
Antoine Pitrou161d6952014-05-01 14:36:20 +02003490 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003491 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003492 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3493 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3494 Py_CLEAR(*pv);
3495 return;
3496 }
3497
3498 oldsize = PyBytes_GET_SIZE(*pv);
3499 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3500 PyErr_NoMemory();
3501 goto error;
3502 }
3503 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3504 goto error;
3505
3506 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3507 PyBuffer_Release(&wb);
3508 return;
3509
3510 error:
3511 PyBuffer_Release(&wb);
3512 Py_CLEAR(*pv);
3513 return;
3514 }
3515
3516 else {
3517 /* Multiple references, need to create new object */
3518 PyObject *v;
3519 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03003520 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003521 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003522}
3523
3524void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003525PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003526{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003527 PyBytes_Concat(pv, w);
3528 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003529}
3530
3531
Ethan Furmanb95b5612015-01-23 20:05:18 -08003532/* The following function breaks the notion that bytes are immutable:
3533 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003534 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003535 as creating a new bytes object and destroying the old one, only
3536 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003537 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003538 Note that if there's not enough memory to resize the bytes object, the
3539 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003540 memory" exception is set, and -1 is returned. Else (on success) 0 is
3541 returned, and the value in *pv may or may not be the same as on input.
3542 As always, an extra byte is allocated for a trailing \0 byte (newsize
3543 does *not* include that), and a trailing \0 byte is stored.
3544*/
3545
3546int
3547_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3548{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003549 PyObject *v;
3550 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003551 v = *pv;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003552 if (!PyBytes_Check(v) || newsize < 0) {
3553 goto error;
3554 }
3555 if (Py_SIZE(v) == newsize) {
3556 /* return early if newsize equals to v->ob_size */
3557 return 0;
3558 }
3559 if (Py_REFCNT(v) != 1) {
3560 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003561 }
3562 /* XXX UNREF/NEWREF interface should be more symmetrical */
3563 _Py_DEC_REFTOTAL;
3564 _Py_ForgetReference(v);
3565 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003566 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003567 if (*pv == NULL) {
3568 PyObject_Del(v);
3569 PyErr_NoMemory();
3570 return -1;
3571 }
3572 _Py_NewReference(*pv);
3573 sv = (PyBytesObject *) *pv;
3574 Py_SIZE(sv) = newsize;
3575 sv->ob_sval[newsize] = '\0';
3576 sv->ob_shash = -1; /* invalidate cached hash value */
3577 return 0;
Berker Peksag4a72a7b2016-09-16 17:31:06 +03003578error:
3579 *pv = 0;
3580 Py_DECREF(v);
3581 PyErr_BadInternalCall();
3582 return -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003583}
3584
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003585void
3586PyBytes_Fini(void)
3587{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003588 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003589 for (i = 0; i < UCHAR_MAX + 1; i++)
3590 Py_CLEAR(characters[i]);
3591 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003592}
3593
Benjamin Peterson4116f362008-05-27 00:36:20 +00003594/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003595
3596typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003597 PyObject_HEAD
3598 Py_ssize_t it_index;
3599 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003600} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003601
3602static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003603striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003604{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003605 _PyObject_GC_UNTRACK(it);
3606 Py_XDECREF(it->it_seq);
3607 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003608}
3609
3610static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003611striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003612{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003613 Py_VISIT(it->it_seq);
3614 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003615}
3616
3617static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003618striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003619{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003620 PyBytesObject *seq;
3621 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003623 assert(it != NULL);
3624 seq = it->it_seq;
3625 if (seq == NULL)
3626 return NULL;
3627 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003629 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3630 item = PyLong_FromLong(
3631 (unsigned char)seq->ob_sval[it->it_index]);
3632 if (item != NULL)
3633 ++it->it_index;
3634 return item;
3635 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003637 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003638 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003639 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003640}
3641
3642static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003643striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003644{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003645 Py_ssize_t len = 0;
3646 if (it->it_seq)
3647 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3648 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003649}
3650
3651PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003652 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003653
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003654static PyObject *
3655striter_reduce(striterobject *it)
3656{
3657 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003658 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003659 it->it_seq, it->it_index);
3660 } else {
3661 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3662 if (u == NULL)
3663 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003664 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003665 }
3666}
3667
3668PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3669
3670static PyObject *
3671striter_setstate(striterobject *it, PyObject *state)
3672{
3673 Py_ssize_t index = PyLong_AsSsize_t(state);
3674 if (index == -1 && PyErr_Occurred())
3675 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003676 if (it->it_seq != NULL) {
3677 if (index < 0)
3678 index = 0;
3679 else if (index > PyBytes_GET_SIZE(it->it_seq))
3680 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3681 it->it_index = index;
3682 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003683 Py_RETURN_NONE;
3684}
3685
3686PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3687
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003688static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003689 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3690 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003691 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3692 reduce_doc},
3693 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3694 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003695 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003696};
3697
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003698PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003699 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3700 "bytes_iterator", /* tp_name */
3701 sizeof(striterobject), /* tp_basicsize */
3702 0, /* tp_itemsize */
3703 /* methods */
3704 (destructor)striter_dealloc, /* tp_dealloc */
3705 0, /* tp_print */
3706 0, /* tp_getattr */
3707 0, /* tp_setattr */
3708 0, /* tp_reserved */
3709 0, /* tp_repr */
3710 0, /* tp_as_number */
3711 0, /* tp_as_sequence */
3712 0, /* tp_as_mapping */
3713 0, /* tp_hash */
3714 0, /* tp_call */
3715 0, /* tp_str */
3716 PyObject_GenericGetAttr, /* tp_getattro */
3717 0, /* tp_setattro */
3718 0, /* tp_as_buffer */
3719 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3720 0, /* tp_doc */
3721 (traverseproc)striter_traverse, /* tp_traverse */
3722 0, /* tp_clear */
3723 0, /* tp_richcompare */
3724 0, /* tp_weaklistoffset */
3725 PyObject_SelfIter, /* tp_iter */
3726 (iternextfunc)striter_next, /* tp_iternext */
3727 striter_methods, /* tp_methods */
3728 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003729};
3730
3731static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003732bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003733{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003734 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003736 if (!PyBytes_Check(seq)) {
3737 PyErr_BadInternalCall();
3738 return NULL;
3739 }
3740 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3741 if (it == NULL)
3742 return NULL;
3743 it->it_index = 0;
3744 Py_INCREF(seq);
3745 it->it_seq = (PyBytesObject *)seq;
3746 _PyObject_GC_TRACK(it);
3747 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003748}