blob: 46322aadf041fcbf7fbde0d87834084b68afd447 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Mark Dickinsonfd24b322008-12-06 15:33:31 +000025/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26 for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Christian Heimes2c9c7a52008-05-26 13:42:13 +000033/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000034 For PyBytes_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
36
37 For PyBytes_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000045 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046 alter the data yourself, since the strings may be shared.
47
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020050 allocated for string data, not counting the null terminating character.
51 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 PyBytes_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyBytes_FromString()).
54*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
/* Bit flags recording which conversion-flag characters were seen in a
 * format specifier:
 *
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST (0x01)
#define F_SIGN  (0x02)
#define F_BLANK (0x04)
#define F_ALT   (0x08)
#define F_ZERO  (0x10)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
412static PyObject *
413formatfloat(PyObject *v, int flags, int prec, int type)
414{
415 char *p;
416 PyObject *result;
417 double x;
418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
434 result = PyBytes_FromStringAndSize(p, strlen(p));
435 PyMem_Free(p);
436 return result;
437}
438
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300439static PyObject *
440formatlong(PyObject *v, int flags, int prec, int type)
441{
442 PyObject *result, *iobj;
443 if (type == 'i')
444 type = 'd';
445 if (PyLong_Check(v))
446 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
447 if (PyNumber_Check(v)) {
448 /* make sure number is a type of integer for o, x, and X */
449 if (type == 'o' || type == 'x' || type == 'X')
450 iobj = PyNumber_Index(v);
451 else
452 iobj = PyNumber_Long(v);
453 if (iobj == NULL) {
454 if (!PyErr_ExceptionMatches(PyExc_TypeError))
455 return NULL;
456 }
457 else if (!PyLong_Check(iobj))
458 Py_CLEAR(iobj);
459 if (iobj != NULL) {
460 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
461 Py_DECREF(iobj);
462 return result;
463 }
464 }
465 PyErr_Format(PyExc_TypeError,
466 "%%%c format: %s is required, not %.200s", type,
467 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
468 : "a number",
469 Py_TYPE(v)->tp_name);
470 return NULL;
471}
472
473static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200474byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800475{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200476 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
477 *p = PyBytes_AS_STRING(arg)[0];
478 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800479 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200480 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
481 *p = PyByteArray_AS_STRING(arg)[0];
482 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483 }
484 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300485 PyObject *iobj;
486 long ival;
487 int overflow;
488 /* make sure number is a type of integer */
489 if (PyLong_Check(arg)) {
490 ival = PyLong_AsLongAndOverflow(arg, &overflow);
491 }
492 else {
493 iobj = PyNumber_Index(arg);
494 if (iobj == NULL) {
495 if (!PyErr_ExceptionMatches(PyExc_TypeError))
496 return 0;
497 goto onError;
498 }
499 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
500 Py_DECREF(iobj);
501 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300502 if (!overflow && ival == -1 && PyErr_Occurred())
503 goto onError;
504 if (overflow || !(0 <= ival && ival <= 255)) {
505 PyErr_SetString(PyExc_OverflowError,
506 "%c arg not in range(256)");
507 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800508 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300509 *p = (char)ival;
510 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800511 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300512 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200513 PyErr_SetString(PyExc_TypeError,
514 "%c requires an integer in range(256) or a single byte");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516}
517
518static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200519format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 /* is it a bytes object? */
524 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 *pbuf = PyBytes_AS_STRING(v);
526 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800527 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 return v;
529 }
530 if (PyByteArray_Check(v)) {
531 *pbuf = PyByteArray_AS_STRING(v);
532 *plen = PyByteArray_GET_SIZE(v);
533 Py_INCREF(v);
534 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 }
536 /* does it support __bytes__? */
537 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
538 if (func != NULL) {
539 result = PyObject_CallFunctionObjArgs(func, NULL);
540 Py_DECREF(func);
541 if (result == NULL)
542 return NULL;
543 if (!PyBytes_Check(result)) {
544 PyErr_Format(PyExc_TypeError,
545 "__bytes__ returned non-bytes (type %.200s)",
546 Py_TYPE(result)->tp_name);
547 Py_DECREF(result);
548 return NULL;
549 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200550 *pbuf = PyBytes_AS_STRING(result);
551 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 return result;
553 }
554 PyErr_Format(PyExc_TypeError,
555 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
556 Py_TYPE(v)->tp_name);
557 return NULL;
558}
559
560/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
561
562 FORMATBUFLEN is the length of the buffer in which the ints &
563 chars are formatted. XXX This is a magic number. Each formatting
564 routine does bounds checking to ensure no overflow, but a better
565 solution may be to malloc a buffer of appropriate size for each
566 format. For now, the current solution is sufficient.
567*/
568#define FORMATBUFLEN (size_t)120
569
570PyObject *
571_PyBytes_Format(PyObject *format, PyObject *args)
572{
573 char *fmt, *res;
574 Py_ssize_t arglen, argidx;
575 Py_ssize_t reslen, rescnt, fmtcnt;
576 int args_owned = 0;
577 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578 PyObject *dict = NULL;
579 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
580 PyErr_BadInternalCall();
581 return NULL;
582 }
583 fmt = PyBytes_AS_STRING(format);
584 fmtcnt = PyBytes_GET_SIZE(format);
585 reslen = rescnt = fmtcnt + 100;
586 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
587 if (result == NULL)
588 return NULL;
589 res = PyBytes_AsString(result);
590 if (PyTuple_Check(args)) {
591 arglen = PyTuple_GET_SIZE(args);
592 argidx = 0;
593 }
594 else {
595 arglen = -1;
596 argidx = -2;
597 }
598 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
599 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
600 !PyByteArray_Check(args)) {
601 dict = args;
602 }
603 while (--fmtcnt >= 0) {
604 if (*fmt != '%') {
605 if (--rescnt < 0) {
606 rescnt = fmtcnt + 100;
607 reslen += rescnt;
608 if (_PyBytes_Resize(&result, reslen))
609 return NULL;
610 res = PyBytes_AS_STRING(result)
611 + reslen - rescnt;
612 --rescnt;
613 }
614 *res++ = *fmt++;
615 }
616 else {
617 /* Got a format specifier */
618 int flags = 0;
619 Py_ssize_t width = -1;
620 int prec = -1;
621 int c = '\0';
622 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800623 PyObject *v = NULL;
624 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200625 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800626 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200627 Py_ssize_t len = 0;
628 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629
Ethan Furmanb95b5612015-01-23 20:05:18 -0800630 fmt++;
631 if (*fmt == '(') {
632 char *keystart;
633 Py_ssize_t keylen;
634 PyObject *key;
635 int pcount = 1;
636
637 if (dict == NULL) {
638 PyErr_SetString(PyExc_TypeError,
639 "format requires a mapping");
640 goto error;
641 }
642 ++fmt;
643 --fmtcnt;
644 keystart = fmt;
645 /* Skip over balanced parentheses */
646 while (pcount > 0 && --fmtcnt >= 0) {
647 if (*fmt == ')')
648 --pcount;
649 else if (*fmt == '(')
650 ++pcount;
651 fmt++;
652 }
653 keylen = fmt - keystart - 1;
654 if (fmtcnt < 0 || pcount > 0) {
655 PyErr_SetString(PyExc_ValueError,
656 "incomplete format key");
657 goto error;
658 }
659 key = PyBytes_FromStringAndSize(keystart,
660 keylen);
661 if (key == NULL)
662 goto error;
663 if (args_owned) {
664 Py_DECREF(args);
665 args_owned = 0;
666 }
667 args = PyObject_GetItem(dict, key);
668 Py_DECREF(key);
669 if (args == NULL) {
670 goto error;
671 }
672 args_owned = 1;
673 arglen = -1;
674 argidx = -2;
675 }
676 while (--fmtcnt >= 0) {
677 switch (c = *fmt++) {
678 case '-': flags |= F_LJUST; continue;
679 case '+': flags |= F_SIGN; continue;
680 case ' ': flags |= F_BLANK; continue;
681 case '#': flags |= F_ALT; continue;
682 case '0': flags |= F_ZERO; continue;
683 }
684 break;
685 }
686 if (c == '*') {
687 v = getnextarg(args, arglen, &argidx);
688 if (v == NULL)
689 goto error;
690 if (!PyLong_Check(v)) {
691 PyErr_SetString(PyExc_TypeError,
692 "* wants int");
693 goto error;
694 }
695 width = PyLong_AsSsize_t(v);
696 if (width == -1 && PyErr_Occurred())
697 goto error;
698 if (width < 0) {
699 flags |= F_LJUST;
700 width = -width;
701 }
702 if (--fmtcnt >= 0)
703 c = *fmt++;
704 }
705 else if (c >= 0 && isdigit(c)) {
706 width = c - '0';
707 while (--fmtcnt >= 0) {
708 c = Py_CHARMASK(*fmt++);
709 if (!isdigit(c))
710 break;
711 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
712 PyErr_SetString(
713 PyExc_ValueError,
714 "width too big");
715 goto error;
716 }
717 width = width*10 + (c - '0');
718 }
719 }
720 if (c == '.') {
721 prec = 0;
722 if (--fmtcnt >= 0)
723 c = *fmt++;
724 if (c == '*') {
725 v = getnextarg(args, arglen, &argidx);
726 if (v == NULL)
727 goto error;
728 if (!PyLong_Check(v)) {
729 PyErr_SetString(
730 PyExc_TypeError,
731 "* wants int");
732 goto error;
733 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200734 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800735 if (prec == -1 && PyErr_Occurred())
736 goto error;
737 if (prec < 0)
738 prec = 0;
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 prec = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "prec too big");
752 goto error;
753 }
754 prec = prec*10 + (c - '0');
755 }
756 }
757 } /* prec */
758 if (fmtcnt >= 0) {
759 if (c == 'h' || c == 'l' || c == 'L') {
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 }
763 }
764 if (fmtcnt < 0) {
765 PyErr_SetString(PyExc_ValueError,
766 "incomplete format");
767 goto error;
768 }
769 if (c != '%') {
770 v = getnextarg(args, arglen, &argidx);
771 if (v == NULL)
772 goto error;
773 }
774 sign = 0;
775 fill = ' ';
776 switch (c) {
777 case '%':
778 pbuf = "%";
779 len = 1;
780 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700781 case 'r':
782 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800783 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200784 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (temp == NULL)
786 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200787 assert(PyUnicode_IS_ASCII(temp));
788 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
789 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800790 if (prec >= 0 && len > prec)
791 len = prec;
792 break;
793 case 's':
794 // %s is only for 2/3 code; 3 only code should use %b
795 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200796 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800797 if (temp == NULL)
798 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800799 if (prec >= 0 && len > prec)
800 len = prec;
801 break;
802 case 'i':
803 case 'd':
804 case 'u':
805 case 'o':
806 case 'x':
807 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300808 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200809 if (!temp)
810 goto error;
811 assert(PyUnicode_IS_ASCII(temp));
812 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
813 len = PyUnicode_GET_LENGTH(temp);
814 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800815 if (flags & F_ZERO)
816 fill = '0';
817 break;
818 case 'e':
819 case 'E':
820 case 'f':
821 case 'F':
822 case 'g':
823 case 'G':
824 temp = formatfloat(v, flags, prec, c);
825 if (temp == NULL)
826 goto error;
827 pbuf = PyBytes_AS_STRING(temp);
828 len = PyBytes_GET_SIZE(temp);
829 sign = 1;
830 if (flags & F_ZERO)
831 fill = '0';
832 break;
833 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 pbuf = &onechar;
835 len = byte_converter(v, &onechar);
836 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 goto error;
838 break;
839 default:
840 PyErr_Format(PyExc_ValueError,
841 "unsupported format character '%c' (0x%x) "
842 "at index %zd",
843 c, c,
844 (Py_ssize_t)(fmt - 1 -
845 PyBytes_AsString(format)));
846 goto error;
847 }
848 if (sign) {
849 if (*pbuf == '-' || *pbuf == '+') {
850 sign = *pbuf++;
851 len--;
852 }
853 else if (flags & F_SIGN)
854 sign = '+';
855 else if (flags & F_BLANK)
856 sign = ' ';
857 else
858 sign = 0;
859 }
860 if (width < len)
861 width = len;
862 if (rescnt - (sign != 0) < width) {
863 reslen -= rescnt;
864 rescnt = width + fmtcnt + 100;
865 reslen += rescnt;
866 if (reslen < 0) {
867 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800868 Py_XDECREF(temp);
869 return PyErr_NoMemory();
870 }
871 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800872 Py_XDECREF(temp);
873 return NULL;
874 }
875 res = PyBytes_AS_STRING(result)
876 + reslen - rescnt;
877 }
878 if (sign) {
879 if (fill != ' ')
880 *res++ = sign;
881 rescnt--;
882 if (width > len)
883 width--;
884 }
885 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
886 assert(pbuf[0] == '0');
887 assert(pbuf[1] == c);
888 if (fill != ' ') {
889 *res++ = *pbuf++;
890 *res++ = *pbuf++;
891 }
892 rescnt -= 2;
893 width -= 2;
894 if (width < 0)
895 width = 0;
896 len -= 2;
897 }
898 if (width > len && !(flags & F_LJUST)) {
899 do {
900 --rescnt;
901 *res++ = fill;
902 } while (--width > len);
903 }
904 if (fill == ' ') {
905 if (sign)
906 *res++ = sign;
907 if ((flags & F_ALT) &&
908 (c == 'x' || c == 'X')) {
909 assert(pbuf[0] == '0');
910 assert(pbuf[1] == c);
911 *res++ = *pbuf++;
912 *res++ = *pbuf++;
913 }
914 }
915 Py_MEMCPY(res, pbuf, len);
916 res += len;
917 rescnt -= len;
918 while (--width >= len) {
919 --rescnt;
920 *res++ = ' ';
921 }
922 if (dict && (argidx < arglen) && c != '%') {
923 PyErr_SetString(PyExc_TypeError,
924 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 Py_XDECREF(temp);
926 goto error;
927 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 Py_XDECREF(temp);
929 } /* '%' */
930 } /* until end */
931 if (argidx < arglen && !dict) {
932 PyErr_SetString(PyExc_TypeError,
933 "not all arguments converted during bytes formatting");
934 goto error;
935 }
936 if (args_owned) {
937 Py_DECREF(args);
938 }
939 if (_PyBytes_Resize(&result, reslen - rescnt))
940 return NULL;
941 return result;
942
943 error:
944 Py_DECREF(result);
945 if (args_owned) {
946 Py_DECREF(args);
947 }
948 return NULL;
949}
950
951/* =-= */
952
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000953static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000957}
958
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959/* Unescape a backslash-escaped string. If unicode is non-zero,
960 the string is a u-literal. If recode_encoding is non-zero,
961 the string is UTF-8 encoded and should be re-encoded in the
962 specified encoding. */
963
964PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 Py_ssize_t len,
966 const char *errors,
967 Py_ssize_t unicode,
968 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 int c;
971 char *p, *buf;
972 const char *end;
973 PyObject *v;
974 Py_ssize_t newlen = recode_encoding ? 4*len:len;
975 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
976 if (v == NULL)
977 return NULL;
978 p = buf = PyBytes_AsString(v);
979 end = s + len;
980 while (s < end) {
981 if (*s != '\\') {
982 non_esc:
983 if (recode_encoding && (*s & 0x80)) {
984 PyObject *u, *w;
985 char *r;
986 const char* t;
987 Py_ssize_t rn;
988 t = s;
989 /* Decode non-ASCII bytes as UTF-8. */
990 while (t < end && (*t & 0x80)) t++;
991 u = PyUnicode_DecodeUTF8(s, t - s, errors);
992 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 /* Recode them in target encoding. */
995 w = PyUnicode_AsEncodedString(
996 u, recode_encoding, errors);
997 Py_DECREF(u);
998 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 /* Append bytes to output buffer. */
1001 assert(PyBytes_Check(w));
1002 r = PyBytes_AS_STRING(w);
1003 rn = PyBytes_GET_SIZE(w);
1004 Py_MEMCPY(p, r, rn);
1005 p += rn;
1006 Py_DECREF(w);
1007 s = t;
1008 } else {
1009 *p++ = *s++;
1010 }
1011 continue;
1012 }
1013 s++;
1014 if (s==end) {
1015 PyErr_SetString(PyExc_ValueError,
1016 "Trailing \\ in string");
1017 goto failed;
1018 }
1019 switch (*s++) {
1020 /* XXX This assumes ASCII! */
1021 case '\n': break;
1022 case '\\': *p++ = '\\'; break;
1023 case '\'': *p++ = '\''; break;
1024 case '\"': *p++ = '\"'; break;
1025 case 'b': *p++ = '\b'; break;
1026 case 'f': *p++ = '\014'; break; /* FF */
1027 case 't': *p++ = '\t'; break;
1028 case 'n': *p++ = '\n'; break;
1029 case 'r': *p++ = '\r'; break;
1030 case 'v': *p++ = '\013'; break; /* VT */
1031 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1032 case '0': case '1': case '2': case '3':
1033 case '4': case '5': case '6': case '7':
1034 c = s[-1] - '0';
1035 if (s < end && '0' <= *s && *s <= '7') {
1036 c = (c<<3) + *s++ - '0';
1037 if (s < end && '0' <= *s && *s <= '7')
1038 c = (c<<3) + *s++ - '0';
1039 }
1040 *p++ = c;
1041 break;
1042 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001043 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 unsigned int x = 0;
1045 c = Py_CHARMASK(*s);
1046 s++;
David Malcolm96960882010-11-05 17:23:41 +00001047 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001049 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 x = 10 + c - 'a';
1051 else
1052 x = 10 + c - 'A';
1053 x = x << 4;
1054 c = Py_CHARMASK(*s);
1055 s++;
David Malcolm96960882010-11-05 17:23:41 +00001056 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001058 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 x += 10 + c - 'a';
1060 else
1061 x += 10 + c - 'A';
1062 *p++ = x;
1063 break;
1064 }
1065 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001066 PyErr_Format(PyExc_ValueError,
1067 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001068 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 goto failed;
1070 }
1071 if (strcmp(errors, "replace") == 0) {
1072 *p++ = '?';
1073 } else if (strcmp(errors, "ignore") == 0)
1074 /* do nothing */;
1075 else {
1076 PyErr_Format(PyExc_ValueError,
1077 "decoding error; unknown "
1078 "error handling code: %.400s",
1079 errors);
1080 goto failed;
1081 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001082 /* skip \x */
1083 if (s < end && Py_ISXDIGIT(s[0]))
1084 s++; /* and a hexdigit */
1085 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 default:
1087 *p++ = '\\';
1088 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001089 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 UTF-8 bytes may follow. */
1091 }
1092 }
1093 if (p-buf < newlen)
1094 _PyBytes_Resize(&v, p - buf);
1095 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_DECREF(v);
1098 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099}
1100
1101/* -------------------------------------------------------------------- */
1102/* object api */
1103
1104Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001105PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 if (!PyBytes_Check(op)) {
1108 PyErr_Format(PyExc_TypeError,
1109 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1110 return -1;
1111 }
1112 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113}
1114
1115char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001116PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (!PyBytes_Check(op)) {
1119 PyErr_Format(PyExc_TypeError,
1120 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1121 return NULL;
1122 }
1123 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124}
1125
1126int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001127PyBytes_AsStringAndSize(PyObject *obj,
1128 char **s,
1129 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001130{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 if (s == NULL) {
1132 PyErr_BadInternalCall();
1133 return -1;
1134 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 if (!PyBytes_Check(obj)) {
1137 PyErr_Format(PyExc_TypeError,
1138 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1139 return -1;
1140 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 *s = PyBytes_AS_STRING(obj);
1143 if (len != NULL)
1144 *len = PyBytes_GET_SIZE(obj);
1145 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001146 PyErr_SetString(PyExc_ValueError,
1147 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 return -1;
1149 }
1150 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001151}
Neal Norwitz6968b052007-02-27 19:02:19 +00001152
1153/* -------------------------------------------------------------------- */
1154/* Methods */
1155
Eric Smith0923d1d2009-04-16 20:16:10 +00001156#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001157
1158#include "stringlib/fastsearch.h"
1159#include "stringlib/count.h"
1160#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001161#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001162#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001163#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001164#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001165
Eric Smith0f78bff2009-11-30 01:01:42 +00001166#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001167
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168PyObject *
1169PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001170{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001171 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001172 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001173 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001175 unsigned char quote, *s, *p;
1176
1177 /* Compute size of output string */
1178 squotes = dquotes = 0;
1179 newsize = 3; /* b'' */
1180 s = (unsigned char*)op->ob_sval;
1181 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001182 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001184 case '\'': squotes++; break;
1185 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001186 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001187 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001188 default:
1189 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001190 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001191 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001192 if (newsize > PY_SSIZE_T_MAX - incr)
1193 goto overflow;
1194 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001195 }
1196 quote = '\'';
1197 if (smartquotes && squotes && !dquotes)
1198 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001199 if (squotes && quote == '\'') {
1200 if (newsize > PY_SSIZE_T_MAX - squotes)
1201 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001202 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001204
1205 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 if (v == NULL) {
1207 return NULL;
1208 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001209 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001211 *p++ = 'b', *p++ = quote;
1212 for (i = 0; i < length; i++) {
1213 unsigned char c = op->ob_sval[i];
1214 if (c == quote || c == '\\')
1215 *p++ = '\\', *p++ = c;
1216 else if (c == '\t')
1217 *p++ = '\\', *p++ = 't';
1218 else if (c == '\n')
1219 *p++ = '\\', *p++ = 'n';
1220 else if (c == '\r')
1221 *p++ = '\\', *p++ = 'r';
1222 else if (c < ' ' || c >= 0x7f) {
1223 *p++ = '\\';
1224 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001225 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1226 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001228 else
1229 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001231 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001232 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001233 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001234
1235 overflow:
1236 PyErr_SetString(PyExc_OverflowError,
1237 "bytes object is too large to make repr");
1238 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001239}
1240
Neal Norwitz6968b052007-02-27 19:02:19 +00001241static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001242bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001245}
1246
Neal Norwitz6968b052007-02-27 19:02:19 +00001247static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001248bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (Py_BytesWarningFlag) {
1251 if (PyErr_WarnEx(PyExc_BytesWarning,
1252 "str() on a bytes instance", 1))
1253 return NULL;
1254 }
1255 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001256}
1257
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001259bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001262}
Neal Norwitz6968b052007-02-27 19:02:19 +00001263
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264/* This is also used by PyBytes_Concat() */
1265static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001266bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 Py_ssize_t size;
1269 Py_buffer va, vb;
1270 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 va.len = -1;
1273 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001274 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1275 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1277 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1278 goto done;
1279 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 /* Optimize end cases */
1282 if (va.len == 0 && PyBytes_CheckExact(b)) {
1283 result = b;
1284 Py_INCREF(result);
1285 goto done;
1286 }
1287 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1288 result = a;
1289 Py_INCREF(result);
1290 goto done;
1291 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 size = va.len + vb.len;
1294 if (size < 0) {
1295 PyErr_NoMemory();
1296 goto done;
1297 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 result = PyBytes_FromStringAndSize(NULL, size);
1300 if (result != NULL) {
1301 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1302 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1303 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
1305 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 if (va.len != -1)
1307 PyBuffer_Release(&va);
1308 if (vb.len != -1)
1309 PyBuffer_Release(&vb);
1310 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311}
Neal Norwitz6968b052007-02-27 19:02:19 +00001312
1313static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001314bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001315{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001316 Py_ssize_t i;
1317 Py_ssize_t j;
1318 Py_ssize_t size;
1319 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 size_t nbytes;
1321 if (n < 0)
1322 n = 0;
1323 /* watch out for overflows: the size can overflow int,
1324 * and the # of bytes needed can overflow size_t
1325 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001326 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyErr_SetString(PyExc_OverflowError,
1328 "repeated bytes are too long");
1329 return NULL;
1330 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001331 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1333 Py_INCREF(a);
1334 return (PyObject *)a;
1335 }
1336 nbytes = (size_t)size;
1337 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1338 PyErr_SetString(PyExc_OverflowError,
1339 "repeated bytes are too long");
1340 return NULL;
1341 }
1342 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1343 if (op == NULL)
1344 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001345 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 op->ob_shash = -1;
1347 op->ob_sval[size] = '\0';
1348 if (Py_SIZE(a) == 1 && n > 0) {
1349 memset(op->ob_sval, a->ob_sval[0] , n);
1350 return (PyObject *) op;
1351 }
1352 i = 0;
1353 if (i < size) {
1354 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1355 i = Py_SIZE(a);
1356 }
1357 while (i < size) {
1358 j = (i <= size-i) ? i : size-i;
1359 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1360 i += j;
1361 }
1362 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001363}
1364
Guido van Rossum98297ee2007-11-06 21:34:58 +00001365static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001366bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367{
1368 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1369 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001370 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001371 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001372 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001373 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001374 return -1;
1375 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1376 varg.buf, varg.len, 0);
1377 PyBuffer_Release(&varg);
1378 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001379 }
1380 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001381 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1382 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001383 }
1384
Antoine Pitrou0010d372010-08-15 17:12:55 +00001385 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001386}
1387
Neal Norwitz6968b052007-02-27 19:02:19 +00001388static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001389bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 if (i < 0 || i >= Py_SIZE(a)) {
1392 PyErr_SetString(PyExc_IndexError, "index out of range");
1393 return NULL;
1394 }
1395 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001396}
1397
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001398Py_LOCAL(int)
1399bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1400{
1401 int cmp;
1402 Py_ssize_t len;
1403
1404 len = Py_SIZE(a);
1405 if (Py_SIZE(b) != len)
1406 return 0;
1407
1408 if (a->ob_sval[0] != b->ob_sval[0])
1409 return 0;
1410
1411 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1412 return (cmp == 0);
1413}
1414
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001416bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 int c;
1419 Py_ssize_t len_a, len_b;
1420 Py_ssize_t min_len;
1421 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001422 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 /* Make sure both arguments are strings. */
1425 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001426 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001427 rc = PyObject_IsInstance((PyObject*)a,
1428 (PyObject*)&PyUnicode_Type);
1429 if (!rc)
1430 rc = PyObject_IsInstance((PyObject*)b,
1431 (PyObject*)&PyUnicode_Type);
1432 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001434 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001435 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001436 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001437 return NULL;
1438 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001439 else {
1440 rc = PyObject_IsInstance((PyObject*)a,
1441 (PyObject*)&PyLong_Type);
1442 if (!rc)
1443 rc = PyObject_IsInstance((PyObject*)b,
1444 (PyObject*)&PyLong_Type);
1445 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001446 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001447 if (rc) {
1448 if (PyErr_WarnEx(PyExc_BytesWarning,
1449 "Comparison between bytes and int", 1))
1450 return NULL;
1451 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001452 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 }
1454 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001456 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001458 case Py_EQ:
1459 case Py_LE:
1460 case Py_GE:
1461 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001463 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001464 case Py_NE:
1465 case Py_LT:
1466 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001468 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001469 default:
1470 PyErr_BadArgument();
1471 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 }
1473 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001474 else if (op == Py_EQ || op == Py_NE) {
1475 int eq = bytes_compare_eq(a, b);
1476 eq ^= (op == Py_NE);
1477 result = eq ? Py_True : Py_False;
1478 }
1479 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001480 len_a = Py_SIZE(a);
1481 len_b = Py_SIZE(b);
1482 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001483 if (min_len > 0) {
1484 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001485 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001486 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001488 else
1489 c = 0;
1490 if (c == 0)
1491 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1492 switch (op) {
1493 case Py_LT: c = c < 0; break;
1494 case Py_LE: c = c <= 0; break;
1495 case Py_GT: c = c > 0; break;
1496 case Py_GE: c = c >= 0; break;
1497 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001498 PyErr_BadArgument();
1499 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001500 }
1501 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 Py_INCREF(result);
1505 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001506}
1507
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001508static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001509bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001510{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001511 if (a->ob_shash == -1) {
1512 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001513 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001514 }
1515 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001516}
1517
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001519bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 if (PyIndex_Check(item)) {
1522 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1523 if (i == -1 && PyErr_Occurred())
1524 return NULL;
1525 if (i < 0)
1526 i += PyBytes_GET_SIZE(self);
1527 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1528 PyErr_SetString(PyExc_IndexError,
1529 "index out of range");
1530 return NULL;
1531 }
1532 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1533 }
1534 else if (PySlice_Check(item)) {
1535 Py_ssize_t start, stop, step, slicelength, cur, i;
1536 char* source_buf;
1537 char* result_buf;
1538 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001539
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001540 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 PyBytes_GET_SIZE(self),
1542 &start, &stop, &step, &slicelength) < 0) {
1543 return NULL;
1544 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 if (slicelength <= 0) {
1547 return PyBytes_FromStringAndSize("", 0);
1548 }
1549 else if (start == 0 && step == 1 &&
1550 slicelength == PyBytes_GET_SIZE(self) &&
1551 PyBytes_CheckExact(self)) {
1552 Py_INCREF(self);
1553 return (PyObject *)self;
1554 }
1555 else if (step == 1) {
1556 return PyBytes_FromStringAndSize(
1557 PyBytes_AS_STRING(self) + start,
1558 slicelength);
1559 }
1560 else {
1561 source_buf = PyBytes_AS_STRING(self);
1562 result = PyBytes_FromStringAndSize(NULL, slicelength);
1563 if (result == NULL)
1564 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 result_buf = PyBytes_AS_STRING(result);
1567 for (cur = start, i = 0; i < slicelength;
1568 cur += step, i++) {
1569 result_buf[i] = source_buf[cur];
1570 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 return result;
1573 }
1574 }
1575 else {
1576 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001577 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 Py_TYPE(item)->tp_name);
1579 return NULL;
1580 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581}
1582
1583static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001584bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1587 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588}
1589
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001590static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 (lenfunc)bytes_length, /*sq_length*/
1592 (binaryfunc)bytes_concat, /*sq_concat*/
1593 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1594 (ssizeargfunc)bytes_item, /*sq_item*/
1595 0, /*sq_slice*/
1596 0, /*sq_ass_item*/
1597 0, /*sq_ass_slice*/
1598 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001599};
1600
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001601static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 (lenfunc)bytes_length,
1603 (binaryfunc)bytes_subscript,
1604 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001605};
1606
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001607static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 (getbufferproc)bytes_buffer_getbuffer,
1609 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001610};
1611
1612
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
   helpers further down the file; their users are not visible here. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2
1616
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001617/*[clinic input]
1618bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001619
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001620 sep: object = None
1621 The delimiter according which to split the bytes.
1622 None (the default value) means split on ASCII whitespace characters
1623 (space, tab, return, newline, formfeed, vertical tab).
1624 maxsplit: Py_ssize_t = -1
1625 Maximum number of splits to do.
1626 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001628Return a list of the sections in the bytes, using sep as the delimiter.
1629[clinic start generated code]*/
1630
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001631static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001632bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001633/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001634{
1635 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 const char *s = PyBytes_AS_STRING(self), *sub;
1637 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001638 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 if (maxsplit < 0)
1641 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001642 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001644 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 return NULL;
1646 sub = vsub.buf;
1647 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1650 PyBuffer_Release(&vsub);
1651 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001652}
1653
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001654/*[clinic input]
1655bytes.partition
1656
1657 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001658 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001659 /
1660
1661Partition the bytes into three parts using the given separator.
1662
1663This will search for the separator sep in the bytes. If the separator is found,
1664returns a 3-tuple containing the part before the separator, the separator
1665itself, and the part after it.
1666
1667If the separator is not found, returns a 3-tuple containing the original bytes
1668object and two empty bytes objects.
1669[clinic start generated code]*/
1670
Neal Norwitz6968b052007-02-27 19:02:19 +00001671static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001672bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001673/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001674{
Neal Norwitz6968b052007-02-27 19:02:19 +00001675 return stringlib_partition(
1676 (PyObject*) self,
1677 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001678 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001679 );
1680}
1681
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001682/*[clinic input]
1683bytes.rpartition
1684
1685 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001686 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001687 /
1688
1689Partition the bytes into three parts using the given separator.
1690
1691This will search for the separator sep in the bytes, starting and the end. If
1692the separator is found, returns a 3-tuple containing the part before the
1693separator, the separator itself, and the part after it.
1694
1695If the separator is not found, returns a 3-tuple containing two empty bytes
1696objects and the original bytes object.
1697[clinic start generated code]*/
1698
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001699static PyObject *
1700bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001701/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001702{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 return stringlib_rpartition(
1704 (PyObject*) self,
1705 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001706 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001708}
1709
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001710/*[clinic input]
1711bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001712
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001713Return a list of the sections in the bytes, using sep as the delimiter.
1714
1715Splitting is done starting at the end of the bytes and working to the front.
1716[clinic start generated code]*/
1717
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001718static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001719bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001720/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001721{
1722 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 const char *s = PyBytes_AS_STRING(self), *sub;
1724 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001725 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 if (maxsplit < 0)
1728 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001729 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001731 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return NULL;
1733 sub = vsub.buf;
1734 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1737 PyBuffer_Release(&vsub);
1738 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001739}
1740
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742/*[clinic input]
1743bytes.join
1744
1745 iterable_of_bytes: object
1746 /
1747
1748Concatenate any number of bytes objects.
1749
1750The bytes whose method is called is inserted in between each pair.
1751
1752The result is returned as a new bytes object.
1753
1754Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1755[clinic start generated code]*/
1756
Neal Norwitz6968b052007-02-27 19:02:19 +00001757static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001758bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001759/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001760{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001761 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001762}
1763
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764PyObject *
1765_PyBytes_Join(PyObject *sep, PyObject *x)
1766{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 assert(sep != NULL && PyBytes_Check(sep));
1768 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001769 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770}
1771
/* Helper macro to fix up start/end slice values against a length.

   - end larger than len is clamped to len;
   - a negative end or start is offset by len and, if still negative,
     clamped to 0.

   start and end must be modifiable lvalues.  len is evaluated more than
   once, so it must not have side effects.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   exactly one statement and can be used safely in an unbraced if/else;
   callers terminate it with a semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
1787Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001788bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001791 char byte;
1792 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001794 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001796 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouac65d962011-10-20 23:54:17 +02001798 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1799 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001800 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Antoine Pitrouac65d962011-10-20 23:54:17 +02001802 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001803 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001804 return -2;
1805
1806 sub = subbuf.buf;
1807 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001809 else {
1810 sub = &byte;
1811 sub_len = 1;
1812 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001813 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001815 ADJUST_INDICES(start, end, len);
1816 if (end - start < sub_len)
1817 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001818 else if (sub_len == 1
1819#ifndef HAVE_MEMRCHR
1820 && dir > 0
1821#endif
1822 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001823 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001824 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001825 res = stringlib_fastsearch_memchr_1char(
1826 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001827 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001828 if (res >= 0)
1829 res += start;
1830 }
1831 else {
1832 if (dir > 0)
1833 res = stringlib_find_slice(
1834 PyBytes_AS_STRING(self), len,
1835 sub, sub_len, start, end);
1836 else
1837 res = stringlib_rfind_slice(
1838 PyBytes_AS_STRING(self), len,
1839 sub, sub_len, start, end);
1840 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001841
1842 if (subobj)
1843 PyBuffer_Release(&subbuf);
1844
1845 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001846}
1847
1848
1849PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001850"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001851\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001852Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001853such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001854arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001855\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856Return -1 on failure.");
1857
Neal Norwitz6968b052007-02-27 19:02:19 +00001858static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001859bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001860{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 Py_ssize_t result = bytes_find_internal(self, args, +1);
1862 if (result == -2)
1863 return NULL;
1864 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001865}
1866
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867
1868PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001869"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001870\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001871Like B.find() but raise ValueError when the substring is not found.");
1872
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001873static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001874bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001875{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001876 Py_ssize_t result = bytes_find_internal(self, args, +1);
1877 if (result == -2)
1878 return NULL;
1879 if (result == -1) {
1880 PyErr_SetString(PyExc_ValueError,
1881 "substring not found");
1882 return NULL;
1883 }
1884 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001885}
1886
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887
1888PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001889"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001890\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001892such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001894\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001895Return -1 on failure.");
1896
Neal Norwitz6968b052007-02-27 19:02:19 +00001897static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001898bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 Py_ssize_t result = bytes_find_internal(self, args, -1);
1901 if (result == -2)
1902 return NULL;
1903 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001904}
1905
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001906
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001907PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001908"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909\n\
1910Like B.rfind() but raise ValueError when the substring is not found.");
1911
1912static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001913bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 Py_ssize_t result = bytes_find_internal(self, args, -1);
1916 if (result == -2)
1917 return NULL;
1918 if (result == -1) {
1919 PyErr_SetString(PyExc_ValueError,
1920 "substring not found");
1921 return NULL;
1922 }
1923 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001924}
1925
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
1927Py_LOCAL_INLINE(PyObject *)
1928do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 Py_buffer vsep;
1931 char *s = PyBytes_AS_STRING(self);
1932 Py_ssize_t len = PyBytes_GET_SIZE(self);
1933 char *sep;
1934 Py_ssize_t seplen;
1935 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001937 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 return NULL;
1939 sep = vsep.buf;
1940 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 i = 0;
1943 if (striptype != RIGHTSTRIP) {
1944 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1945 i++;
1946 }
1947 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 j = len;
1950 if (striptype != LEFTSTRIP) {
1951 do {
1952 j--;
1953 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1954 j++;
1955 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1960 Py_INCREF(self);
1961 return (PyObject*)self;
1962 }
1963 else
1964 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001965}
1966
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967
1968Py_LOCAL_INLINE(PyObject *)
1969do_strip(PyBytesObject *self, int striptype)
1970{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 char *s = PyBytes_AS_STRING(self);
1972 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 i = 0;
1975 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001976 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 i++;
1978 }
1979 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 j = len;
1982 if (striptype != LEFTSTRIP) {
1983 do {
1984 j--;
David Malcolm96960882010-11-05 17:23:41 +00001985 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 j++;
1987 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1990 Py_INCREF(self);
1991 return (PyObject*)self;
1992 }
1993 else
1994 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995}
1996
1997
1998Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001999do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002000{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002001 if (bytes != NULL && bytes != Py_None) {
2002 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 }
2004 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005}
2006
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002007/*[clinic input]
2008bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002010 self: self(type="PyBytesObject *")
2011 bytes: object = None
2012 /
2013
2014Strip leading and trailing bytes contained in the argument.
2015
2016If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2017[clinic start generated code]*/
2018
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002019static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002020bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002021/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002022{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002024}
2025
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026/*[clinic input]
2027bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002029 self: self(type="PyBytesObject *")
2030 bytes: object = None
2031 /
2032
2033Strip leading bytes contained in the argument.
2034
2035If the argument is omitted or None, strip leading ASCII whitespace.
2036[clinic start generated code]*/
2037
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002038static PyObject *
2039bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002040/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002041{
2042 return do_argstrip(self, LEFTSTRIP, bytes);
2043}
2044
2045/*[clinic input]
2046bytes.rstrip
2047
2048 self: self(type="PyBytesObject *")
2049 bytes: object = None
2050 /
2051
2052Strip trailing bytes contained in the argument.
2053
2054If the argument is omitted or None, strip trailing ASCII whitespace.
2055[clinic start generated code]*/
2056
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002057static PyObject *
2058bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002059/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002060{
2061 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002062}
Neal Norwitz6968b052007-02-27 19:02:19 +00002063
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
2065PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002066"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002067\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002069string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070as in slice notation.");
2071
2072static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002073bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 PyObject *sub_obj;
2076 const char *str = PyBytes_AS_STRING(self), *sub;
2077 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002078 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080
Antoine Pitrouac65d962011-10-20 23:54:17 +02002081 Py_buffer vsub;
2082 PyObject *count_obj;
2083
2084 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2085 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002086 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002087
Antoine Pitrouac65d962011-10-20 23:54:17 +02002088 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002089 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002090 return NULL;
2091
2092 sub = vsub.buf;
2093 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002094 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002095 else {
2096 sub = &byte;
2097 sub_len = 1;
2098 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrouac65d962011-10-20 23:54:17 +02002102 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2104 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002105
2106 if (sub_obj)
2107 PyBuffer_Release(&vsub);
2108
2109 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110}
2111
2112
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002113/*[clinic input]
2114bytes.translate
2115
2116 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002117 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002118 Translation table, which must be a bytes object of length 256.
2119 [
2120 deletechars: object
2121 ]
2122 /
2123
2124Return a copy with each character mapped by the given translation table.
2125
2126All characters occurring in the optional argument deletechars are removed.
2127The remaining characters are mapped through the given translation table.
2128[clinic start generated code]*/
2129
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002130static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002131bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2132 PyObject *deletechars)
2133/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002135 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002136 Py_buffer table_view = {NULL, NULL};
2137 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002138 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002139 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 Py_ssize_t inlen, tablen, dellen = 0;
2143 PyObject *result;
2144 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002146 if (PyBytes_Check(table)) {
2147 table_chars = PyBytes_AS_STRING(table);
2148 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002150 else if (table == Py_None) {
2151 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002152 tablen = 256;
2153 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002154 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002155 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002156 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002157 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002158 tablen = table_view.len;
2159 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 if (tablen != 256) {
2162 PyErr_SetString(PyExc_ValueError,
2163 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002164 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002165 return NULL;
2166 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002168 if (deletechars != NULL) {
2169 if (PyBytes_Check(deletechars)) {
2170 del_table_chars = PyBytes_AS_STRING(deletechars);
2171 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002173 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002174 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002175 PyBuffer_Release(&table_view);
2176 return NULL;
2177 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002178 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002179 dellen = del_table_view.len;
2180 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002181 }
2182 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002183 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 dellen = 0;
2185 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 inlen = PyBytes_GET_SIZE(input_obj);
2188 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002189 if (result == NULL) {
2190 PyBuffer_Release(&del_table_view);
2191 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002193 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002194 output_start = output = PyBytes_AsString(result);
2195 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002197 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 /* If no deletions are required, use faster code */
2199 for (i = inlen; --i >= 0; ) {
2200 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002201 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002202 changed = 1;
2203 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002204 if (!changed && PyBytes_CheckExact(input_obj)) {
2205 Py_INCREF(input_obj);
2206 Py_DECREF(result);
2207 result = input_obj;
2208 }
2209 PyBuffer_Release(&del_table_view);
2210 PyBuffer_Release(&table_view);
2211 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002212 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002214 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 for (i = 0; i < 256; i++)
2216 trans_table[i] = Py_CHARMASK(i);
2217 } else {
2218 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002220 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002221 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002223 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002224 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002225 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002227 for (i = inlen; --i >= 0; ) {
2228 c = Py_CHARMASK(*input++);
2229 if (trans_table[c] != -1)
2230 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2231 continue;
2232 changed = 1;
2233 }
2234 if (!changed && PyBytes_CheckExact(input_obj)) {
2235 Py_DECREF(result);
2236 Py_INCREF(input_obj);
2237 return input_obj;
2238 }
2239 /* Fix the size of the resulting string */
2240 if (inlen > 0)
2241 _PyBytes_Resize(&result, output - output_start);
2242 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002243}
2244
2245
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002246/*[clinic input]
2247
2248@staticmethod
2249bytes.maketrans
2250
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002251 frm: Py_buffer
2252 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002253 /
2254
2255Return a translation table useable for the bytes or bytearray translate method.
2256
2257The returned table will be one where each byte in frm is mapped to the byte at
2258the same position in to.
2259
2260The bytes objects frm and to must be of the same length.
2261[clinic start generated code]*/
2262
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002263static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002264bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002265/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002266{
2267 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002268}
2269
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270/* find and count characters and substrings */
2271
/* memchr() wrapper: pointer to the first occurrence of byte c within the
   first target_len bytes of target, or NULL; the result is cast to
   char *. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2274
2275/* String ops must return a string. */
2276/* If the object is subclass of string, create a copy */
2277Py_LOCAL(PyBytesObject *)
2278return_self(PyBytesObject *self)
2279{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002280 if (PyBytes_CheckExact(self)) {
2281 Py_INCREF(self);
2282 return self;
2283 }
2284 return (PyBytesObject *)PyBytes_FromStringAndSize(
2285 PyBytes_AS_STRING(self),
2286 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002287}
2288
2289Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002290countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002291{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002292 Py_ssize_t count=0;
2293 const char *start=target;
2294 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 while ( (start=findchar(start, end-start, c)) != NULL ) {
2297 count++;
2298 if (count >= maxcount)
2299 break;
2300 start += 1;
2301 }
2302 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002303}
2304
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002305
2306/* Algorithms for different cases of string replacement */
2307
2308/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2309Py_LOCAL(PyBytesObject *)
2310replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002311 const char *to_s, Py_ssize_t to_len,
2312 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 char *self_s, *result_s;
2315 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002316 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002320
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002321 /* 1 at the end plus 1 after every character;
2322 count = min(maxcount, self_len + 1) */
2323 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002325 else
2326 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2327 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002328
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002329 /* Check for overflow */
2330 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002331 assert(count > 0);
2332 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 PyErr_SetString(PyExc_OverflowError,
2334 "replacement bytes are too long");
2335 return NULL;
2336 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002337 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002339 if (! (result = (PyBytesObject *)
2340 PyBytes_FromStringAndSize(NULL, result_len)) )
2341 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002342
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002343 self_s = PyBytes_AS_STRING(self);
2344 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 /* Lay the first one down (guaranteed this will occur) */
2349 Py_MEMCPY(result_s, to_s, to_len);
2350 result_s += to_len;
2351 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 for (i=0; i<count; i++) {
2354 *result_s++ = *self_s++;
2355 Py_MEMCPY(result_s, to_s, to_len);
2356 result_s += to_len;
2357 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 /* Copy the rest of the original string */
2360 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002362 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002363}
2364
2365/* Special case for deleting a single character */
2366/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2367Py_LOCAL(PyBytesObject *)
2368replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002369 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002371 char *self_s, *result_s;
2372 char *start, *next, *end;
2373 Py_ssize_t self_len, result_len;
2374 Py_ssize_t count;
2375 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 self_len = PyBytes_GET_SIZE(self);
2378 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 count = countchar(self_s, self_len, from_c, maxcount);
2381 if (count == 0) {
2382 return return_self(self);
2383 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002385 result_len = self_len - count; /* from_len == 1 */
2386 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 if ( (result = (PyBytesObject *)
2389 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2390 return NULL;
2391 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 start = self_s;
2394 end = self_s + self_len;
2395 while (count-- > 0) {
2396 next = findchar(start, end-start, from_c);
2397 if (next == NULL)
2398 break;
2399 Py_MEMCPY(result_s, start, next-start);
2400 result_s += (next-start);
2401 start = next+1;
2402 }
2403 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002406}
2407
2408/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2409
2410Py_LOCAL(PyBytesObject *)
2411replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 const char *from_s, Py_ssize_t from_len,
2413 Py_ssize_t maxcount) {
2414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count, offset;
2418 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 self_len = PyBytes_GET_SIZE(self);
2421 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002423 count = stringlib_count(self_s, self_len,
2424 from_s, from_len,
2425 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002427 if (count == 0) {
2428 /* no matches */
2429 return return_self(self);
2430 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 result_len = self_len - (count * from_len);
2433 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 if ( (result = (PyBytesObject *)
2436 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2437 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 start = self_s;
2442 end = self_s + self_len;
2443 while (count-- > 0) {
2444 offset = stringlib_find(start, end-start,
2445 from_s, from_len,
2446 0);
2447 if (offset == -1)
2448 break;
2449 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002452
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002453 result_s += (next-start);
2454 start = next+from_len;
2455 }
2456 Py_MEMCPY(result_s, start, end-start);
2457 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002458}
2459
2460/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2461Py_LOCAL(PyBytesObject *)
2462replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002463 char from_c, char to_c,
2464 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002465{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 char *self_s, *result_s, *start, *end, *next;
2467 Py_ssize_t self_len;
2468 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 /* The result string will be the same size */
2471 self_s = PyBytes_AS_STRING(self);
2472 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002476 if (next == NULL) {
2477 /* No matches; return the original string */
2478 return return_self(self);
2479 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002480
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 /* Need to make a new string */
2482 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2483 if (result == NULL)
2484 return NULL;
2485 result_s = PyBytes_AS_STRING(result);
2486 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 /* change everything in-place, starting with this one */
2489 start = result_s + (next-self_s);
2490 *start = to_c;
2491 start++;
2492 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 while (--maxcount > 0) {
2495 next = findchar(start, end-start, from_c);
2496 if (next == NULL)
2497 break;
2498 *next = to_c;
2499 start = next+1;
2500 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503}
2504
2505/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2506Py_LOCAL(PyBytesObject *)
2507replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 const char *from_s, Py_ssize_t from_len,
2509 const char *to_s, Py_ssize_t to_len,
2510 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002511{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002512 char *result_s, *start, *end;
2513 char *self_s;
2514 Py_ssize_t self_len, offset;
2515 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002517 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002519 self_s = PyBytes_AS_STRING(self);
2520 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002522 offset = stringlib_find(self_s, self_len,
2523 from_s, from_len,
2524 0);
2525 if (offset == -1) {
2526 /* No matches; return the original string */
2527 return return_self(self);
2528 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002530 /* Need to make a new string */
2531 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2532 if (result == NULL)
2533 return NULL;
2534 result_s = PyBytes_AS_STRING(result);
2535 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002537 /* change everything in-place, starting with this one */
2538 start = result_s + offset;
2539 Py_MEMCPY(start, to_s, from_len);
2540 start += from_len;
2541 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 while ( --maxcount > 0) {
2544 offset = stringlib_find(start, end-start,
2545 from_s, from_len,
2546 0);
2547 if (offset==-1)
2548 break;
2549 Py_MEMCPY(start+offset, to_s, from_len);
2550 start += offset+from_len;
2551 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002554}
2555
2556/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2557Py_LOCAL(PyBytesObject *)
2558replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 char from_c,
2560 const char *to_s, Py_ssize_t to_len,
2561 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 char *self_s, *result_s;
2564 char *start, *next, *end;
2565 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002566 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 self_s = PyBytes_AS_STRING(self);
2570 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 count = countchar(self_s, self_len, from_c, maxcount);
2573 if (count == 0) {
2574 /* no matches, return unchanged */
2575 return return_self(self);
2576 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 /* use the difference between current and new, hence the "-1" */
2579 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002580 assert(count > 0);
2581 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 PyErr_SetString(PyExc_OverflowError,
2583 "replacement bytes are too long");
2584 return NULL;
2585 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002586 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002588 if ( (result = (PyBytesObject *)
2589 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2590 return NULL;
2591 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002593 start = self_s;
2594 end = self_s + self_len;
2595 while (count-- > 0) {
2596 next = findchar(start, end-start, from_c);
2597 if (next == NULL)
2598 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002600 if (next == start) {
2601 /* replace with the 'to' */
2602 Py_MEMCPY(result_s, to_s, to_len);
2603 result_s += to_len;
2604 start += 1;
2605 } else {
2606 /* copy the unchanged old then the 'to' */
2607 Py_MEMCPY(result_s, start, next-start);
2608 result_s += (next-start);
2609 Py_MEMCPY(result_s, to_s, to_len);
2610 result_s += to_len;
2611 start = next+1;
2612 }
2613 }
2614 /* Copy the remainder of the remaining string */
2615 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002617 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002618}
2619
2620/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2621Py_LOCAL(PyBytesObject *)
2622replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002623 const char *from_s, Py_ssize_t from_len,
2624 const char *to_s, Py_ssize_t to_len,
2625 Py_ssize_t maxcount) {
2626 char *self_s, *result_s;
2627 char *start, *next, *end;
2628 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002629 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002630 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 self_s = PyBytes_AS_STRING(self);
2633 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002634
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002635 count = stringlib_count(self_s, self_len,
2636 from_s, from_len,
2637 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 if (count == 0) {
2640 /* no matches, return unchanged */
2641 return return_self(self);
2642 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 /* Check for overflow */
2645 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002646 assert(count > 0);
2647 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002648 PyErr_SetString(PyExc_OverflowError,
2649 "replacement bytes are too long");
2650 return NULL;
2651 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002652 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002654 if ( (result = (PyBytesObject *)
2655 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2656 return NULL;
2657 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 start = self_s;
2660 end = self_s + self_len;
2661 while (count-- > 0) {
2662 offset = stringlib_find(start, end-start,
2663 from_s, from_len,
2664 0);
2665 if (offset == -1)
2666 break;
2667 next = start+offset;
2668 if (next == start) {
2669 /* replace with the 'to' */
2670 Py_MEMCPY(result_s, to_s, to_len);
2671 result_s += to_len;
2672 start += from_len;
2673 } else {
2674 /* copy the unchanged old then the 'to' */
2675 Py_MEMCPY(result_s, start, next-start);
2676 result_s += (next-start);
2677 Py_MEMCPY(result_s, to_s, to_len);
2678 result_s += to_len;
2679 start = next+from_len;
2680 }
2681 }
2682 /* Copy the remainder of the remaining string */
2683 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686}
2687
2688
2689Py_LOCAL(PyBytesObject *)
2690replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 const char *from_s, Py_ssize_t from_len,
2692 const char *to_s, Py_ssize_t to_len,
2693 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002694{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 if (maxcount < 0) {
2696 maxcount = PY_SSIZE_T_MAX;
2697 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2698 /* nothing to do; return the original string */
2699 return return_self(self);
2700 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 if (maxcount == 0 ||
2703 (from_len == 0 && to_len == 0)) {
2704 /* nothing to do; return the original string */
2705 return return_self(self);
2706 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 if (from_len == 0) {
2711 /* insert the 'to' string everywhere. */
2712 /* >>> "Python".replace("", ".") */
2713 /* '.P.y.t.h.o.n.' */
2714 return replace_interleave(self, to_s, to_len, maxcount);
2715 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2718 /* point for an empty self string to generate a non-empty string */
2719 /* Special case so the remaining code always gets a non-empty string */
2720 if (PyBytes_GET_SIZE(self) == 0) {
2721 return return_self(self);
2722 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002724 if (to_len == 0) {
2725 /* delete all occurrences of 'from' string */
2726 if (from_len == 1) {
2727 return replace_delete_single_character(
2728 self, from_s[0], maxcount);
2729 } else {
2730 return replace_delete_substring(self, from_s,
2731 from_len, maxcount);
2732 }
2733 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002737 if (from_len == to_len) {
2738 if (from_len == 1) {
2739 return replace_single_character_in_place(
2740 self,
2741 from_s[0],
2742 to_s[0],
2743 maxcount);
2744 } else {
2745 return replace_substring_in_place(
2746 self, from_s, from_len, to_s, to_len,
2747 maxcount);
2748 }
2749 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 /* Otherwise use the more generic algorithms */
2752 if (from_len == 1) {
2753 return replace_single_character(self, from_s[0],
2754 to_s, to_len, maxcount);
2755 } else {
2756 /* len('from')>=2, len('to')>=1 */
2757 return replace_substring(self, from_s, from_len, to_s, to_len,
2758 maxcount);
2759 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002760}
2761
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002762
2763/*[clinic input]
2764bytes.replace
2765
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002766 old: Py_buffer
2767 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002768 count: Py_ssize_t = -1
2769 Maximum number of occurrences to replace.
2770 -1 (the default value) means replace all occurrences.
2771 /
2772
2773Return a copy with all occurrences of substring old replaced by new.
2774
2775If the optional argument count is given, only the first count occurrences are
2776replaced.
2777[clinic start generated code]*/
2778
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002779static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002780bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2781 Py_ssize_t count)
2782/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002783{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002785 (const char *)old->buf, old->len,
2786 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787}
2788
2789/** End DALKE **/
2790
2791/* Matches the end (direction >= 0) or start (direction < 0) of self
2792 * against substr, using the start and end arguments. Returns
2793 * -1 on error, 0 if not found and 1 if found.
2794 */
2795Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002796_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002797 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002798{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 Py_ssize_t len = PyBytes_GET_SIZE(self);
2800 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002801 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 const char* sub;
2803 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002805 if (PyBytes_Check(substr)) {
2806 sub = PyBytes_AS_STRING(substr);
2807 slen = PyBytes_GET_SIZE(substr);
2808 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002809 else {
2810 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2811 return -1;
2812 sub = sub_view.buf;
2813 slen = sub_view.len;
2814 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002815 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002817 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002819 if (direction < 0) {
2820 /* startswith */
2821 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002822 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002823 } else {
2824 /* endswith */
2825 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002826 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002828 if (end-slen > start)
2829 start = end - slen;
2830 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002831 if (end-start < slen)
2832 goto notfound;
2833 if (memcmp(str+start, sub, slen) != 0)
2834 goto notfound;
2835
2836 PyBuffer_Release(&sub_view);
2837 return 1;
2838
2839notfound:
2840 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002841 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002842}
2843
2844
2845PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002846"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002847\n\
2848Return True if B starts with the specified prefix, False otherwise.\n\
2849With optional start, test B beginning at that position.\n\
2850With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002851prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002852
2853static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002854bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 Py_ssize_t start = 0;
2857 Py_ssize_t end = PY_SSIZE_T_MAX;
2858 PyObject *subobj;
2859 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002860
Jesus Ceaac451502011-04-20 17:09:23 +02002861 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002862 return NULL;
2863 if (PyTuple_Check(subobj)) {
2864 Py_ssize_t i;
2865 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2866 result = _bytes_tailmatch(self,
2867 PyTuple_GET_ITEM(subobj, i),
2868 start, end, -1);
2869 if (result == -1)
2870 return NULL;
2871 else if (result) {
2872 Py_RETURN_TRUE;
2873 }
2874 }
2875 Py_RETURN_FALSE;
2876 }
2877 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002878 if (result == -1) {
2879 if (PyErr_ExceptionMatches(PyExc_TypeError))
2880 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2881 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002883 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 else
2885 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886}
2887
2888
2889PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002890"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002891\n\
2892Return True if B ends with the specified suffix, False otherwise.\n\
2893With optional start, test B beginning at that position.\n\
2894With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002895suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896
2897static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002898bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002900 Py_ssize_t start = 0;
2901 Py_ssize_t end = PY_SSIZE_T_MAX;
2902 PyObject *subobj;
2903 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002904
Jesus Ceaac451502011-04-20 17:09:23 +02002905 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002906 return NULL;
2907 if (PyTuple_Check(subobj)) {
2908 Py_ssize_t i;
2909 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2910 result = _bytes_tailmatch(self,
2911 PyTuple_GET_ITEM(subobj, i),
2912 start, end, +1);
2913 if (result == -1)
2914 return NULL;
2915 else if (result) {
2916 Py_RETURN_TRUE;
2917 }
2918 }
2919 Py_RETURN_FALSE;
2920 }
2921 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002922 if (result == -1) {
2923 if (PyErr_ExceptionMatches(PyExc_TypeError))
2924 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2925 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002927 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002928 else
2929 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930}
2931
2932
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002933/*[clinic input]
2934bytes.decode
2935
2936 encoding: str(c_default="NULL") = 'utf-8'
2937 The encoding with which to decode the bytes.
2938 errors: str(c_default="NULL") = 'strict'
2939 The error handling scheme to use for the handling of decoding errors.
2940 The default is 'strict' meaning that decoding errors raise a
2941 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2942 as well as any other name registered with codecs.register_error that
2943 can handle UnicodeDecodeErrors.
2944
2945Decode the bytes using the codec registered for encoding.
2946[clinic start generated code]*/
2947
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002948static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002949bytes_decode_impl(PyBytesObject*self, const char *encoding,
2950 const char *errors)
2951/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002952{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002953 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002954}
2955
Guido van Rossum20188312006-05-05 15:15:40 +00002956
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002957/*[clinic input]
2958bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002959
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002960 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002961
2962Return a list of the lines in the bytes, breaking at line boundaries.
2963
2964Line breaks are not included in the resulting list unless keepends is given and
2965true.
2966[clinic start generated code]*/
2967
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002968static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002969bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002970/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002971{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002972 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002973 (PyObject*) self, PyBytes_AS_STRING(self),
2974 PyBytes_GET_SIZE(self), keepends
2975 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002976}
2977
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002978static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002979hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002980{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002981 if (c >= 128)
2982 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002983 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002984 return c - '0';
2985 else {
David Malcolm96960882010-11-05 17:23:41 +00002986 if (Py_ISUPPER(c))
2987 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002988 if (c >= 'a' && c <= 'f')
2989 return c - 'a' + 10;
2990 }
2991 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002992}
2993
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002994/*[clinic input]
2995@classmethod
2996bytes.fromhex
2997
2998 string: unicode
2999 /
3000
3001Create a bytes object from a string of hexadecimal numbers.
3002
3003Spaces between two numbers are accepted.
3004Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3005[clinic start generated code]*/
3006
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003007static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003008bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003009/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003010{
3011 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003012 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003013 Py_ssize_t hexlen, byteslen, i, j;
3014 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003015 void *data;
3016 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003017
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003018 assert(PyUnicode_Check(string));
3019 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003021 kind = PyUnicode_KIND(string);
3022 data = PyUnicode_DATA(string);
3023 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003025 byteslen = hexlen/2; /* This overestimates if there are spaces */
3026 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3027 if (!newstring)
3028 return NULL;
3029 buf = PyBytes_AS_STRING(newstring);
3030 for (i = j = 0; i < hexlen; i += 2) {
3031 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003032 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 i++;
3034 if (i >= hexlen)
3035 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003036 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3037 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003038 if (top == -1 || bot == -1) {
3039 PyErr_Format(PyExc_ValueError,
3040 "non-hexadecimal number found in "
3041 "fromhex() arg at position %zd", i);
3042 goto error;
3043 }
3044 buf[j++] = (top << 4) + bot;
3045 }
3046 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3047 goto error;
3048 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003049
3050 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003051 Py_XDECREF(newstring);
3052 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003053}
3054
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003055PyDoc_STRVAR(hex__doc__,
3056"B.hex() -> string\n\
3057\n\
3058Create a string of hexadecimal numbers from a bytes object.\n\
3059Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3060
3061static PyObject *
3062bytes_hex(PyBytesObject *self)
3063{
3064 char* argbuf = PyBytes_AS_STRING(self);
3065 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3066 return _Py_strhex(argbuf, arglen);
3067}
3068
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003069static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003070bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003071{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003072 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003073}
3074
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003075
3076static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003077bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003078 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3079 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3080 _Py_capitalize__doc__},
3081 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3082 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003083 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3085 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003086 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 expandtabs__doc__},
3088 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003089 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003090 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003091 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3092 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3093 _Py_isalnum__doc__},
3094 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3095 _Py_isalpha__doc__},
3096 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3097 _Py_isdigit__doc__},
3098 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3099 _Py_islower__doc__},
3100 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3101 _Py_isspace__doc__},
3102 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3103 _Py_istitle__doc__},
3104 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3105 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003106 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003107 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3108 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003109 BYTES_LSTRIP_METHODDEF
3110 BYTES_MAKETRANS_METHODDEF
3111 BYTES_PARTITION_METHODDEF
3112 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003113 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3114 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3115 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003116 BYTES_RPARTITION_METHODDEF
3117 BYTES_RSPLIT_METHODDEF
3118 BYTES_RSTRIP_METHODDEF
3119 BYTES_SPLIT_METHODDEF
3120 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003121 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3122 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003123 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003124 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3125 _Py_swapcase__doc__},
3126 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003127 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3129 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003130 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003131};
3132
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003133static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003134bytes_mod(PyObject *v, PyObject *w)
3135{
3136 if (!PyBytes_Check(v))
3137 Py_RETURN_NOTIMPLEMENTED;
3138 return _PyBytes_Format(v, w);
3139}
3140
3141static PyNumberMethods bytes_as_number = {
3142 0, /*nb_add*/
3143 0, /*nb_subtract*/
3144 0, /*nb_multiply*/
3145 bytes_mod, /*nb_remainder*/
3146};
3147
3148static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003149str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3150
3151static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003152bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003154 PyObject *x = NULL;
3155 const char *encoding = NULL;
3156 const char *errors = NULL;
3157 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003158 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003159 Py_ssize_t size;
3160 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003161 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 if (type != &PyBytes_Type)
3164 return str_subtype_new(type, args, kwds);
3165 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3166 &encoding, &errors))
3167 return NULL;
3168 if (x == NULL) {
3169 if (encoding != NULL || errors != NULL) {
3170 PyErr_SetString(PyExc_TypeError,
3171 "encoding or errors without sequence "
3172 "argument");
3173 return NULL;
3174 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003175 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003176 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003178 if (PyUnicode_Check(x)) {
3179 /* Encode via the codec registry */
3180 if (encoding == NULL) {
3181 PyErr_SetString(PyExc_TypeError,
3182 "string argument without an encoding");
3183 return NULL;
3184 }
3185 new = PyUnicode_AsEncodedString(x, encoding, errors);
3186 if (new == NULL)
3187 return NULL;
3188 assert(PyBytes_Check(new));
3189 return new;
3190 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003191
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003192 /* If it's not unicode, there can't be encoding or errors */
3193 if (encoding != NULL || errors != NULL) {
3194 PyErr_SetString(PyExc_TypeError,
3195 "encoding or errors without a string argument");
3196 return NULL;
3197 }
3198
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003199 /* We'd like to call PyObject_Bytes here, but we need to check for an
3200 integer argument before deferring to PyBytes_FromObject, something
3201 PyObject_Bytes doesn't do. */
3202 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3203 if (func != NULL) {
3204 new = PyObject_CallFunctionObjArgs(func, NULL);
3205 Py_DECREF(func);
3206 if (new == NULL)
3207 return NULL;
3208 if (!PyBytes_Check(new)) {
3209 PyErr_Format(PyExc_TypeError,
3210 "__bytes__ returned non-bytes (type %.200s)",
3211 Py_TYPE(new)->tp_name);
3212 Py_DECREF(new);
3213 return NULL;
3214 }
3215 return new;
3216 }
3217 else if (PyErr_Occurred())
3218 return NULL;
3219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003220 /* Is it an integer? */
3221 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3222 if (size == -1 && PyErr_Occurred()) {
3223 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3224 return NULL;
3225 PyErr_Clear();
3226 }
3227 else if (size < 0) {
3228 PyErr_SetString(PyExc_ValueError, "negative count");
3229 return NULL;
3230 }
3231 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003232 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003233 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003234 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003235 return new;
3236 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003237
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003238 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003239}
3240
3241PyObject *
3242PyBytes_FromObject(PyObject *x)
3243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003244 PyObject *new, *it;
3245 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003247 if (x == NULL) {
3248 PyErr_BadInternalCall();
3249 return NULL;
3250 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003251
3252 if (PyBytes_CheckExact(x)) {
3253 Py_INCREF(x);
3254 return x;
3255 }
3256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003257 /* Use the modern buffer interface */
3258 if (PyObject_CheckBuffer(x)) {
3259 Py_buffer view;
3260 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3261 return NULL;
3262 new = PyBytes_FromStringAndSize(NULL, view.len);
3263 if (!new)
3264 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003265 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3266 &view, view.len, 'C') < 0)
3267 goto fail;
3268 PyBuffer_Release(&view);
3269 return new;
3270 fail:
3271 Py_XDECREF(new);
3272 PyBuffer_Release(&view);
3273 return NULL;
3274 }
3275 if (PyUnicode_Check(x)) {
3276 PyErr_SetString(PyExc_TypeError,
3277 "cannot convert unicode object to bytes");
3278 return NULL;
3279 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003281 if (PyList_CheckExact(x)) {
3282 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3283 if (new == NULL)
3284 return NULL;
3285 for (i = 0; i < Py_SIZE(x); i++) {
3286 Py_ssize_t value = PyNumber_AsSsize_t(
3287 PyList_GET_ITEM(x, i), PyExc_ValueError);
3288 if (value == -1 && PyErr_Occurred()) {
3289 Py_DECREF(new);
3290 return NULL;
3291 }
3292 if (value < 0 || value >= 256) {
3293 PyErr_SetString(PyExc_ValueError,
3294 "bytes must be in range(0, 256)");
3295 Py_DECREF(new);
3296 return NULL;
3297 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003298 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003299 }
3300 return new;
3301 }
3302 if (PyTuple_CheckExact(x)) {
3303 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3304 if (new == NULL)
3305 return NULL;
3306 for (i = 0; i < Py_SIZE(x); i++) {
3307 Py_ssize_t value = PyNumber_AsSsize_t(
3308 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3309 if (value == -1 && PyErr_Occurred()) {
3310 Py_DECREF(new);
3311 return NULL;
3312 }
3313 if (value < 0 || value >= 256) {
3314 PyErr_SetString(PyExc_ValueError,
3315 "bytes must be in range(0, 256)");
3316 Py_DECREF(new);
3317 return NULL;
3318 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003319 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003320 }
3321 return new;
3322 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003325 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003326 if (size == -1 && PyErr_Occurred())
3327 return NULL;
3328 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3329 returning a shared empty bytes string. This required because we
3330 want to call _PyBytes_Resize() the returned object, which we can
3331 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003332 if (size == 0)
3333 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003334 new = PyBytes_FromStringAndSize(NULL, size);
3335 if (new == NULL)
3336 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003337 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003339 /* Get the iterator */
3340 it = PyObject_GetIter(x);
3341 if (it == NULL)
3342 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003344 /* Run the iterator to exhaustion */
3345 for (i = 0; ; i++) {
3346 PyObject *item;
3347 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003349 /* Get the next item */
3350 item = PyIter_Next(it);
3351 if (item == NULL) {
3352 if (PyErr_Occurred())
3353 goto error;
3354 break;
3355 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003357 /* Interpret it as an int (__index__) */
3358 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3359 Py_DECREF(item);
3360 if (value == -1 && PyErr_Occurred())
3361 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003363 /* Range check */
3364 if (value < 0 || value >= 256) {
3365 PyErr_SetString(PyExc_ValueError,
3366 "bytes must be in range(0, 256)");
3367 goto error;
3368 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003370 /* Append the byte */
3371 if (i >= size) {
3372 size = 2 * size + 1;
3373 if (_PyBytes_Resize(&new, size) < 0)
3374 goto error;
3375 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003376 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003377 }
3378 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003380 /* Clean up and return success */
3381 Py_DECREF(it);
3382 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003383
3384 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003385 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003386 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003387 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003388}
3389
3390static PyObject *
3391str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3392{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003393 PyObject *tmp, *pnew;
3394 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003396 assert(PyType_IsSubtype(type, &PyBytes_Type));
3397 tmp = bytes_new(&PyBytes_Type, args, kwds);
3398 if (tmp == NULL)
3399 return NULL;
3400 assert(PyBytes_CheckExact(tmp));
3401 n = PyBytes_GET_SIZE(tmp);
3402 pnew = type->tp_alloc(type, n);
3403 if (pnew != NULL) {
3404 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3405 PyBytes_AS_STRING(tmp), n+1);
3406 ((PyBytesObject *)pnew)->ob_shash =
3407 ((PyBytesObject *)tmp)->ob_shash;
3408 }
3409 Py_DECREF(tmp);
3410 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003411}
3412
/* Docstring for the bytes type: lists the accepted constructor call forms
   and the kinds of source object a bytes object can be built from. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003413PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003414"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003415bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003416bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003417bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3418bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003419\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003420Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003421 - an iterable yielding integers in range(256)\n\
3422 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003423 - any object implementing the buffer API.\n\
3424 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003425
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003426static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003427
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003428PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003429 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3430 "bytes",
3431 PyBytesObject_SIZE,
3432 sizeof(char),
3433 bytes_dealloc, /* tp_dealloc */
3434 0, /* tp_print */
3435 0, /* tp_getattr */
3436 0, /* tp_setattr */
3437 0, /* tp_reserved */
3438 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003439 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003440 &bytes_as_sequence, /* tp_as_sequence */
3441 &bytes_as_mapping, /* tp_as_mapping */
3442 (hashfunc)bytes_hash, /* tp_hash */
3443 0, /* tp_call */
3444 bytes_str, /* tp_str */
3445 PyObject_GenericGetAttr, /* tp_getattro */
3446 0, /* tp_setattro */
3447 &bytes_as_buffer, /* tp_as_buffer */
3448 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3449 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3450 bytes_doc, /* tp_doc */
3451 0, /* tp_traverse */
3452 0, /* tp_clear */
3453 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3454 0, /* tp_weaklistoffset */
3455 bytes_iter, /* tp_iter */
3456 0, /* tp_iternext */
3457 bytes_methods, /* tp_methods */
3458 0, /* tp_members */
3459 0, /* tp_getset */
3460 &PyBaseObject_Type, /* tp_base */
3461 0, /* tp_dict */
3462 0, /* tp_descr_get */
3463 0, /* tp_descr_set */
3464 0, /* tp_dictoffset */
3465 0, /* tp_init */
3466 0, /* tp_alloc */
3467 bytes_new, /* tp_new */
3468 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003469};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003470
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003471void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003472PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003474 assert(pv != NULL);
3475 if (*pv == NULL)
3476 return;
3477 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003478 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003479 return;
3480 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003481
3482 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3483 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003484 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003485 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003486
Antoine Pitrou161d6952014-05-01 14:36:20 +02003487 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003488 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003489 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3490 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3491 Py_CLEAR(*pv);
3492 return;
3493 }
3494
3495 oldsize = PyBytes_GET_SIZE(*pv);
3496 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3497 PyErr_NoMemory();
3498 goto error;
3499 }
3500 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3501 goto error;
3502
3503 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3504 PyBuffer_Release(&wb);
3505 return;
3506
3507 error:
3508 PyBuffer_Release(&wb);
3509 Py_CLEAR(*pv);
3510 return;
3511 }
3512
3513 else {
3514 /* Multiple references, need to create new object */
3515 PyObject *v;
3516 v = bytes_concat(*pv, w);
3517 Py_DECREF(*pv);
3518 *pv = v;
3519 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003520}
3521
3522void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003523PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003524{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003525 PyBytes_Concat(pv, w);
3526 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003527}
3528
3529
Ethan Furmanb95b5612015-01-23 20:05:18 -08003530/* The following function breaks the notion that bytes are immutable:
3531 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003532 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003533 as creating a new bytes object and destroying the old one, only
3534 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003535 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003536 Note that if there's not enough memory to resize the bytes object, the
3537 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003538 memory" exception is set, and -1 is returned. Else (on success) 0 is
3539 returned, and the value in *pv may or may not be the same as on input.
3540 As always, an extra byte is allocated for a trailing \0 byte (newsize
3541 does *not* include that), and a trailing \0 byte is stored.
3542*/
3543
3544int
3545_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3546{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003547 PyObject *v;
3548 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003549 v = *pv;
3550 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3551 *pv = 0;
3552 Py_DECREF(v);
3553 PyErr_BadInternalCall();
3554 return -1;
3555 }
3556 /* XXX UNREF/NEWREF interface should be more symmetrical */
3557 _Py_DEC_REFTOTAL;
3558 _Py_ForgetReference(v);
3559 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003560 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003561 if (*pv == NULL) {
3562 PyObject_Del(v);
3563 PyErr_NoMemory();
3564 return -1;
3565 }
3566 _Py_NewReference(*pv);
3567 sv = (PyBytesObject *) *pv;
3568 Py_SIZE(sv) = newsize;
3569 sv->ob_sval[newsize] = '\0';
3570 sv->ob_shash = -1; /* invalidate cached hash value */
3571 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003572}
3573
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003574void
3575PyBytes_Fini(void)
3576{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003577 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003578 for (i = 0; i < UCHAR_MAX + 1; i++)
3579 Py_CLEAR(characters[i]);
3580 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003581}
3582
Benjamin Peterson4116f362008-05-27 00:36:20 +00003583/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003584
3585typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003586 PyObject_HEAD
3587 Py_ssize_t it_index;
3588 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003589} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003590
3591static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003592striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003593{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003594 _PyObject_GC_UNTRACK(it);
3595 Py_XDECREF(it->it_seq);
3596 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003597}
3598
3599static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003600striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003601{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003602 Py_VISIT(it->it_seq);
3603 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003604}
3605
3606static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003607striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003608{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003609 PyBytesObject *seq;
3610 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003612 assert(it != NULL);
3613 seq = it->it_seq;
3614 if (seq == NULL)
3615 return NULL;
3616 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003618 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3619 item = PyLong_FromLong(
3620 (unsigned char)seq->ob_sval[it->it_index]);
3621 if (item != NULL)
3622 ++it->it_index;
3623 return item;
3624 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003626 Py_DECREF(seq);
3627 it->it_seq = NULL;
3628 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003629}
3630
3631static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003632striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003633{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003634 Py_ssize_t len = 0;
3635 if (it->it_seq)
3636 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3637 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003638}
3639
3640PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003641 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003642
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003643static PyObject *
3644striter_reduce(striterobject *it)
3645{
3646 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003647 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003648 it->it_seq, it->it_index);
3649 } else {
3650 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3651 if (u == NULL)
3652 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003653 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003654 }
3655}
3656
3657PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3658
3659static PyObject *
3660striter_setstate(striterobject *it, PyObject *state)
3661{
3662 Py_ssize_t index = PyLong_AsSsize_t(state);
3663 if (index == -1 && PyErr_Occurred())
3664 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003665 if (it->it_seq != NULL) {
3666 if (index < 0)
3667 index = 0;
3668 else if (index > PyBytes_GET_SIZE(it->it_seq))
3669 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3670 it->it_index = index;
3671 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003672 Py_RETURN_NONE;
3673}
3674
3675PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3676
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003677static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003678 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3679 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003680 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3681 reduce_doc},
3682 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3683 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003684 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003685};
3686
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003687PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003688 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3689 "bytes_iterator", /* tp_name */
3690 sizeof(striterobject), /* tp_basicsize */
3691 0, /* tp_itemsize */
3692 /* methods */
3693 (destructor)striter_dealloc, /* tp_dealloc */
3694 0, /* tp_print */
3695 0, /* tp_getattr */
3696 0, /* tp_setattr */
3697 0, /* tp_reserved */
3698 0, /* tp_repr */
3699 0, /* tp_as_number */
3700 0, /* tp_as_sequence */
3701 0, /* tp_as_mapping */
3702 0, /* tp_hash */
3703 0, /* tp_call */
3704 0, /* tp_str */
3705 PyObject_GenericGetAttr, /* tp_getattro */
3706 0, /* tp_setattro */
3707 0, /* tp_as_buffer */
3708 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3709 0, /* tp_doc */
3710 (traverseproc)striter_traverse, /* tp_traverse */
3711 0, /* tp_clear */
3712 0, /* tp_richcompare */
3713 0, /* tp_weaklistoffset */
3714 PyObject_SelfIter, /* tp_iter */
3715 (iternextfunc)striter_next, /* tp_iternext */
3716 striter_methods, /* tp_methods */
3717 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003718};
3719
3720static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003721bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003722{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003723 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003725 if (!PyBytes_Check(seq)) {
3726 PyErr_BadInternalCall();
3727 return NULL;
3728 }
3729 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3730 if (it == NULL)
3731 return NULL;
3732 it->it_index = 0;
3733 Py_INCREF(seq);
3734 it->it_seq = (PyBytesObject *)seq;
3735 _PyObject_GC_TRACK(it);
3736 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003737}
Victor Stinner00165072015-10-09 01:53:21 +02003738
3739
3740/* _PyBytesWriter API */
3741
/* When over-allocation is enabled, a growing writer requests an extra
   (needed size / OVERALLOCATE_FACTOR) bytes. */
#ifdef MS_WINDOWS
   /* On Windows, over-allocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, over-allocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3749
3750void
3751_PyBytesWriter_Init(_PyBytesWriter *writer)
3752{
3753 writer->buffer = NULL;
3754 writer->allocated = 0;
3755 writer->size = 0;
3756 writer->overallocate = 0;
3757 writer->use_stack_buffer = 0;
3758#ifdef Py_DEBUG
3759 memset(writer->stack_buffer, 0xCB, sizeof(writer->stack_buffer));
3760#endif
3761}
3762
3763void
3764_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3765{
3766 Py_CLEAR(writer->buffer);
3767}
3768
3769Py_LOCAL_INLINE(char*)
3770_PyBytesWriter_AsString(_PyBytesWriter *writer)
3771{
3772 if (!writer->use_stack_buffer) {
3773 assert(writer->buffer != NULL);
3774 return PyBytes_AS_STRING(writer->buffer);
3775 }
3776 else {
3777 assert(writer->buffer == NULL);
3778 return writer->stack_buffer;
3779 }
3780}
3781
3782Py_LOCAL_INLINE(Py_ssize_t)
3783_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
3784{
3785 char *start = _PyBytesWriter_AsString(writer);
3786 assert(str != NULL);
3787 assert(str >= start);
3788 return str - start;
3789}
3790
3791Py_LOCAL_INLINE(void)
3792_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3793{
3794#ifdef Py_DEBUG
3795 char *start, *end;
3796
3797 if (!writer->use_stack_buffer) {
3798 assert(writer->buffer != NULL);
3799 assert(PyBytes_CheckExact(writer->buffer));
3800 assert(Py_REFCNT(writer->buffer) == 1);
3801 }
3802 else {
3803 assert(writer->buffer == NULL);
3804 }
3805
3806 start = _PyBytesWriter_AsString(writer);
3807 assert(0 <= writer->size && writer->size <= writer->allocated);
3808 /* the last byte must always be null */
3809 assert(start[writer->allocated] == 0);
3810
3811 end = start + writer->allocated;
3812 assert(str != NULL);
3813 assert(start <= str && str <= end);
3814#endif
3815}
3816
3817char*
3818_PyBytesWriter_Prepare(_PyBytesWriter *writer, char *str, Py_ssize_t size)
3819{
3820 Py_ssize_t allocated, pos;
3821
3822 _PyBytesWriter_CheckConsistency(writer, str);
3823 assert(size >= 0);
3824
3825 if (size == 0) {
3826 /* nothing to do */
3827 return str;
3828 }
3829
3830 if (writer->size > PY_SSIZE_T_MAX - size) {
3831 PyErr_NoMemory();
3832 _PyBytesWriter_Dealloc(writer);
3833 return NULL;
3834 }
3835 writer->size += size;
3836
3837 allocated = writer->allocated;
3838 if (writer->size <= allocated)
3839 return str;
3840
3841 allocated = writer->size;
3842 if (writer->overallocate
3843 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3844 /* overallocate to limit the number of realloc() */
3845 allocated += allocated / OVERALLOCATE_FACTOR;
3846 }
3847
3848 pos = _PyBytesWriter_GetPos(writer, str);
3849 if (!writer->use_stack_buffer) {
3850 /* Note: Don't use a bytearray object because the conversion from
3851 byterray to bytes requires to copy all bytes. */
3852 if (_PyBytes_Resize(&writer->buffer, allocated)) {
3853 assert(writer->buffer == NULL);
3854 return NULL;
3855 }
3856 }
3857 else {
3858 /* convert from stack buffer to bytes object buffer */
3859 assert(writer->buffer == NULL);
3860
3861 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3862 if (writer->buffer == NULL)
3863 return NULL;
3864
3865 if (pos != 0) {
3866 Py_MEMCPY(PyBytes_AS_STRING(writer->buffer),
3867 writer->stack_buffer,
3868 pos);
3869 }
3870
3871#ifdef Py_DEBUG
3872 memset(writer->stack_buffer, 0xDB, sizeof(writer->stack_buffer));
3873#endif
3874
3875 writer->use_stack_buffer = 0;
3876 }
3877 writer->allocated = allocated;
3878
3879 str = _PyBytesWriter_AsString(writer) + pos;
3880 _PyBytesWriter_CheckConsistency(writer, str);
3881 return str;
3882}
3883
3884/* Allocate the buffer to write size bytes.
3885 Return the pointer to the beginning of buffer data.
3886 Raise an exception and return NULL on error. */
3887char*
3888_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3889{
3890 /* ensure that _PyBytesWriter_Alloc() is only called once */
3891 assert(writer->size == 0 && writer->buffer == NULL);
3892 assert(size >= 0);
3893
3894 writer->use_stack_buffer = 1;
3895#if Py_DEBUG
3896 /* the last byte is reserved, it must be '\0' */
3897 writer->stack_buffer[sizeof(writer->stack_buffer) - 1] = 0;
3898 writer->allocated = sizeof(writer->stack_buffer) - 1;
3899#else
3900 writer->allocated = sizeof(writer->stack_buffer);
3901#endif
3902 return _PyBytesWriter_Prepare(writer, writer->stack_buffer, size);
3903}
3904
3905PyObject *
3906_PyBytesWriter_Finish(_PyBytesWriter *writer, char *str)
3907{
3908 Py_ssize_t pos;
3909 PyObject *result;
3910
3911 _PyBytesWriter_CheckConsistency(writer, str);
3912
3913 pos = _PyBytesWriter_GetPos(writer, str);
3914 if (!writer->use_stack_buffer) {
3915 if (pos != writer->allocated) {
3916 if (_PyBytes_Resize(&writer->buffer, pos)) {
3917 assert(writer->buffer == NULL);
3918 return NULL;
3919 }
3920 }
3921
3922 result = writer->buffer;
3923 writer->buffer = NULL;
3924 }
3925 else {
3926 result = PyBytes_FromStringAndSize(writer->stack_buffer, pos);
3927 }
3928
3929 return result;
3930}