blob: 6a6e930f73a01fbfcdcdbeeeed4b583621325187 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* PyBytesObject_SIZE is the basic size of a bytes object: the offset of the
   ob_sval member plus one byte for the terminating null.  Any memory
   allocation for a string of length n should request
   PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
/* Conversion flag bits, one per flag character of a format specifier:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
412static PyObject *
413formatfloat(PyObject *v, int flags, int prec, int type)
414{
415 char *p;
416 PyObject *result;
417 double x;
418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
434 result = PyBytes_FromStringAndSize(p, strlen(p));
435 PyMem_Free(p);
436 return result;
437}
438
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300439static PyObject *
440formatlong(PyObject *v, int flags, int prec, int type)
441{
442 PyObject *result, *iobj;
443 if (type == 'i')
444 type = 'd';
445 if (PyLong_Check(v))
446 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
447 if (PyNumber_Check(v)) {
448 /* make sure number is a type of integer for o, x, and X */
449 if (type == 'o' || type == 'x' || type == 'X')
450 iobj = PyNumber_Index(v);
451 else
452 iobj = PyNumber_Long(v);
453 if (iobj == NULL) {
454 if (!PyErr_ExceptionMatches(PyExc_TypeError))
455 return NULL;
456 }
457 else if (!PyLong_Check(iobj))
458 Py_CLEAR(iobj);
459 if (iobj != NULL) {
460 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
461 Py_DECREF(iobj);
462 return result;
463 }
464 }
465 PyErr_Format(PyExc_TypeError,
466 "%%%c format: %s is required, not %.200s", type,
467 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
468 : "a number",
469 Py_TYPE(v)->tp_name);
470 return NULL;
471}
472
473static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200474byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800475{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200476 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
477 *p = PyBytes_AS_STRING(arg)[0];
478 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800479 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200480 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
481 *p = PyByteArray_AS_STRING(arg)[0];
482 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483 }
484 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300485 PyObject *iobj;
486 long ival;
487 int overflow;
488 /* make sure number is a type of integer */
489 if (PyLong_Check(arg)) {
490 ival = PyLong_AsLongAndOverflow(arg, &overflow);
491 }
492 else {
493 iobj = PyNumber_Index(arg);
494 if (iobj == NULL) {
495 if (!PyErr_ExceptionMatches(PyExc_TypeError))
496 return 0;
497 goto onError;
498 }
499 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
500 Py_DECREF(iobj);
501 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300502 if (!overflow && ival == -1 && PyErr_Occurred())
503 goto onError;
504 if (overflow || !(0 <= ival && ival <= 255)) {
505 PyErr_SetString(PyExc_OverflowError,
506 "%c arg not in range(256)");
507 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800508 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300509 *p = (char)ival;
510 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800511 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300512 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200513 PyErr_SetString(PyExc_TypeError,
514 "%c requires an integer in range(256) or a single byte");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516}
517
518static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200519format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 /* is it a bytes object? */
524 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 *pbuf = PyBytes_AS_STRING(v);
526 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800527 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 return v;
529 }
530 if (PyByteArray_Check(v)) {
531 *pbuf = PyByteArray_AS_STRING(v);
532 *plen = PyByteArray_GET_SIZE(v);
533 Py_INCREF(v);
534 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 }
536 /* does it support __bytes__? */
537 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
538 if (func != NULL) {
539 result = PyObject_CallFunctionObjArgs(func, NULL);
540 Py_DECREF(func);
541 if (result == NULL)
542 return NULL;
543 if (!PyBytes_Check(result)) {
544 PyErr_Format(PyExc_TypeError,
545 "__bytes__ returned non-bytes (type %.200s)",
546 Py_TYPE(result)->tp_name);
547 Py_DECREF(result);
548 return NULL;
549 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200550 *pbuf = PyBytes_AS_STRING(result);
551 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 return result;
553 }
554 PyErr_Format(PyExc_TypeError,
555 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
556 Py_TYPE(v)->tp_name);
557 return NULL;
558}
559
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (e.g. `sizeof FORMATBUFLEN').
*/
#define FORMATBUFLEN ((size_t)120)
569
570PyObject *
571_PyBytes_Format(PyObject *format, PyObject *args)
572{
573 char *fmt, *res;
574 Py_ssize_t arglen, argidx;
575 Py_ssize_t reslen, rescnt, fmtcnt;
576 int args_owned = 0;
577 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578 PyObject *dict = NULL;
579 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
580 PyErr_BadInternalCall();
581 return NULL;
582 }
583 fmt = PyBytes_AS_STRING(format);
584 fmtcnt = PyBytes_GET_SIZE(format);
585 reslen = rescnt = fmtcnt + 100;
586 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
587 if (result == NULL)
588 return NULL;
589 res = PyBytes_AsString(result);
590 if (PyTuple_Check(args)) {
591 arglen = PyTuple_GET_SIZE(args);
592 argidx = 0;
593 }
594 else {
595 arglen = -1;
596 argidx = -2;
597 }
598 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
599 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
600 !PyByteArray_Check(args)) {
601 dict = args;
602 }
603 while (--fmtcnt >= 0) {
604 if (*fmt != '%') {
605 if (--rescnt < 0) {
606 rescnt = fmtcnt + 100;
607 reslen += rescnt;
608 if (_PyBytes_Resize(&result, reslen))
609 return NULL;
610 res = PyBytes_AS_STRING(result)
611 + reslen - rescnt;
612 --rescnt;
613 }
614 *res++ = *fmt++;
615 }
616 else {
617 /* Got a format specifier */
618 int flags = 0;
619 Py_ssize_t width = -1;
620 int prec = -1;
621 int c = '\0';
622 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800623 PyObject *v = NULL;
624 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200625 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800626 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200627 Py_ssize_t len = 0;
628 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629
Ethan Furmanb95b5612015-01-23 20:05:18 -0800630 fmt++;
631 if (*fmt == '(') {
632 char *keystart;
633 Py_ssize_t keylen;
634 PyObject *key;
635 int pcount = 1;
636
637 if (dict == NULL) {
638 PyErr_SetString(PyExc_TypeError,
639 "format requires a mapping");
640 goto error;
641 }
642 ++fmt;
643 --fmtcnt;
644 keystart = fmt;
645 /* Skip over balanced parentheses */
646 while (pcount > 0 && --fmtcnt >= 0) {
647 if (*fmt == ')')
648 --pcount;
649 else if (*fmt == '(')
650 ++pcount;
651 fmt++;
652 }
653 keylen = fmt - keystart - 1;
654 if (fmtcnt < 0 || pcount > 0) {
655 PyErr_SetString(PyExc_ValueError,
656 "incomplete format key");
657 goto error;
658 }
659 key = PyBytes_FromStringAndSize(keystart,
660 keylen);
661 if (key == NULL)
662 goto error;
663 if (args_owned) {
664 Py_DECREF(args);
665 args_owned = 0;
666 }
667 args = PyObject_GetItem(dict, key);
668 Py_DECREF(key);
669 if (args == NULL) {
670 goto error;
671 }
672 args_owned = 1;
673 arglen = -1;
674 argidx = -2;
675 }
676 while (--fmtcnt >= 0) {
677 switch (c = *fmt++) {
678 case '-': flags |= F_LJUST; continue;
679 case '+': flags |= F_SIGN; continue;
680 case ' ': flags |= F_BLANK; continue;
681 case '#': flags |= F_ALT; continue;
682 case '0': flags |= F_ZERO; continue;
683 }
684 break;
685 }
686 if (c == '*') {
687 v = getnextarg(args, arglen, &argidx);
688 if (v == NULL)
689 goto error;
690 if (!PyLong_Check(v)) {
691 PyErr_SetString(PyExc_TypeError,
692 "* wants int");
693 goto error;
694 }
695 width = PyLong_AsSsize_t(v);
696 if (width == -1 && PyErr_Occurred())
697 goto error;
698 if (width < 0) {
699 flags |= F_LJUST;
700 width = -width;
701 }
702 if (--fmtcnt >= 0)
703 c = *fmt++;
704 }
705 else if (c >= 0 && isdigit(c)) {
706 width = c - '0';
707 while (--fmtcnt >= 0) {
708 c = Py_CHARMASK(*fmt++);
709 if (!isdigit(c))
710 break;
711 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
712 PyErr_SetString(
713 PyExc_ValueError,
714 "width too big");
715 goto error;
716 }
717 width = width*10 + (c - '0');
718 }
719 }
720 if (c == '.') {
721 prec = 0;
722 if (--fmtcnt >= 0)
723 c = *fmt++;
724 if (c == '*') {
725 v = getnextarg(args, arglen, &argidx);
726 if (v == NULL)
727 goto error;
728 if (!PyLong_Check(v)) {
729 PyErr_SetString(
730 PyExc_TypeError,
731 "* wants int");
732 goto error;
733 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200734 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800735 if (prec == -1 && PyErr_Occurred())
736 goto error;
737 if (prec < 0)
738 prec = 0;
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 prec = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "prec too big");
752 goto error;
753 }
754 prec = prec*10 + (c - '0');
755 }
756 }
757 } /* prec */
758 if (fmtcnt >= 0) {
759 if (c == 'h' || c == 'l' || c == 'L') {
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 }
763 }
764 if (fmtcnt < 0) {
765 PyErr_SetString(PyExc_ValueError,
766 "incomplete format");
767 goto error;
768 }
769 if (c != '%') {
770 v = getnextarg(args, arglen, &argidx);
771 if (v == NULL)
772 goto error;
773 }
774 sign = 0;
775 fill = ' ';
776 switch (c) {
777 case '%':
778 pbuf = "%";
779 len = 1;
780 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700781 case 'r':
782 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800783 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200784 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (temp == NULL)
786 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200787 assert(PyUnicode_IS_ASCII(temp));
788 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
789 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800790 if (prec >= 0 && len > prec)
791 len = prec;
792 break;
793 case 's':
794 // %s is only for 2/3 code; 3 only code should use %b
795 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200796 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800797 if (temp == NULL)
798 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800799 if (prec >= 0 && len > prec)
800 len = prec;
801 break;
802 case 'i':
803 case 'd':
804 case 'u':
805 case 'o':
806 case 'x':
807 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300808 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200809 if (!temp)
810 goto error;
811 assert(PyUnicode_IS_ASCII(temp));
812 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
813 len = PyUnicode_GET_LENGTH(temp);
814 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800815 if (flags & F_ZERO)
816 fill = '0';
817 break;
818 case 'e':
819 case 'E':
820 case 'f':
821 case 'F':
822 case 'g':
823 case 'G':
824 temp = formatfloat(v, flags, prec, c);
825 if (temp == NULL)
826 goto error;
827 pbuf = PyBytes_AS_STRING(temp);
828 len = PyBytes_GET_SIZE(temp);
829 sign = 1;
830 if (flags & F_ZERO)
831 fill = '0';
832 break;
833 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 pbuf = &onechar;
835 len = byte_converter(v, &onechar);
836 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 goto error;
838 break;
839 default:
840 PyErr_Format(PyExc_ValueError,
841 "unsupported format character '%c' (0x%x) "
842 "at index %zd",
843 c, c,
844 (Py_ssize_t)(fmt - 1 -
845 PyBytes_AsString(format)));
846 goto error;
847 }
848 if (sign) {
849 if (*pbuf == '-' || *pbuf == '+') {
850 sign = *pbuf++;
851 len--;
852 }
853 else if (flags & F_SIGN)
854 sign = '+';
855 else if (flags & F_BLANK)
856 sign = ' ';
857 else
858 sign = 0;
859 }
860 if (width < len)
861 width = len;
862 if (rescnt - (sign != 0) < width) {
863 reslen -= rescnt;
864 rescnt = width + fmtcnt + 100;
865 reslen += rescnt;
866 if (reslen < 0) {
867 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800868 Py_XDECREF(temp);
869 return PyErr_NoMemory();
870 }
871 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800872 Py_XDECREF(temp);
873 return NULL;
874 }
875 res = PyBytes_AS_STRING(result)
876 + reslen - rescnt;
877 }
878 if (sign) {
879 if (fill != ' ')
880 *res++ = sign;
881 rescnt--;
882 if (width > len)
883 width--;
884 }
885 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
886 assert(pbuf[0] == '0');
887 assert(pbuf[1] == c);
888 if (fill != ' ') {
889 *res++ = *pbuf++;
890 *res++ = *pbuf++;
891 }
892 rescnt -= 2;
893 width -= 2;
894 if (width < 0)
895 width = 0;
896 len -= 2;
897 }
898 if (width > len && !(flags & F_LJUST)) {
899 do {
900 --rescnt;
901 *res++ = fill;
902 } while (--width > len);
903 }
904 if (fill == ' ') {
905 if (sign)
906 *res++ = sign;
907 if ((flags & F_ALT) &&
908 (c == 'x' || c == 'X')) {
909 assert(pbuf[0] == '0');
910 assert(pbuf[1] == c);
911 *res++ = *pbuf++;
912 *res++ = *pbuf++;
913 }
914 }
915 Py_MEMCPY(res, pbuf, len);
916 res += len;
917 rescnt -= len;
918 while (--width >= len) {
919 --rescnt;
920 *res++ = ' ';
921 }
922 if (dict && (argidx < arglen) && c != '%') {
923 PyErr_SetString(PyExc_TypeError,
924 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 Py_XDECREF(temp);
926 goto error;
927 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 Py_XDECREF(temp);
929 } /* '%' */
930 } /* until end */
931 if (argidx < arglen && !dict) {
932 PyErr_SetString(PyExc_TypeError,
933 "not all arguments converted during bytes formatting");
934 goto error;
935 }
936 if (args_owned) {
937 Py_DECREF(args);
938 }
939 if (_PyBytes_Resize(&result, reslen - rescnt))
940 return NULL;
941 return result;
942
943 error:
944 Py_DECREF(result);
945 if (args_owned) {
946 Py_DECREF(args);
947 }
948 return NULL;
949}
950
951/* =-= */
952
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000953static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000957}
958
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959/* Unescape a backslash-escaped string. If unicode is non-zero,
960 the string is a u-literal. If recode_encoding is non-zero,
961 the string is UTF-8 encoded and should be re-encoded in the
962 specified encoding. */
963
964PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 Py_ssize_t len,
966 const char *errors,
967 Py_ssize_t unicode,
968 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 int c;
971 char *p, *buf;
972 const char *end;
973 PyObject *v;
974 Py_ssize_t newlen = recode_encoding ? 4*len:len;
975 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
976 if (v == NULL)
977 return NULL;
978 p = buf = PyBytes_AsString(v);
979 end = s + len;
980 while (s < end) {
981 if (*s != '\\') {
982 non_esc:
983 if (recode_encoding && (*s & 0x80)) {
984 PyObject *u, *w;
985 char *r;
986 const char* t;
987 Py_ssize_t rn;
988 t = s;
989 /* Decode non-ASCII bytes as UTF-8. */
990 while (t < end && (*t & 0x80)) t++;
991 u = PyUnicode_DecodeUTF8(s, t - s, errors);
992 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 /* Recode them in target encoding. */
995 w = PyUnicode_AsEncodedString(
996 u, recode_encoding, errors);
997 Py_DECREF(u);
998 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 /* Append bytes to output buffer. */
1001 assert(PyBytes_Check(w));
1002 r = PyBytes_AS_STRING(w);
1003 rn = PyBytes_GET_SIZE(w);
1004 Py_MEMCPY(p, r, rn);
1005 p += rn;
1006 Py_DECREF(w);
1007 s = t;
1008 } else {
1009 *p++ = *s++;
1010 }
1011 continue;
1012 }
1013 s++;
1014 if (s==end) {
1015 PyErr_SetString(PyExc_ValueError,
1016 "Trailing \\ in string");
1017 goto failed;
1018 }
1019 switch (*s++) {
1020 /* XXX This assumes ASCII! */
1021 case '\n': break;
1022 case '\\': *p++ = '\\'; break;
1023 case '\'': *p++ = '\''; break;
1024 case '\"': *p++ = '\"'; break;
1025 case 'b': *p++ = '\b'; break;
1026 case 'f': *p++ = '\014'; break; /* FF */
1027 case 't': *p++ = '\t'; break;
1028 case 'n': *p++ = '\n'; break;
1029 case 'r': *p++ = '\r'; break;
1030 case 'v': *p++ = '\013'; break; /* VT */
1031 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1032 case '0': case '1': case '2': case '3':
1033 case '4': case '5': case '6': case '7':
1034 c = s[-1] - '0';
1035 if (s < end && '0' <= *s && *s <= '7') {
1036 c = (c<<3) + *s++ - '0';
1037 if (s < end && '0' <= *s && *s <= '7')
1038 c = (c<<3) + *s++ - '0';
1039 }
1040 *p++ = c;
1041 break;
1042 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001043 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 unsigned int x = 0;
1045 c = Py_CHARMASK(*s);
1046 s++;
David Malcolm96960882010-11-05 17:23:41 +00001047 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001049 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 x = 10 + c - 'a';
1051 else
1052 x = 10 + c - 'A';
1053 x = x << 4;
1054 c = Py_CHARMASK(*s);
1055 s++;
David Malcolm96960882010-11-05 17:23:41 +00001056 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001058 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 x += 10 + c - 'a';
1060 else
1061 x += 10 + c - 'A';
1062 *p++ = x;
1063 break;
1064 }
1065 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001066 PyErr_Format(PyExc_ValueError,
1067 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001068 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 goto failed;
1070 }
1071 if (strcmp(errors, "replace") == 0) {
1072 *p++ = '?';
1073 } else if (strcmp(errors, "ignore") == 0)
1074 /* do nothing */;
1075 else {
1076 PyErr_Format(PyExc_ValueError,
1077 "decoding error; unknown "
1078 "error handling code: %.400s",
1079 errors);
1080 goto failed;
1081 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001082 /* skip \x */
1083 if (s < end && Py_ISXDIGIT(s[0]))
1084 s++; /* and a hexdigit */
1085 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 default:
1087 *p++ = '\\';
1088 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001089 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 UTF-8 bytes may follow. */
1091 }
1092 }
1093 if (p-buf < newlen)
1094 _PyBytes_Resize(&v, p - buf);
1095 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_DECREF(v);
1098 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099}
1100
1101/* -------------------------------------------------------------------- */
1102/* object api */
1103
1104Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001105PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 if (!PyBytes_Check(op)) {
1108 PyErr_Format(PyExc_TypeError,
1109 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1110 return -1;
1111 }
1112 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113}
1114
1115char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001116PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (!PyBytes_Check(op)) {
1119 PyErr_Format(PyExc_TypeError,
1120 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1121 return NULL;
1122 }
1123 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124}
1125
1126int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001127PyBytes_AsStringAndSize(PyObject *obj,
1128 char **s,
1129 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001130{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 if (s == NULL) {
1132 PyErr_BadInternalCall();
1133 return -1;
1134 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 if (!PyBytes_Check(obj)) {
1137 PyErr_Format(PyExc_TypeError,
1138 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1139 return -1;
1140 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 *s = PyBytes_AS_STRING(obj);
1143 if (len != NULL)
1144 *len = PyBytes_GET_SIZE(obj);
1145 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001146 PyErr_SetString(PyExc_ValueError,
1147 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 return -1;
1149 }
1150 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001151}
Neal Norwitz6968b052007-02-27 19:02:19 +00001152
1153/* -------------------------------------------------------------------- */
1154/* Methods */
1155
Eric Smith0923d1d2009-04-16 20:16:10 +00001156#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001157
1158#include "stringlib/fastsearch.h"
1159#include "stringlib/count.h"
1160#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001161#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001162#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001163#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001164#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001165
Eric Smith0f78bff2009-11-30 01:01:42 +00001166#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001167
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168PyObject *
1169PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001170{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001171 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001172 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001173 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001175 unsigned char quote, *s, *p;
1176
1177 /* Compute size of output string */
1178 squotes = dquotes = 0;
1179 newsize = 3; /* b'' */
1180 s = (unsigned char*)op->ob_sval;
1181 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001182 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001184 case '\'': squotes++; break;
1185 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001186 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001187 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001188 default:
1189 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001190 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001191 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001192 if (newsize > PY_SSIZE_T_MAX - incr)
1193 goto overflow;
1194 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001195 }
1196 quote = '\'';
1197 if (smartquotes && squotes && !dquotes)
1198 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001199 if (squotes && quote == '\'') {
1200 if (newsize > PY_SSIZE_T_MAX - squotes)
1201 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001202 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001204
1205 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 if (v == NULL) {
1207 return NULL;
1208 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001209 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001211 *p++ = 'b', *p++ = quote;
1212 for (i = 0; i < length; i++) {
1213 unsigned char c = op->ob_sval[i];
1214 if (c == quote || c == '\\')
1215 *p++ = '\\', *p++ = c;
1216 else if (c == '\t')
1217 *p++ = '\\', *p++ = 't';
1218 else if (c == '\n')
1219 *p++ = '\\', *p++ = 'n';
1220 else if (c == '\r')
1221 *p++ = '\\', *p++ = 'r';
1222 else if (c < ' ' || c >= 0x7f) {
1223 *p++ = '\\';
1224 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001225 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1226 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001228 else
1229 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001231 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001232 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001233 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001234
1235 overflow:
1236 PyErr_SetString(PyExc_OverflowError,
1237 "bytes object is too large to make repr");
1238 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001239}
1240
Neal Norwitz6968b052007-02-27 19:02:19 +00001241static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001242bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001245}
1246
Neal Norwitz6968b052007-02-27 19:02:19 +00001247static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001248bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (Py_BytesWarningFlag) {
1251 if (PyErr_WarnEx(PyExc_BytesWarning,
1252 "str() on a bytes instance", 1))
1253 return NULL;
1254 }
1255 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001256}
1257
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001259bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001262}
Neal Norwitz6968b052007-02-27 19:02:19 +00001263
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264/* This is also used by PyBytes_Concat() */
1265static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001266bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 Py_ssize_t size;
1269 Py_buffer va, vb;
1270 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 va.len = -1;
1273 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001274 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1275 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1277 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1278 goto done;
1279 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 /* Optimize end cases */
1282 if (va.len == 0 && PyBytes_CheckExact(b)) {
1283 result = b;
1284 Py_INCREF(result);
1285 goto done;
1286 }
1287 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1288 result = a;
1289 Py_INCREF(result);
1290 goto done;
1291 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 size = va.len + vb.len;
1294 if (size < 0) {
1295 PyErr_NoMemory();
1296 goto done;
1297 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 result = PyBytes_FromStringAndSize(NULL, size);
1300 if (result != NULL) {
1301 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1302 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1303 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
1305 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 if (va.len != -1)
1307 PyBuffer_Release(&va);
1308 if (vb.len != -1)
1309 PyBuffer_Release(&vb);
1310 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311}
Neal Norwitz6968b052007-02-27 19:02:19 +00001312
1313static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001314bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001315{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001316 Py_ssize_t i;
1317 Py_ssize_t j;
1318 Py_ssize_t size;
1319 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 size_t nbytes;
1321 if (n < 0)
1322 n = 0;
1323 /* watch out for overflows: the size can overflow int,
1324 * and the # of bytes needed can overflow size_t
1325 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001326 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyErr_SetString(PyExc_OverflowError,
1328 "repeated bytes are too long");
1329 return NULL;
1330 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001331 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1333 Py_INCREF(a);
1334 return (PyObject *)a;
1335 }
1336 nbytes = (size_t)size;
1337 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1338 PyErr_SetString(PyExc_OverflowError,
1339 "repeated bytes are too long");
1340 return NULL;
1341 }
1342 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1343 if (op == NULL)
1344 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001345 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 op->ob_shash = -1;
1347 op->ob_sval[size] = '\0';
1348 if (Py_SIZE(a) == 1 && n > 0) {
1349 memset(op->ob_sval, a->ob_sval[0] , n);
1350 return (PyObject *) op;
1351 }
1352 i = 0;
1353 if (i < size) {
1354 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1355 i = Py_SIZE(a);
1356 }
1357 while (i < size) {
1358 j = (i <= size-i) ? i : size-i;
1359 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1360 i += j;
1361 }
1362 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001363}
1364
Guido van Rossum98297ee2007-11-06 21:34:58 +00001365static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001366bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367{
1368 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1369 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001370 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001371 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001372 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001373 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001374 return -1;
1375 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1376 varg.buf, varg.len, 0);
1377 PyBuffer_Release(&varg);
1378 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001379 }
1380 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001381 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1382 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001383 }
1384
Antoine Pitrou0010d372010-08-15 17:12:55 +00001385 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001386}
1387
Neal Norwitz6968b052007-02-27 19:02:19 +00001388static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001389bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 if (i < 0 || i >= Py_SIZE(a)) {
1392 PyErr_SetString(PyExc_IndexError, "index out of range");
1393 return NULL;
1394 }
1395 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001396}
1397
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001398Py_LOCAL(int)
1399bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1400{
1401 int cmp;
1402 Py_ssize_t len;
1403
1404 len = Py_SIZE(a);
1405 if (Py_SIZE(b) != len)
1406 return 0;
1407
1408 if (a->ob_sval[0] != b->ob_sval[0])
1409 return 0;
1410
1411 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1412 return (cmp == 0);
1413}
1414
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001416bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 int c;
1419 Py_ssize_t len_a, len_b;
1420 Py_ssize_t min_len;
1421 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001422 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 /* Make sure both arguments are strings. */
1425 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001426 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001427 rc = PyObject_IsInstance((PyObject*)a,
1428 (PyObject*)&PyUnicode_Type);
1429 if (!rc)
1430 rc = PyObject_IsInstance((PyObject*)b,
1431 (PyObject*)&PyUnicode_Type);
1432 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001434 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001435 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001436 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001437 return NULL;
1438 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001439 else {
1440 rc = PyObject_IsInstance((PyObject*)a,
1441 (PyObject*)&PyLong_Type);
1442 if (!rc)
1443 rc = PyObject_IsInstance((PyObject*)b,
1444 (PyObject*)&PyLong_Type);
1445 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001446 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001447 if (rc) {
1448 if (PyErr_WarnEx(PyExc_BytesWarning,
1449 "Comparison between bytes and int", 1))
1450 return NULL;
1451 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001452 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 }
1454 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001456 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001458 case Py_EQ:
1459 case Py_LE:
1460 case Py_GE:
1461 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001463 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001464 case Py_NE:
1465 case Py_LT:
1466 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001468 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001469 default:
1470 PyErr_BadArgument();
1471 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 }
1473 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001474 else if (op == Py_EQ || op == Py_NE) {
1475 int eq = bytes_compare_eq(a, b);
1476 eq ^= (op == Py_NE);
1477 result = eq ? Py_True : Py_False;
1478 }
1479 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001480 len_a = Py_SIZE(a);
1481 len_b = Py_SIZE(b);
1482 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001483 if (min_len > 0) {
1484 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001485 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001486 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001488 else
1489 c = 0;
1490 if (c == 0)
1491 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1492 switch (op) {
1493 case Py_LT: c = c < 0; break;
1494 case Py_LE: c = c <= 0; break;
1495 case Py_GT: c = c > 0; break;
1496 case Py_GE: c = c >= 0; break;
1497 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001498 PyErr_BadArgument();
1499 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001500 }
1501 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 Py_INCREF(result);
1505 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001506}
1507
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001508static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001509bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001510{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001511 if (a->ob_shash == -1) {
1512 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001513 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001514 }
1515 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001516}
1517
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001519bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 if (PyIndex_Check(item)) {
1522 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1523 if (i == -1 && PyErr_Occurred())
1524 return NULL;
1525 if (i < 0)
1526 i += PyBytes_GET_SIZE(self);
1527 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1528 PyErr_SetString(PyExc_IndexError,
1529 "index out of range");
1530 return NULL;
1531 }
1532 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1533 }
1534 else if (PySlice_Check(item)) {
1535 Py_ssize_t start, stop, step, slicelength, cur, i;
1536 char* source_buf;
1537 char* result_buf;
1538 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001539
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001540 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 PyBytes_GET_SIZE(self),
1542 &start, &stop, &step, &slicelength) < 0) {
1543 return NULL;
1544 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 if (slicelength <= 0) {
1547 return PyBytes_FromStringAndSize("", 0);
1548 }
1549 else if (start == 0 && step == 1 &&
1550 slicelength == PyBytes_GET_SIZE(self) &&
1551 PyBytes_CheckExact(self)) {
1552 Py_INCREF(self);
1553 return (PyObject *)self;
1554 }
1555 else if (step == 1) {
1556 return PyBytes_FromStringAndSize(
1557 PyBytes_AS_STRING(self) + start,
1558 slicelength);
1559 }
1560 else {
1561 source_buf = PyBytes_AS_STRING(self);
1562 result = PyBytes_FromStringAndSize(NULL, slicelength);
1563 if (result == NULL)
1564 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 result_buf = PyBytes_AS_STRING(result);
1567 for (cur = start, i = 0; i < slicelength;
1568 cur += step, i++) {
1569 result_buf[i] = source_buf[cur];
1570 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 return result;
1573 }
1574 }
1575 else {
1576 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001577 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 Py_TYPE(item)->tp_name);
1579 return NULL;
1580 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001581}
1582
1583static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001584bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1587 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588}
1589
/* Sequence protocol slots for the bytes type.  The slice and the
   item/slice assignment slots are left empty (0). */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1600
/* Mapping protocol slots for the bytes type: length and subscript
   (index or slice); the third slot is left empty. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,
    (binaryfunc)bytes_subscript,
    0,
};
1606
/* Buffer protocol slots for the bytes type: only the getbuffer slot is
   provided; the second slot is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,
    NULL,
};
1611
1612
/* Strip-mode selectors; presumably consumed by the strip helpers defined
   later in this file (not visible here -- confirm against their use). */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1616
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001617/*[clinic input]
1618bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001619
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001620 sep: object = None
1621 The delimiter according which to split the bytes.
1622 None (the default value) means split on ASCII whitespace characters
1623 (space, tab, return, newline, formfeed, vertical tab).
1624 maxsplit: Py_ssize_t = -1
1625 Maximum number of splits to do.
1626 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001627
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001628Return a list of the sections in the bytes, using sep as the delimiter.
1629[clinic start generated code]*/
1630
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001631static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001632bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001633/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001634{
1635 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 const char *s = PyBytes_AS_STRING(self), *sub;
1637 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001638 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 if (maxsplit < 0)
1641 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001642 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001644 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 return NULL;
1646 sub = vsub.buf;
1647 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1650 PyBuffer_Release(&vsub);
1651 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001652}
1653
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001654/*[clinic input]
1655bytes.partition
1656
1657 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001658 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001659 /
1660
1661Partition the bytes into three parts using the given separator.
1662
1663This will search for the separator sep in the bytes. If the separator is found,
1664returns a 3-tuple containing the part before the separator, the separator
1665itself, and the part after it.
1666
1667If the separator is not found, returns a 3-tuple containing the original bytes
1668object and two empty bytes objects.
1669[clinic start generated code]*/
1670
Neal Norwitz6968b052007-02-27 19:02:19 +00001671static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001672bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001673/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001674{
Neal Norwitz6968b052007-02-27 19:02:19 +00001675 return stringlib_partition(
1676 (PyObject*) self,
1677 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001678 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001679 );
1680}
1681
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001682/*[clinic input]
1683bytes.rpartition
1684
1685 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001686 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001687 /
1688
1689Partition the bytes into three parts using the given separator.
1690
1691This will search for the separator sep in the bytes, starting and the end. If
1692the separator is found, returns a 3-tuple containing the part before the
1693separator, the separator itself, and the part after it.
1694
1695If the separator is not found, returns a 3-tuple containing two empty bytes
1696objects and the original bytes object.
1697[clinic start generated code]*/
1698
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001699static PyObject *
1700bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001701/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001702{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 return stringlib_rpartition(
1704 (PyObject*) self,
1705 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001706 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001708}
1709
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001710/*[clinic input]
1711bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001712
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001713Return a list of the sections in the bytes, using sep as the delimiter.
1714
1715Splitting is done starting at the end of the bytes and working to the front.
1716[clinic start generated code]*/
1717
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001718static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001719bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001720/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001721{
1722 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 const char *s = PyBytes_AS_STRING(self), *sub;
1724 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001725 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 if (maxsplit < 0)
1728 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001729 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001731 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001732 return NULL;
1733 sub = vsub.buf;
1734 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1737 PyBuffer_Release(&vsub);
1738 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001739}
1740
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742/*[clinic input]
1743bytes.join
1744
1745 iterable_of_bytes: object
1746 /
1747
1748Concatenate any number of bytes objects.
1749
1750The bytes whose method is called is inserted in between each pair.
1751
1752The result is returned as a new bytes object.
1753
1754Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1755[clinic start generated code]*/
1756
Neal Norwitz6968b052007-02-27 19:02:19 +00001757static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001758bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001759/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001760{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001761 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001762}
1763
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001764PyObject *
1765_PyBytes_Join(PyObject *sep, PyObject *x)
1766{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 assert(sep != NULL && PyBytes_Check(sep));
1768 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001769 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001770}
1771
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the length of the
   sequence being sliced.

     - end > len        -> end = len
     - end < 0          -> end += len, then clamped to 0 if still negative
     - start < 0        -> start += len, then clamped to 0 if still negative

   `start` is NOT clamped against `len` or `end`, so callers must be prepared
   for start > end after the adjustment.

   Every argument may be evaluated more than once, so none of them may have
   side effects.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   exactly one statement; it must be followed by a semicolon and is then safe
   inside an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
1787Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001788bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001791 char byte;
1792 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001794 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001796 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001797
Antoine Pitrouac65d962011-10-20 23:54:17 +02001798 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1799 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001800 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001801
Antoine Pitrouac65d962011-10-20 23:54:17 +02001802 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001803 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001804 return -2;
1805
1806 sub = subbuf.buf;
1807 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001809 else {
1810 sub = &byte;
1811 sub_len = 1;
1812 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001813 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001814
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001815 ADJUST_INDICES(start, end, len);
1816 if (end - start < sub_len)
1817 res = -1;
Victor Stinnerdabbfe72015-03-25 03:16:32 +01001818 /* Issue #23573: FIXME, windows has no memrchr() */
1819 else if (sub_len == 1 && dir > 0) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001820 unsigned char needle = *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001821 res = stringlib_fastsearch_memchr_1char(
1822 PyBytes_AS_STRING(self) + start, end - start,
Christian Heimes4e259132015-04-18 05:54:02 +02001823 needle, needle, FAST_SEARCH);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001824 if (res >= 0)
1825 res += start;
1826 }
1827 else {
1828 if (dir > 0)
1829 res = stringlib_find_slice(
1830 PyBytes_AS_STRING(self), len,
1831 sub, sub_len, start, end);
1832 else
1833 res = stringlib_rfind_slice(
1834 PyBytes_AS_STRING(self), len,
1835 sub, sub_len, start, end);
1836 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001837
1838 if (subobj)
1839 PyBuffer_Release(&subbuf);
1840
1841 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001842}
1843
1844
1845PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001846"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001847\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001848Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001849such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001850arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001851\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852Return -1 on failure.");
1853
Neal Norwitz6968b052007-02-27 19:02:19 +00001854static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001855bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 Py_ssize_t result = bytes_find_internal(self, args, +1);
1858 if (result == -2)
1859 return NULL;
1860 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001861}
1862
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
1864PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001865"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001866\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867Like B.find() but raise ValueError when the substring is not found.");
1868
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001869static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001870bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 Py_ssize_t result = bytes_find_internal(self, args, +1);
1873 if (result == -2)
1874 return NULL;
1875 if (result == -1) {
1876 PyErr_SetString(PyExc_ValueError,
1877 "substring not found");
1878 return NULL;
1879 }
1880 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001881}
1882
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
1884PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001885"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001886\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001887Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001888such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001890\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891Return -1 on failure.");
1892
Neal Norwitz6968b052007-02-27 19:02:19 +00001893static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001894bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 Py_ssize_t result = bytes_find_internal(self, args, -1);
1897 if (result == -2)
1898 return NULL;
1899 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001900}
1901
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001902
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001903PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001904"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905\n\
1906Like B.rfind() but raise ValueError when the substring is not found.");
1907
1908static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001909bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001910{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 Py_ssize_t result = bytes_find_internal(self, args, -1);
1912 if (result == -2)
1913 return NULL;
1914 if (result == -1) {
1915 PyErr_SetString(PyExc_ValueError,
1916 "substring not found");
1917 return NULL;
1918 }
1919 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001920}
1921
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
1923Py_LOCAL_INLINE(PyObject *)
1924do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001925{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001926 Py_buffer vsep;
1927 char *s = PyBytes_AS_STRING(self);
1928 Py_ssize_t len = PyBytes_GET_SIZE(self);
1929 char *sep;
1930 Py_ssize_t seplen;
1931 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001932
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001933 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 return NULL;
1935 sep = vsep.buf;
1936 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 i = 0;
1939 if (striptype != RIGHTSTRIP) {
1940 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1941 i++;
1942 }
1943 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 j = len;
1946 if (striptype != LEFTSTRIP) {
1947 do {
1948 j--;
1949 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1950 j++;
1951 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1956 Py_INCREF(self);
1957 return (PyObject*)self;
1958 }
1959 else
1960 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001961}
1962
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001963
1964Py_LOCAL_INLINE(PyObject *)
1965do_strip(PyBytesObject *self, int striptype)
1966{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 char *s = PyBytes_AS_STRING(self);
1968 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001970 i = 0;
1971 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001972 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973 i++;
1974 }
1975 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 j = len;
1978 if (striptype != LEFTSTRIP) {
1979 do {
1980 j--;
David Malcolm96960882010-11-05 17:23:41 +00001981 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 j++;
1983 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001984
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001985 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1986 Py_INCREF(self);
1987 return (PyObject*)self;
1988 }
1989 else
1990 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991}
1992
1993
1994Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001995do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001997 if (bytes != NULL && bytes != Py_None) {
1998 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 }
2000 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001}
2002
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002003/*[clinic input]
2004bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002006 self: self(type="PyBytesObject *")
2007 bytes: object = None
2008 /
2009
2010Strip leading and trailing bytes contained in the argument.
2011
2012If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2013[clinic start generated code]*/
2014
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002015static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002016bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002017/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002018{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002019 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002020}
2021
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002022/*[clinic input]
2023bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002024
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002025 self: self(type="PyBytesObject *")
2026 bytes: object = None
2027 /
2028
2029Strip leading bytes contained in the argument.
2030
2031If the argument is omitted or None, strip leading ASCII whitespace.
2032[clinic start generated code]*/
2033
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002034static PyObject *
2035bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002036/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002037{
2038 return do_argstrip(self, LEFTSTRIP, bytes);
2039}
2040
2041/*[clinic input]
2042bytes.rstrip
2043
2044 self: self(type="PyBytesObject *")
2045 bytes: object = None
2046 /
2047
2048Strip trailing bytes contained in the argument.
2049
2050If the argument is omitted or None, strip trailing ASCII whitespace.
2051[clinic start generated code]*/
2052
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002053static PyObject *
2054bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002055/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002056{
2057 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002058}
Neal Norwitz6968b052007-02-27 19:02:19 +00002059
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002060
2061PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002062"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002063\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002065string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066as in slice notation.");
2067
2068static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002069bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 PyObject *sub_obj;
2072 const char *str = PyBytes_AS_STRING(self), *sub;
2073 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002074 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002075 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouac65d962011-10-20 23:54:17 +02002077 Py_buffer vsub;
2078 PyObject *count_obj;
2079
2080 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2081 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouac65d962011-10-20 23:54:17 +02002084 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002085 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002086 return NULL;
2087
2088 sub = vsub.buf;
2089 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002090 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002091 else {
2092 sub = &byte;
2093 sub_len = 1;
2094 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Antoine Pitrouac65d962011-10-20 23:54:17 +02002098 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2100 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002101
2102 if (sub_obj)
2103 PyBuffer_Release(&vsub);
2104
2105 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002106}
2107
2108
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002109/*[clinic input]
2110bytes.translate
2111
2112 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002113 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002114 Translation table, which must be a bytes object of length 256.
2115 [
2116 deletechars: object
2117 ]
2118 /
2119
2120Return a copy with each character mapped by the given translation table.
2121
2122All characters occurring in the optional argument deletechars are removed.
2123The remaining characters are mapped through the given translation table.
2124[clinic start generated code]*/
2125
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002127bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2128 PyObject *deletechars)
2129/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002131 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002132 Py_buffer table_view = {NULL, NULL};
2133 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002134 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002135 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002137 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 Py_ssize_t inlen, tablen, dellen = 0;
2139 PyObject *result;
2140 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002142 if (PyBytes_Check(table)) {
2143 table_chars = PyBytes_AS_STRING(table);
2144 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002146 else if (table == Py_None) {
2147 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 tablen = 256;
2149 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002150 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002151 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002152 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002153 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002154 tablen = table_view.len;
2155 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 if (tablen != 256) {
2158 PyErr_SetString(PyExc_ValueError,
2159 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002160 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 return NULL;
2162 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002164 if (deletechars != NULL) {
2165 if (PyBytes_Check(deletechars)) {
2166 del_table_chars = PyBytes_AS_STRING(deletechars);
2167 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002169 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002170 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002171 PyBuffer_Release(&table_view);
2172 return NULL;
2173 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002174 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002175 dellen = del_table_view.len;
2176 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002177 }
2178 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002179 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002180 dellen = 0;
2181 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 inlen = PyBytes_GET_SIZE(input_obj);
2184 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002185 if (result == NULL) {
2186 PyBuffer_Release(&del_table_view);
2187 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002188 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002189 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 output_start = output = PyBytes_AsString(result);
2191 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002192
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002193 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002194 /* If no deletions are required, use faster code */
2195 for (i = inlen; --i >= 0; ) {
2196 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002197 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002198 changed = 1;
2199 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002200 if (!changed && PyBytes_CheckExact(input_obj)) {
2201 Py_INCREF(input_obj);
2202 Py_DECREF(result);
2203 result = input_obj;
2204 }
2205 PyBuffer_Release(&del_table_view);
2206 PyBuffer_Release(&table_view);
2207 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002208 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002210 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 for (i = 0; i < 256; i++)
2212 trans_table[i] = Py_CHARMASK(i);
2213 } else {
2214 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002215 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002216 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002217 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002219 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002220 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002221 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002223 for (i = inlen; --i >= 0; ) {
2224 c = Py_CHARMASK(*input++);
2225 if (trans_table[c] != -1)
2226 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2227 continue;
2228 changed = 1;
2229 }
2230 if (!changed && PyBytes_CheckExact(input_obj)) {
2231 Py_DECREF(result);
2232 Py_INCREF(input_obj);
2233 return input_obj;
2234 }
2235 /* Fix the size of the resulting string */
2236 if (inlen > 0)
2237 _PyBytes_Resize(&result, output - output_start);
2238 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002239}
2240
2241
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002242/*[clinic input]
2243
2244@staticmethod
2245bytes.maketrans
2246
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002247 frm: Py_buffer
2248 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002249 /
2250
2251Return a translation table useable for the bytes or bytearray translate method.
2252
2253The returned table will be one where each byte in frm is mapped to the byte at
2254the same position in to.
2255
2256The bytes objects frm and to must be of the same length.
2257[clinic start generated code]*/
2258
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002259static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002260bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002261/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002262{
2263 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002264}
2265
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266/* find and count characters and substrings */
2267
/* findchar(target, target_len, c): thin wrapper over memchr() that yields a
   `char *` to the first byte equal to c within the first target_len bytes of
   target, or NULL when there is none. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2270
2271/* String ops must return a string. */
2272/* If the object is subclass of string, create a copy */
2273Py_LOCAL(PyBytesObject *)
2274return_self(PyBytesObject *self)
2275{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002276 if (PyBytes_CheckExact(self)) {
2277 Py_INCREF(self);
2278 return self;
2279 }
2280 return (PyBytesObject *)PyBytes_FromStringAndSize(
2281 PyBytes_AS_STRING(self),
2282 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002283}
2284
2285Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002286countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002287{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002288 Py_ssize_t count=0;
2289 const char *start=target;
2290 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002292 while ( (start=findchar(start, end-start, c)) != NULL ) {
2293 count++;
2294 if (count >= maxcount)
2295 break;
2296 start += 1;
2297 }
2298 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002299}
2300
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301
2302/* Algorithms for different cases of string replacement */
2303
2304/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2305Py_LOCAL(PyBytesObject *)
2306replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002307 const char *to_s, Py_ssize_t to_len,
2308 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002309{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002310 char *self_s, *result_s;
2311 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002312 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002314
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002316
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002317 /* 1 at the end plus 1 after every character;
2318 count = min(maxcount, self_len + 1) */
2319 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002321 else
2322 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2323 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002325 /* Check for overflow */
2326 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002327 assert(count > 0);
2328 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002329 PyErr_SetString(PyExc_OverflowError,
2330 "replacement bytes are too long");
2331 return NULL;
2332 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002333 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002335 if (! (result = (PyBytesObject *)
2336 PyBytes_FromStringAndSize(NULL, result_len)) )
2337 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002339 self_s = PyBytes_AS_STRING(self);
2340 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 /* Lay the first one down (guaranteed this will occur) */
2345 Py_MEMCPY(result_s, to_s, to_len);
2346 result_s += to_len;
2347 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 for (i=0; i<count; i++) {
2350 *result_s++ = *self_s++;
2351 Py_MEMCPY(result_s, to_s, to_len);
2352 result_s += to_len;
2353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 /* Copy the rest of the original string */
2356 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002358 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002359}
2360
2361/* Special case for deleting a single character */
2362/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2363Py_LOCAL(PyBytesObject *)
2364replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002367 char *self_s, *result_s;
2368 char *start, *next, *end;
2369 Py_ssize_t self_len, result_len;
2370 Py_ssize_t count;
2371 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002373 self_len = PyBytes_GET_SIZE(self);
2374 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 count = countchar(self_s, self_len, from_c, maxcount);
2377 if (count == 0) {
2378 return return_self(self);
2379 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002381 result_len = self_len - count; /* from_len == 1 */
2382 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002384 if ( (result = (PyBytesObject *)
2385 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2386 return NULL;
2387 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 start = self_s;
2390 end = self_s + self_len;
2391 while (count-- > 0) {
2392 next = findchar(start, end-start, from_c);
2393 if (next == NULL)
2394 break;
2395 Py_MEMCPY(result_s, start, next-start);
2396 result_s += (next-start);
2397 start = next+1;
2398 }
2399 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002402}
2403
2404/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2405
2406Py_LOCAL(PyBytesObject *)
2407replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 const char *from_s, Py_ssize_t from_len,
2409 Py_ssize_t maxcount) {
2410 char *self_s, *result_s;
2411 char *start, *next, *end;
2412 Py_ssize_t self_len, result_len;
2413 Py_ssize_t count, offset;
2414 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 self_len = PyBytes_GET_SIZE(self);
2417 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002418
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 count = stringlib_count(self_s, self_len,
2420 from_s, from_len,
2421 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002423 if (count == 0) {
2424 /* no matches */
2425 return return_self(self);
2426 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 result_len = self_len - (count * from_len);
2429 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 if ( (result = (PyBytesObject *)
2432 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2433 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 start = self_s;
2438 end = self_s + self_len;
2439 while (count-- > 0) {
2440 offset = stringlib_find(start, end-start,
2441 from_s, from_len,
2442 0);
2443 if (offset == -1)
2444 break;
2445 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002447 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 result_s += (next-start);
2450 start = next+from_len;
2451 }
2452 Py_MEMCPY(result_s, start, end-start);
2453 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002454}
2455
2456/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2457Py_LOCAL(PyBytesObject *)
2458replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 char from_c, char to_c,
2460 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002461{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002462 char *self_s, *result_s, *start, *end, *next;
2463 Py_ssize_t self_len;
2464 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 /* The result string will be the same size */
2467 self_s = PyBytes_AS_STRING(self);
2468 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 if (next == NULL) {
2473 /* No matches; return the original string */
2474 return return_self(self);
2475 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 /* Need to make a new string */
2478 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2479 if (result == NULL)
2480 return NULL;
2481 result_s = PyBytes_AS_STRING(result);
2482 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 /* change everything in-place, starting with this one */
2485 start = result_s + (next-self_s);
2486 *start = to_c;
2487 start++;
2488 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 while (--maxcount > 0) {
2491 next = findchar(start, end-start, from_c);
2492 if (next == NULL)
2493 break;
2494 *next = to_c;
2495 start = next+1;
2496 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002499}
2500
2501/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2502Py_LOCAL(PyBytesObject *)
2503replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 const char *from_s, Py_ssize_t from_len,
2505 const char *to_s, Py_ssize_t to_len,
2506 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 char *result_s, *start, *end;
2509 char *self_s;
2510 Py_ssize_t self_len, offset;
2511 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 self_s = PyBytes_AS_STRING(self);
2516 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 offset = stringlib_find(self_s, self_len,
2519 from_s, from_len,
2520 0);
2521 if (offset == -1) {
2522 /* No matches; return the original string */
2523 return return_self(self);
2524 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 /* Need to make a new string */
2527 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2528 if (result == NULL)
2529 return NULL;
2530 result_s = PyBytes_AS_STRING(result);
2531 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 /* change everything in-place, starting with this one */
2534 start = result_s + offset;
2535 Py_MEMCPY(start, to_s, from_len);
2536 start += from_len;
2537 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002539 while ( --maxcount > 0) {
2540 offset = stringlib_find(start, end-start,
2541 from_s, from_len,
2542 0);
2543 if (offset==-1)
2544 break;
2545 Py_MEMCPY(start+offset, to_s, from_len);
2546 start += offset+from_len;
2547 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002550}
2551
2552/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2553Py_LOCAL(PyBytesObject *)
2554replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002555 char from_c,
2556 const char *to_s, Py_ssize_t to_len,
2557 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 char *self_s, *result_s;
2560 char *start, *next, *end;
2561 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002562 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002565 self_s = PyBytes_AS_STRING(self);
2566 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002568 count = countchar(self_s, self_len, from_c, maxcount);
2569 if (count == 0) {
2570 /* no matches, return unchanged */
2571 return return_self(self);
2572 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 /* use the difference between current and new, hence the "-1" */
2575 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002576 assert(count > 0);
2577 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 PyErr_SetString(PyExc_OverflowError,
2579 "replacement bytes are too long");
2580 return NULL;
2581 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002582 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 if ( (result = (PyBytesObject *)
2585 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2586 return NULL;
2587 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002589 start = self_s;
2590 end = self_s + self_len;
2591 while (count-- > 0) {
2592 next = findchar(start, end-start, from_c);
2593 if (next == NULL)
2594 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002596 if (next == start) {
2597 /* replace with the 'to' */
2598 Py_MEMCPY(result_s, to_s, to_len);
2599 result_s += to_len;
2600 start += 1;
2601 } else {
2602 /* copy the unchanged old then the 'to' */
2603 Py_MEMCPY(result_s, start, next-start);
2604 result_s += (next-start);
2605 Py_MEMCPY(result_s, to_s, to_len);
2606 result_s += to_len;
2607 start = next+1;
2608 }
2609 }
2610 /* Copy the remainder of the remaining string */
2611 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002613 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614}
2615
2616/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2617Py_LOCAL(PyBytesObject *)
2618replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 const char *from_s, Py_ssize_t from_len,
2620 const char *to_s, Py_ssize_t to_len,
2621 Py_ssize_t maxcount) {
2622 char *self_s, *result_s;
2623 char *start, *next, *end;
2624 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002625 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 self_s = PyBytes_AS_STRING(self);
2629 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002631 count = stringlib_count(self_s, self_len,
2632 from_s, from_len,
2633 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002634
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002635 if (count == 0) {
2636 /* no matches, return unchanged */
2637 return return_self(self);
2638 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 /* Check for overflow */
2641 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002642 assert(count > 0);
2643 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 PyErr_SetString(PyExc_OverflowError,
2645 "replacement bytes are too long");
2646 return NULL;
2647 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002648 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002650 if ( (result = (PyBytesObject *)
2651 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2652 return NULL;
2653 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 start = self_s;
2656 end = self_s + self_len;
2657 while (count-- > 0) {
2658 offset = stringlib_find(start, end-start,
2659 from_s, from_len,
2660 0);
2661 if (offset == -1)
2662 break;
2663 next = start+offset;
2664 if (next == start) {
2665 /* replace with the 'to' */
2666 Py_MEMCPY(result_s, to_s, to_len);
2667 result_s += to_len;
2668 start += from_len;
2669 } else {
2670 /* copy the unchanged old then the 'to' */
2671 Py_MEMCPY(result_s, start, next-start);
2672 result_s += (next-start);
2673 Py_MEMCPY(result_s, to_s, to_len);
2674 result_s += to_len;
2675 start = next+from_len;
2676 }
2677 }
2678 /* Copy the remainder of the remaining string */
2679 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002682}
2683
2684
2685Py_LOCAL(PyBytesObject *)
2686replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 const char *from_s, Py_ssize_t from_len,
2688 const char *to_s, Py_ssize_t to_len,
2689 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 if (maxcount < 0) {
2692 maxcount = PY_SSIZE_T_MAX;
2693 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2694 /* nothing to do; return the original string */
2695 return return_self(self);
2696 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002697
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002698 if (maxcount == 0 ||
2699 (from_len == 0 && to_len == 0)) {
2700 /* nothing to do; return the original string */
2701 return return_self(self);
2702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 if (from_len == 0) {
2707 /* insert the 'to' string everywhere. */
2708 /* >>> "Python".replace("", ".") */
2709 /* '.P.y.t.h.o.n.' */
2710 return replace_interleave(self, to_s, to_len, maxcount);
2711 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2714 /* point for an empty self string to generate a non-empty string */
2715 /* Special case so the remaining code always gets a non-empty string */
2716 if (PyBytes_GET_SIZE(self) == 0) {
2717 return return_self(self);
2718 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 if (to_len == 0) {
2721 /* delete all occurrences of 'from' string */
2722 if (from_len == 1) {
2723 return replace_delete_single_character(
2724 self, from_s[0], maxcount);
2725 } else {
2726 return replace_delete_substring(self, from_s,
2727 from_len, maxcount);
2728 }
2729 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 if (from_len == to_len) {
2734 if (from_len == 1) {
2735 return replace_single_character_in_place(
2736 self,
2737 from_s[0],
2738 to_s[0],
2739 maxcount);
2740 } else {
2741 return replace_substring_in_place(
2742 self, from_s, from_len, to_s, to_len,
2743 maxcount);
2744 }
2745 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 /* Otherwise use the more generic algorithms */
2748 if (from_len == 1) {
2749 return replace_single_character(self, from_s[0],
2750 to_s, to_len, maxcount);
2751 } else {
2752 /* len('from')>=2, len('to')>=1 */
2753 return replace_substring(self, from_s, from_len, to_s, to_len,
2754 maxcount);
2755 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756}
2757
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002758
2759/*[clinic input]
2760bytes.replace
2761
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002762 old: Py_buffer
2763 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002764 count: Py_ssize_t = -1
2765 Maximum number of occurrences to replace.
2766 -1 (the default value) means replace all occurrences.
2767 /
2768
2769Return a copy with all occurrences of substring old replaced by new.
2770
2771If the optional argument count is given, only the first count occurrences are
2772replaced.
2773[clinic start generated code]*/
2774
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002775static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002776bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2777 Py_ssize_t count)
2778/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002779{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002780 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002781 (const char *)old->buf, old->len,
2782 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783}
2784
2785/** End DALKE **/
2786
2787/* Matches the end (direction >= 0) or start (direction < 0) of self
2788 * against substr, using the start and end arguments. Returns
2789 * -1 on error, 0 if not found and 1 if found.
2790 */
2791Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002792_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002794{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002795 Py_ssize_t len = PyBytes_GET_SIZE(self);
2796 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002797 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 const char* sub;
2799 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002801 if (PyBytes_Check(substr)) {
2802 sub = PyBytes_AS_STRING(substr);
2803 slen = PyBytes_GET_SIZE(substr);
2804 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002805 else {
2806 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2807 return -1;
2808 sub = sub_view.buf;
2809 slen = sub_view.len;
2810 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002815 if (direction < 0) {
2816 /* startswith */
2817 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002818 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002819 } else {
2820 /* endswith */
2821 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002822 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 if (end-slen > start)
2825 start = end - slen;
2826 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002827 if (end-start < slen)
2828 goto notfound;
2829 if (memcmp(str+start, sub, slen) != 0)
2830 goto notfound;
2831
2832 PyBuffer_Release(&sub_view);
2833 return 1;
2834
2835notfound:
2836 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002837 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838}
2839
2840
2841PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002842"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002843\n\
2844Return True if B starts with the specified prefix, False otherwise.\n\
2845With optional start, test B beginning at that position.\n\
2846With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002847prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002848
2849static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002850bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002851{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002852 Py_ssize_t start = 0;
2853 Py_ssize_t end = PY_SSIZE_T_MAX;
2854 PyObject *subobj;
2855 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856
Jesus Ceaac451502011-04-20 17:09:23 +02002857 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 return NULL;
2859 if (PyTuple_Check(subobj)) {
2860 Py_ssize_t i;
2861 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2862 result = _bytes_tailmatch(self,
2863 PyTuple_GET_ITEM(subobj, i),
2864 start, end, -1);
2865 if (result == -1)
2866 return NULL;
2867 else if (result) {
2868 Py_RETURN_TRUE;
2869 }
2870 }
2871 Py_RETURN_FALSE;
2872 }
2873 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002874 if (result == -1) {
2875 if (PyErr_ExceptionMatches(PyExc_TypeError))
2876 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2877 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002878 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002879 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002880 else
2881 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882}
2883
2884
2885PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002886"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887\n\
2888Return True if B ends with the specified suffix, False otherwise.\n\
2889With optional start, test B beginning at that position.\n\
2890With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002891suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002892
2893static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002894bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002896 Py_ssize_t start = 0;
2897 Py_ssize_t end = PY_SSIZE_T_MAX;
2898 PyObject *subobj;
2899 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002900
Jesus Ceaac451502011-04-20 17:09:23 +02002901 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 return NULL;
2903 if (PyTuple_Check(subobj)) {
2904 Py_ssize_t i;
2905 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2906 result = _bytes_tailmatch(self,
2907 PyTuple_GET_ITEM(subobj, i),
2908 start, end, +1);
2909 if (result == -1)
2910 return NULL;
2911 else if (result) {
2912 Py_RETURN_TRUE;
2913 }
2914 }
2915 Py_RETURN_FALSE;
2916 }
2917 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002918 if (result == -1) {
2919 if (PyErr_ExceptionMatches(PyExc_TypeError))
2920 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2921 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002922 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002923 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 else
2925 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926}
2927
2928
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002929/*[clinic input]
2930bytes.decode
2931
2932 encoding: str(c_default="NULL") = 'utf-8'
2933 The encoding with which to decode the bytes.
2934 errors: str(c_default="NULL") = 'strict'
2935 The error handling scheme to use for the handling of decoding errors.
2936 The default is 'strict' meaning that decoding errors raise a
2937 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2938 as well as any other name registered with codecs.register_error that
2939 can handle UnicodeDecodeErrors.
2940
2941Decode the bytes using the codec registered for encoding.
2942[clinic start generated code]*/
2943
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002944static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002945bytes_decode_impl(PyBytesObject*self, const char *encoding,
2946 const char *errors)
2947/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002948{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002949 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002950}
2951
Guido van Rossum20188312006-05-05 15:15:40 +00002952
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002953/*[clinic input]
2954bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002955
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002956 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002957
2958Return a list of the lines in the bytes, breaking at line boundaries.
2959
2960Line breaks are not included in the resulting list unless keepends is given and
2961true.
2962[clinic start generated code]*/
2963
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002964static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002965bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002966/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002967{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002968 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002969 (PyObject*) self, PyBytes_AS_STRING(self),
2970 PyBytes_GET_SIZE(self), keepends
2971 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002972}
2973
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002974static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002975hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002976{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002977 if (c >= 128)
2978 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002979 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002980 return c - '0';
2981 else {
David Malcolm96960882010-11-05 17:23:41 +00002982 if (Py_ISUPPER(c))
2983 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002984 if (c >= 'a' && c <= 'f')
2985 return c - 'a' + 10;
2986 }
2987 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002988}
2989
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002990/*[clinic input]
2991@classmethod
2992bytes.fromhex
2993
2994 string: unicode
2995 /
2996
2997Create a bytes object from a string of hexadecimal numbers.
2998
2999Spaces between two numbers are accepted.
3000Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3001[clinic start generated code]*/
3002
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003003static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003004bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003005/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003006{
3007 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 Py_ssize_t hexlen, byteslen, i, j;
3010 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003011 void *data;
3012 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003014 assert(PyUnicode_Check(string));
3015 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003017 kind = PyUnicode_KIND(string);
3018 data = PyUnicode_DATA(string);
3019 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 byteslen = hexlen/2; /* This overestimates if there are spaces */
3022 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3023 if (!newstring)
3024 return NULL;
3025 buf = PyBytes_AS_STRING(newstring);
3026 for (i = j = 0; i < hexlen; i += 2) {
3027 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003028 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 i++;
3030 if (i >= hexlen)
3031 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003032 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3033 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 if (top == -1 || bot == -1) {
3035 PyErr_Format(PyExc_ValueError,
3036 "non-hexadecimal number found in "
3037 "fromhex() arg at position %zd", i);
3038 goto error;
3039 }
3040 buf[j++] = (top << 4) + bot;
3041 }
3042 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3043 goto error;
3044 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003045
3046 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 Py_XDECREF(newstring);
3048 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003049}
3050
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003051PyDoc_STRVAR(hex__doc__,
3052"B.hex() -> string\n\
3053\n\
3054Create a string of hexadecimal numbers from a bytes object.\n\
3055Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3056
3057static PyObject *
3058bytes_hex(PyBytesObject *self)
3059{
3060 char* argbuf = PyBytes_AS_STRING(self);
3061 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3062 return _Py_strhex(argbuf, arglen);
3063}
3064
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003065static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003066bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003067{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003068 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003069}
3070
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003071
3072static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003073bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3075 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3076 _Py_capitalize__doc__},
3077 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3078 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003079 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3081 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003082 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 expandtabs__doc__},
3084 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003085 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003086 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3088 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3089 _Py_isalnum__doc__},
3090 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3091 _Py_isalpha__doc__},
3092 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3093 _Py_isdigit__doc__},
3094 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3095 _Py_islower__doc__},
3096 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3097 _Py_isspace__doc__},
3098 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3099 _Py_istitle__doc__},
3100 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3101 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003102 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003103 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3104 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003105 BYTES_LSTRIP_METHODDEF
3106 BYTES_MAKETRANS_METHODDEF
3107 BYTES_PARTITION_METHODDEF
3108 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003109 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3110 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3111 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003112 BYTES_RPARTITION_METHODDEF
3113 BYTES_RSPLIT_METHODDEF
3114 BYTES_RSTRIP_METHODDEF
3115 BYTES_SPLIT_METHODDEF
3116 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003117 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3118 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003119 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003120 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3121 _Py_swapcase__doc__},
3122 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003123 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003124 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3125 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003127};
3128
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003129static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003130bytes_mod(PyObject *v, PyObject *w)
3131{
3132 if (!PyBytes_Check(v))
3133 Py_RETURN_NOTIMPLEMENTED;
3134 return _PyBytes_Format(v, w);
3135}
3136
3137static PyNumberMethods bytes_as_number = {
3138 0, /*nb_add*/
3139 0, /*nb_subtract*/
3140 0, /*nb_multiply*/
3141 bytes_mod, /*nb_remainder*/
3142};
3143
3144static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003145str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3146
3147static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003148bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003149{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003150 PyObject *x = NULL;
3151 const char *encoding = NULL;
3152 const char *errors = NULL;
3153 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003154 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003155 Py_ssize_t size;
3156 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003157 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003159 if (type != &PyBytes_Type)
3160 return str_subtype_new(type, args, kwds);
3161 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3162 &encoding, &errors))
3163 return NULL;
3164 if (x == NULL) {
3165 if (encoding != NULL || errors != NULL) {
3166 PyErr_SetString(PyExc_TypeError,
3167 "encoding or errors without sequence "
3168 "argument");
3169 return NULL;
3170 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003171 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003172 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003174 if (PyUnicode_Check(x)) {
3175 /* Encode via the codec registry */
3176 if (encoding == NULL) {
3177 PyErr_SetString(PyExc_TypeError,
3178 "string argument without an encoding");
3179 return NULL;
3180 }
3181 new = PyUnicode_AsEncodedString(x, encoding, errors);
3182 if (new == NULL)
3183 return NULL;
3184 assert(PyBytes_Check(new));
3185 return new;
3186 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003187
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003188 /* If it's not unicode, there can't be encoding or errors */
3189 if (encoding != NULL || errors != NULL) {
3190 PyErr_SetString(PyExc_TypeError,
3191 "encoding or errors without a string argument");
3192 return NULL;
3193 }
3194
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003195 /* We'd like to call PyObject_Bytes here, but we need to check for an
3196 integer argument before deferring to PyBytes_FromObject, something
3197 PyObject_Bytes doesn't do. */
3198 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3199 if (func != NULL) {
3200 new = PyObject_CallFunctionObjArgs(func, NULL);
3201 Py_DECREF(func);
3202 if (new == NULL)
3203 return NULL;
3204 if (!PyBytes_Check(new)) {
3205 PyErr_Format(PyExc_TypeError,
3206 "__bytes__ returned non-bytes (type %.200s)",
3207 Py_TYPE(new)->tp_name);
3208 Py_DECREF(new);
3209 return NULL;
3210 }
3211 return new;
3212 }
3213 else if (PyErr_Occurred())
3214 return NULL;
3215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003216 /* Is it an integer? */
3217 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3218 if (size == -1 && PyErr_Occurred()) {
3219 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3220 return NULL;
3221 PyErr_Clear();
3222 }
3223 else if (size < 0) {
3224 PyErr_SetString(PyExc_ValueError, "negative count");
3225 return NULL;
3226 }
3227 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003228 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003229 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003230 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003231 return new;
3232 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003233
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003234 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003235}
3236
3237PyObject *
3238PyBytes_FromObject(PyObject *x)
3239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003240 PyObject *new, *it;
3241 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003242
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003243 if (x == NULL) {
3244 PyErr_BadInternalCall();
3245 return NULL;
3246 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003247
3248 if (PyBytes_CheckExact(x)) {
3249 Py_INCREF(x);
3250 return x;
3251 }
3252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003253 /* Use the modern buffer interface */
3254 if (PyObject_CheckBuffer(x)) {
3255 Py_buffer view;
3256 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3257 return NULL;
3258 new = PyBytes_FromStringAndSize(NULL, view.len);
3259 if (!new)
3260 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003261 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3262 &view, view.len, 'C') < 0)
3263 goto fail;
3264 PyBuffer_Release(&view);
3265 return new;
3266 fail:
3267 Py_XDECREF(new);
3268 PyBuffer_Release(&view);
3269 return NULL;
3270 }
3271 if (PyUnicode_Check(x)) {
3272 PyErr_SetString(PyExc_TypeError,
3273 "cannot convert unicode object to bytes");
3274 return NULL;
3275 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003277 if (PyList_CheckExact(x)) {
3278 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3279 if (new == NULL)
3280 return NULL;
3281 for (i = 0; i < Py_SIZE(x); i++) {
3282 Py_ssize_t value = PyNumber_AsSsize_t(
3283 PyList_GET_ITEM(x, i), PyExc_ValueError);
3284 if (value == -1 && PyErr_Occurred()) {
3285 Py_DECREF(new);
3286 return NULL;
3287 }
3288 if (value < 0 || value >= 256) {
3289 PyErr_SetString(PyExc_ValueError,
3290 "bytes must be in range(0, 256)");
3291 Py_DECREF(new);
3292 return NULL;
3293 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003294 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003295 }
3296 return new;
3297 }
3298 if (PyTuple_CheckExact(x)) {
3299 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3300 if (new == NULL)
3301 return NULL;
3302 for (i = 0; i < Py_SIZE(x); i++) {
3303 Py_ssize_t value = PyNumber_AsSsize_t(
3304 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3305 if (value == -1 && PyErr_Occurred()) {
3306 Py_DECREF(new);
3307 return NULL;
3308 }
3309 if (value < 0 || value >= 256) {
3310 PyErr_SetString(PyExc_ValueError,
3311 "bytes must be in range(0, 256)");
3312 Py_DECREF(new);
3313 return NULL;
3314 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003315 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003316 }
3317 return new;
3318 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003320 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003321 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003322 if (size == -1 && PyErr_Occurred())
3323 return NULL;
3324 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3325 returning a shared empty bytes string. This required because we
3326 want to call _PyBytes_Resize() the returned object, which we can
3327 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003328 if (size == 0)
3329 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003330 new = PyBytes_FromStringAndSize(NULL, size);
3331 if (new == NULL)
3332 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003333 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003334
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003335 /* Get the iterator */
3336 it = PyObject_GetIter(x);
3337 if (it == NULL)
3338 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003340 /* Run the iterator to exhaustion */
3341 for (i = 0; ; i++) {
3342 PyObject *item;
3343 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003345 /* Get the next item */
3346 item = PyIter_Next(it);
3347 if (item == NULL) {
3348 if (PyErr_Occurred())
3349 goto error;
3350 break;
3351 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003353 /* Interpret it as an int (__index__) */
3354 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3355 Py_DECREF(item);
3356 if (value == -1 && PyErr_Occurred())
3357 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003359 /* Range check */
3360 if (value < 0 || value >= 256) {
3361 PyErr_SetString(PyExc_ValueError,
3362 "bytes must be in range(0, 256)");
3363 goto error;
3364 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003366 /* Append the byte */
3367 if (i >= size) {
3368 size = 2 * size + 1;
3369 if (_PyBytes_Resize(&new, size) < 0)
3370 goto error;
3371 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003372 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003373 }
3374 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003376 /* Clean up and return success */
3377 Py_DECREF(it);
3378 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003379
3380 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003381 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003382 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003383 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003384}
3385
3386static PyObject *
3387str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3388{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003389 PyObject *tmp, *pnew;
3390 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003392 assert(PyType_IsSubtype(type, &PyBytes_Type));
3393 tmp = bytes_new(&PyBytes_Type, args, kwds);
3394 if (tmp == NULL)
3395 return NULL;
3396 assert(PyBytes_CheckExact(tmp));
3397 n = PyBytes_GET_SIZE(tmp);
3398 pnew = type->tp_alloc(type, n);
3399 if (pnew != NULL) {
3400 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3401 PyBytes_AS_STRING(tmp), n+1);
3402 ((PyBytesObject *)pnew)->ob_shash =
3403 ((PyBytesObject *)tmp)->ob_shash;
3404 }
3405 Py_DECREF(tmp);
3406 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003407}
3408
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003409PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003410"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003411bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003412bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003413bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3414bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003415\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003416Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003417 - an iterable yielding integers in range(256)\n\
3418 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003419 - any object implementing the buffer API.\n\
3420 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003421
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003422static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003423
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003424PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003425 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3426 "bytes",
3427 PyBytesObject_SIZE,
3428 sizeof(char),
3429 bytes_dealloc, /* tp_dealloc */
3430 0, /* tp_print */
3431 0, /* tp_getattr */
3432 0, /* tp_setattr */
3433 0, /* tp_reserved */
3434 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003435 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003436 &bytes_as_sequence, /* tp_as_sequence */
3437 &bytes_as_mapping, /* tp_as_mapping */
3438 (hashfunc)bytes_hash, /* tp_hash */
3439 0, /* tp_call */
3440 bytes_str, /* tp_str */
3441 PyObject_GenericGetAttr, /* tp_getattro */
3442 0, /* tp_setattro */
3443 &bytes_as_buffer, /* tp_as_buffer */
3444 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3445 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3446 bytes_doc, /* tp_doc */
3447 0, /* tp_traverse */
3448 0, /* tp_clear */
3449 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3450 0, /* tp_weaklistoffset */
3451 bytes_iter, /* tp_iter */
3452 0, /* tp_iternext */
3453 bytes_methods, /* tp_methods */
3454 0, /* tp_members */
3455 0, /* tp_getset */
3456 &PyBaseObject_Type, /* tp_base */
3457 0, /* tp_dict */
3458 0, /* tp_descr_get */
3459 0, /* tp_descr_set */
3460 0, /* tp_dictoffset */
3461 0, /* tp_init */
3462 0, /* tp_alloc */
3463 bytes_new, /* tp_new */
3464 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003465};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003466
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003467void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003468PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003469{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003470 assert(pv != NULL);
3471 if (*pv == NULL)
3472 return;
3473 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003474 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003475 return;
3476 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003477
3478 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3479 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003480 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003481 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003482
Antoine Pitrou161d6952014-05-01 14:36:20 +02003483 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003484 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003485 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3486 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3487 Py_CLEAR(*pv);
3488 return;
3489 }
3490
3491 oldsize = PyBytes_GET_SIZE(*pv);
3492 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3493 PyErr_NoMemory();
3494 goto error;
3495 }
3496 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3497 goto error;
3498
3499 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3500 PyBuffer_Release(&wb);
3501 return;
3502
3503 error:
3504 PyBuffer_Release(&wb);
3505 Py_CLEAR(*pv);
3506 return;
3507 }
3508
3509 else {
3510 /* Multiple references, need to create new object */
3511 PyObject *v;
3512 v = bytes_concat(*pv, w);
3513 Py_DECREF(*pv);
3514 *pv = v;
3515 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003516}
3517
3518void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003519PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003521 PyBytes_Concat(pv, w);
3522 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003523}
3524
3525
Ethan Furmanb95b5612015-01-23 20:05:18 -08003526/* The following function breaks the notion that bytes are immutable:
3527 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003528 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003529 as creating a new bytes object and destroying the old one, only
3530 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003531 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003532 Note that if there's not enough memory to resize the bytes object, the
3533 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003534 memory" exception is set, and -1 is returned. Else (on success) 0 is
3535 returned, and the value in *pv may or may not be the same as on input.
3536 As always, an extra byte is allocated for a trailing \0 byte (newsize
3537 does *not* include that), and a trailing \0 byte is stored.
3538*/
3539
3540int
3541_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3542{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003543 PyObject *v;
3544 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003545 v = *pv;
3546 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3547 *pv = 0;
3548 Py_DECREF(v);
3549 PyErr_BadInternalCall();
3550 return -1;
3551 }
3552 /* XXX UNREF/NEWREF interface should be more symmetrical */
3553 _Py_DEC_REFTOTAL;
3554 _Py_ForgetReference(v);
3555 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003556 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003557 if (*pv == NULL) {
3558 PyObject_Del(v);
3559 PyErr_NoMemory();
3560 return -1;
3561 }
3562 _Py_NewReference(*pv);
3563 sv = (PyBytesObject *) *pv;
3564 Py_SIZE(sv) = newsize;
3565 sv->ob_sval[newsize] = '\0';
3566 sv->ob_shash = -1; /* invalidate cached hash value */
3567 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003568}
3569
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003570void
3571PyBytes_Fini(void)
3572{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003573 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003574 for (i = 0; i < UCHAR_MAX + 1; i++)
3575 Py_CLEAR(characters[i]);
3576 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003577}
3578
Benjamin Peterson4116f362008-05-27 00:36:20 +00003579/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003580
3581typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003582 PyObject_HEAD
3583 Py_ssize_t it_index;
3584 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003585} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003586
3587static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003588striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003589{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003590 _PyObject_GC_UNTRACK(it);
3591 Py_XDECREF(it->it_seq);
3592 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003593}
3594
3595static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003596striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003597{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003598 Py_VISIT(it->it_seq);
3599 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003600}
3601
3602static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003603striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003604{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003605 PyBytesObject *seq;
3606 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003608 assert(it != NULL);
3609 seq = it->it_seq;
3610 if (seq == NULL)
3611 return NULL;
3612 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003614 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3615 item = PyLong_FromLong(
3616 (unsigned char)seq->ob_sval[it->it_index]);
3617 if (item != NULL)
3618 ++it->it_index;
3619 return item;
3620 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003622 Py_DECREF(seq);
3623 it->it_seq = NULL;
3624 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003625}
3626
3627static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003628striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003629{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003630 Py_ssize_t len = 0;
3631 if (it->it_seq)
3632 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3633 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003634}
3635
3636PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003637 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003638
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003639static PyObject *
3640striter_reduce(striterobject *it)
3641{
3642 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003643 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003644 it->it_seq, it->it_index);
3645 } else {
3646 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3647 if (u == NULL)
3648 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003649 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003650 }
3651}
3652
3653PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3654
3655static PyObject *
3656striter_setstate(striterobject *it, PyObject *state)
3657{
3658 Py_ssize_t index = PyLong_AsSsize_t(state);
3659 if (index == -1 && PyErr_Occurred())
3660 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003661 if (it->it_seq != NULL) {
3662 if (index < 0)
3663 index = 0;
3664 else if (index > PyBytes_GET_SIZE(it->it_seq))
3665 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3666 it->it_index = index;
3667 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003668 Py_RETURN_NONE;
3669}
3670
3671PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3672
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003673static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003674 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3675 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003676 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3677 reduce_doc},
3678 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3679 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003680 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003681};
3682
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003683PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003684 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3685 "bytes_iterator", /* tp_name */
3686 sizeof(striterobject), /* tp_basicsize */
3687 0, /* tp_itemsize */
3688 /* methods */
3689 (destructor)striter_dealloc, /* tp_dealloc */
3690 0, /* tp_print */
3691 0, /* tp_getattr */
3692 0, /* tp_setattr */
3693 0, /* tp_reserved */
3694 0, /* tp_repr */
3695 0, /* tp_as_number */
3696 0, /* tp_as_sequence */
3697 0, /* tp_as_mapping */
3698 0, /* tp_hash */
3699 0, /* tp_call */
3700 0, /* tp_str */
3701 PyObject_GenericGetAttr, /* tp_getattro */
3702 0, /* tp_setattro */
3703 0, /* tp_as_buffer */
3704 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3705 0, /* tp_doc */
3706 (traverseproc)striter_traverse, /* tp_traverse */
3707 0, /* tp_clear */
3708 0, /* tp_richcompare */
3709 0, /* tp_weaklistoffset */
3710 PyObject_SelfIter, /* tp_iter */
3711 (iternextfunc)striter_next, /* tp_iternext */
3712 striter_methods, /* tp_methods */
3713 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003714};
3715
3716static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003717bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003718{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003719 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003721 if (!PyBytes_Check(seq)) {
3722 PyErr_BadInternalCall();
3723 return NULL;
3724 }
3725 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3726 if (it == NULL)
3727 return NULL;
3728 it->it_index = 0;
3729 Py_INCREF(seq);
3730 it->it_seq = (PyBytesObject *)seq;
3731 _PyObject_GC_TRACK(it);
3732 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003733}