blob: 5934336f892e360976d6661a9e606458568c2da0 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* PyBytesObject_SIZE is the base size of a bytes object: storage for a
   string of length n must be requested as PyBytesObject_SIZE + n bytes.

   It is the offset of the ob_sval member plus one byte for the trailing
   null.  Using it instead of sizeof(PyBytesObject) saves 3 bytes per
   string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the length of that string (not counting the terminator) is the
   `size' of the resulting object.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
/* Bit flags recording which flag characters were seen in a format
 * specifier:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
412static PyObject *
413formatfloat(PyObject *v, int flags, int prec, int type)
414{
415 char *p;
416 PyObject *result;
417 double x;
418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
434 result = PyBytes_FromStringAndSize(p, strlen(p));
435 PyMem_Free(p);
436 return result;
437}
438
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300439static PyObject *
440formatlong(PyObject *v, int flags, int prec, int type)
441{
442 PyObject *result, *iobj;
443 if (type == 'i')
444 type = 'd';
445 if (PyLong_Check(v))
446 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
447 if (PyNumber_Check(v)) {
448 /* make sure number is a type of integer for o, x, and X */
449 if (type == 'o' || type == 'x' || type == 'X')
450 iobj = PyNumber_Index(v);
451 else
452 iobj = PyNumber_Long(v);
453 if (iobj == NULL) {
454 if (!PyErr_ExceptionMatches(PyExc_TypeError))
455 return NULL;
456 }
457 else if (!PyLong_Check(iobj))
458 Py_CLEAR(iobj);
459 if (iobj != NULL) {
460 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
461 Py_DECREF(iobj);
462 return result;
463 }
464 }
465 PyErr_Format(PyExc_TypeError,
466 "%%%c format: %s is required, not %.200s", type,
467 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
468 : "a number",
469 Py_TYPE(v)->tp_name);
470 return NULL;
471}
472
473static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200474byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800475{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200476 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
477 *p = PyBytes_AS_STRING(arg)[0];
478 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800479 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200480 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
481 *p = PyByteArray_AS_STRING(arg)[0];
482 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483 }
484 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300485 PyObject *iobj;
486 long ival;
487 int overflow;
488 /* make sure number is a type of integer */
489 if (PyLong_Check(arg)) {
490 ival = PyLong_AsLongAndOverflow(arg, &overflow);
491 }
492 else {
493 iobj = PyNumber_Index(arg);
494 if (iobj == NULL) {
495 if (!PyErr_ExceptionMatches(PyExc_TypeError))
496 return 0;
497 goto onError;
498 }
499 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
500 Py_DECREF(iobj);
501 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300502 if (!overflow && ival == -1 && PyErr_Occurred())
503 goto onError;
504 if (overflow || !(0 <= ival && ival <= 255)) {
505 PyErr_SetString(PyExc_OverflowError,
506 "%c arg not in range(256)");
507 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800508 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300509 *p = (char)ival;
510 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800511 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300512 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200513 PyErr_SetString(PyExc_TypeError,
514 "%c requires an integer in range(256) or a single byte");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516}
517
518static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200519format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 /* is it a bytes object? */
524 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 *pbuf = PyBytes_AS_STRING(v);
526 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800527 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 return v;
529 }
530 if (PyByteArray_Check(v)) {
531 *pbuf = PyByteArray_AS_STRING(v);
532 *plen = PyByteArray_GET_SIZE(v);
533 Py_INCREF(v);
534 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 }
536 /* does it support __bytes__? */
537 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
538 if (func != NULL) {
539 result = PyObject_CallFunctionObjArgs(func, NULL);
540 Py_DECREF(func);
541 if (result == NULL)
542 return NULL;
543 if (!PyBytes_Check(result)) {
544 PyErr_Format(PyExc_TypeError,
545 "__bytes__ returned non-bytes (type %.200s)",
546 Py_TYPE(result)->tp_name);
547 Py_DECREF(result);
548 return NULL;
549 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200550 *pbuf = PyBytes_AS_STRING(result);
551 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 return result;
553 }
554 PyErr_Format(PyExc_TypeError,
555 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
556 Py_TYPE(v)->tp_name);
557 return NULL;
558}
559
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   NOTE(review): nothing in the visible part of this file references
   FORMATBUFLEN -- confirm it is still needed.
*/
#define FORMATBUFLEN (size_t)120
569
570PyObject *
571_PyBytes_Format(PyObject *format, PyObject *args)
572{
573 char *fmt, *res;
574 Py_ssize_t arglen, argidx;
575 Py_ssize_t reslen, rescnt, fmtcnt;
576 int args_owned = 0;
577 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578 PyObject *dict = NULL;
579 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
580 PyErr_BadInternalCall();
581 return NULL;
582 }
583 fmt = PyBytes_AS_STRING(format);
584 fmtcnt = PyBytes_GET_SIZE(format);
585 reslen = rescnt = fmtcnt + 100;
586 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
587 if (result == NULL)
588 return NULL;
589 res = PyBytes_AsString(result);
590 if (PyTuple_Check(args)) {
591 arglen = PyTuple_GET_SIZE(args);
592 argidx = 0;
593 }
594 else {
595 arglen = -1;
596 argidx = -2;
597 }
598 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
599 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
600 !PyByteArray_Check(args)) {
601 dict = args;
602 }
603 while (--fmtcnt >= 0) {
604 if (*fmt != '%') {
605 if (--rescnt < 0) {
606 rescnt = fmtcnt + 100;
607 reslen += rescnt;
608 if (_PyBytes_Resize(&result, reslen))
609 return NULL;
610 res = PyBytes_AS_STRING(result)
611 + reslen - rescnt;
612 --rescnt;
613 }
614 *res++ = *fmt++;
615 }
616 else {
617 /* Got a format specifier */
618 int flags = 0;
619 Py_ssize_t width = -1;
620 int prec = -1;
621 int c = '\0';
622 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800623 PyObject *v = NULL;
624 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200625 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800626 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200627 Py_ssize_t len = 0;
628 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629
Ethan Furmanb95b5612015-01-23 20:05:18 -0800630 fmt++;
631 if (*fmt == '(') {
632 char *keystart;
633 Py_ssize_t keylen;
634 PyObject *key;
635 int pcount = 1;
636
637 if (dict == NULL) {
638 PyErr_SetString(PyExc_TypeError,
639 "format requires a mapping");
640 goto error;
641 }
642 ++fmt;
643 --fmtcnt;
644 keystart = fmt;
645 /* Skip over balanced parentheses */
646 while (pcount > 0 && --fmtcnt >= 0) {
647 if (*fmt == ')')
648 --pcount;
649 else if (*fmt == '(')
650 ++pcount;
651 fmt++;
652 }
653 keylen = fmt - keystart - 1;
654 if (fmtcnt < 0 || pcount > 0) {
655 PyErr_SetString(PyExc_ValueError,
656 "incomplete format key");
657 goto error;
658 }
659 key = PyBytes_FromStringAndSize(keystart,
660 keylen);
661 if (key == NULL)
662 goto error;
663 if (args_owned) {
664 Py_DECREF(args);
665 args_owned = 0;
666 }
667 args = PyObject_GetItem(dict, key);
668 Py_DECREF(key);
669 if (args == NULL) {
670 goto error;
671 }
672 args_owned = 1;
673 arglen = -1;
674 argidx = -2;
675 }
676 while (--fmtcnt >= 0) {
677 switch (c = *fmt++) {
678 case '-': flags |= F_LJUST; continue;
679 case '+': flags |= F_SIGN; continue;
680 case ' ': flags |= F_BLANK; continue;
681 case '#': flags |= F_ALT; continue;
682 case '0': flags |= F_ZERO; continue;
683 }
684 break;
685 }
686 if (c == '*') {
687 v = getnextarg(args, arglen, &argidx);
688 if (v == NULL)
689 goto error;
690 if (!PyLong_Check(v)) {
691 PyErr_SetString(PyExc_TypeError,
692 "* wants int");
693 goto error;
694 }
695 width = PyLong_AsSsize_t(v);
696 if (width == -1 && PyErr_Occurred())
697 goto error;
698 if (width < 0) {
699 flags |= F_LJUST;
700 width = -width;
701 }
702 if (--fmtcnt >= 0)
703 c = *fmt++;
704 }
705 else if (c >= 0 && isdigit(c)) {
706 width = c - '0';
707 while (--fmtcnt >= 0) {
708 c = Py_CHARMASK(*fmt++);
709 if (!isdigit(c))
710 break;
711 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
712 PyErr_SetString(
713 PyExc_ValueError,
714 "width too big");
715 goto error;
716 }
717 width = width*10 + (c - '0');
718 }
719 }
720 if (c == '.') {
721 prec = 0;
722 if (--fmtcnt >= 0)
723 c = *fmt++;
724 if (c == '*') {
725 v = getnextarg(args, arglen, &argidx);
726 if (v == NULL)
727 goto error;
728 if (!PyLong_Check(v)) {
729 PyErr_SetString(
730 PyExc_TypeError,
731 "* wants int");
732 goto error;
733 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200734 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800735 if (prec == -1 && PyErr_Occurred())
736 goto error;
737 if (prec < 0)
738 prec = 0;
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 prec = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "prec too big");
752 goto error;
753 }
754 prec = prec*10 + (c - '0');
755 }
756 }
757 } /* prec */
758 if (fmtcnt >= 0) {
759 if (c == 'h' || c == 'l' || c == 'L') {
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 }
763 }
764 if (fmtcnt < 0) {
765 PyErr_SetString(PyExc_ValueError,
766 "incomplete format");
767 goto error;
768 }
769 if (c != '%') {
770 v = getnextarg(args, arglen, &argidx);
771 if (v == NULL)
772 goto error;
773 }
774 sign = 0;
775 fill = ' ';
776 switch (c) {
777 case '%':
778 pbuf = "%";
779 len = 1;
780 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700781 case 'r':
782 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800783 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200784 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (temp == NULL)
786 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200787 assert(PyUnicode_IS_ASCII(temp));
788 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
789 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800790 if (prec >= 0 && len > prec)
791 len = prec;
792 break;
793 case 's':
794 // %s is only for 2/3 code; 3 only code should use %b
795 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200796 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800797 if (temp == NULL)
798 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800799 if (prec >= 0 && len > prec)
800 len = prec;
801 break;
802 case 'i':
803 case 'd':
804 case 'u':
805 case 'o':
806 case 'x':
807 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300808 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200809 if (!temp)
810 goto error;
811 assert(PyUnicode_IS_ASCII(temp));
812 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
813 len = PyUnicode_GET_LENGTH(temp);
814 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800815 if (flags & F_ZERO)
816 fill = '0';
817 break;
818 case 'e':
819 case 'E':
820 case 'f':
821 case 'F':
822 case 'g':
823 case 'G':
824 temp = formatfloat(v, flags, prec, c);
825 if (temp == NULL)
826 goto error;
827 pbuf = PyBytes_AS_STRING(temp);
828 len = PyBytes_GET_SIZE(temp);
829 sign = 1;
830 if (flags & F_ZERO)
831 fill = '0';
832 break;
833 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 pbuf = &onechar;
835 len = byte_converter(v, &onechar);
836 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 goto error;
838 break;
839 default:
840 PyErr_Format(PyExc_ValueError,
841 "unsupported format character '%c' (0x%x) "
842 "at index %zd",
843 c, c,
844 (Py_ssize_t)(fmt - 1 -
845 PyBytes_AsString(format)));
846 goto error;
847 }
848 if (sign) {
849 if (*pbuf == '-' || *pbuf == '+') {
850 sign = *pbuf++;
851 len--;
852 }
853 else if (flags & F_SIGN)
854 sign = '+';
855 else if (flags & F_BLANK)
856 sign = ' ';
857 else
858 sign = 0;
859 }
860 if (width < len)
861 width = len;
862 if (rescnt - (sign != 0) < width) {
863 reslen -= rescnt;
864 rescnt = width + fmtcnt + 100;
865 reslen += rescnt;
866 if (reslen < 0) {
867 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800868 Py_XDECREF(temp);
869 return PyErr_NoMemory();
870 }
871 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800872 Py_XDECREF(temp);
873 return NULL;
874 }
875 res = PyBytes_AS_STRING(result)
876 + reslen - rescnt;
877 }
878 if (sign) {
879 if (fill != ' ')
880 *res++ = sign;
881 rescnt--;
882 if (width > len)
883 width--;
884 }
885 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
886 assert(pbuf[0] == '0');
887 assert(pbuf[1] == c);
888 if (fill != ' ') {
889 *res++ = *pbuf++;
890 *res++ = *pbuf++;
891 }
892 rescnt -= 2;
893 width -= 2;
894 if (width < 0)
895 width = 0;
896 len -= 2;
897 }
898 if (width > len && !(flags & F_LJUST)) {
899 do {
900 --rescnt;
901 *res++ = fill;
902 } while (--width > len);
903 }
904 if (fill == ' ') {
905 if (sign)
906 *res++ = sign;
907 if ((flags & F_ALT) &&
908 (c == 'x' || c == 'X')) {
909 assert(pbuf[0] == '0');
910 assert(pbuf[1] == c);
911 *res++ = *pbuf++;
912 *res++ = *pbuf++;
913 }
914 }
915 Py_MEMCPY(res, pbuf, len);
916 res += len;
917 rescnt -= len;
918 while (--width >= len) {
919 --rescnt;
920 *res++ = ' ';
921 }
922 if (dict && (argidx < arglen) && c != '%') {
923 PyErr_SetString(PyExc_TypeError,
924 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 Py_XDECREF(temp);
926 goto error;
927 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 Py_XDECREF(temp);
929 } /* '%' */
930 } /* until end */
931 if (argidx < arglen && !dict) {
932 PyErr_SetString(PyExc_TypeError,
933 "not all arguments converted during bytes formatting");
934 goto error;
935 }
936 if (args_owned) {
937 Py_DECREF(args);
938 }
939 if (_PyBytes_Resize(&result, reslen - rescnt))
940 return NULL;
941 return result;
942
943 error:
944 Py_DECREF(result);
945 if (args_owned) {
946 Py_DECREF(args);
947 }
948 return NULL;
949}
950
951/* =-= */
952
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000953static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000957}
958
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959/* Unescape a backslash-escaped string. If unicode is non-zero,
960 the string is a u-literal. If recode_encoding is non-zero,
961 the string is UTF-8 encoded and should be re-encoded in the
962 specified encoding. */
963
964PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 Py_ssize_t len,
966 const char *errors,
967 Py_ssize_t unicode,
968 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 int c;
971 char *p, *buf;
972 const char *end;
973 PyObject *v;
974 Py_ssize_t newlen = recode_encoding ? 4*len:len;
975 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
976 if (v == NULL)
977 return NULL;
978 p = buf = PyBytes_AsString(v);
979 end = s + len;
980 while (s < end) {
981 if (*s != '\\') {
982 non_esc:
983 if (recode_encoding && (*s & 0x80)) {
984 PyObject *u, *w;
985 char *r;
986 const char* t;
987 Py_ssize_t rn;
988 t = s;
989 /* Decode non-ASCII bytes as UTF-8. */
990 while (t < end && (*t & 0x80)) t++;
991 u = PyUnicode_DecodeUTF8(s, t - s, errors);
992 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 /* Recode them in target encoding. */
995 w = PyUnicode_AsEncodedString(
996 u, recode_encoding, errors);
997 Py_DECREF(u);
998 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 /* Append bytes to output buffer. */
1001 assert(PyBytes_Check(w));
1002 r = PyBytes_AS_STRING(w);
1003 rn = PyBytes_GET_SIZE(w);
1004 Py_MEMCPY(p, r, rn);
1005 p += rn;
1006 Py_DECREF(w);
1007 s = t;
1008 } else {
1009 *p++ = *s++;
1010 }
1011 continue;
1012 }
1013 s++;
1014 if (s==end) {
1015 PyErr_SetString(PyExc_ValueError,
1016 "Trailing \\ in string");
1017 goto failed;
1018 }
1019 switch (*s++) {
1020 /* XXX This assumes ASCII! */
1021 case '\n': break;
1022 case '\\': *p++ = '\\'; break;
1023 case '\'': *p++ = '\''; break;
1024 case '\"': *p++ = '\"'; break;
1025 case 'b': *p++ = '\b'; break;
1026 case 'f': *p++ = '\014'; break; /* FF */
1027 case 't': *p++ = '\t'; break;
1028 case 'n': *p++ = '\n'; break;
1029 case 'r': *p++ = '\r'; break;
1030 case 'v': *p++ = '\013'; break; /* VT */
1031 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1032 case '0': case '1': case '2': case '3':
1033 case '4': case '5': case '6': case '7':
1034 c = s[-1] - '0';
1035 if (s < end && '0' <= *s && *s <= '7') {
1036 c = (c<<3) + *s++ - '0';
1037 if (s < end && '0' <= *s && *s <= '7')
1038 c = (c<<3) + *s++ - '0';
1039 }
1040 *p++ = c;
1041 break;
1042 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001043 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 unsigned int x = 0;
1045 c = Py_CHARMASK(*s);
1046 s++;
David Malcolm96960882010-11-05 17:23:41 +00001047 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001049 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 x = 10 + c - 'a';
1051 else
1052 x = 10 + c - 'A';
1053 x = x << 4;
1054 c = Py_CHARMASK(*s);
1055 s++;
David Malcolm96960882010-11-05 17:23:41 +00001056 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001058 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 x += 10 + c - 'a';
1060 else
1061 x += 10 + c - 'A';
1062 *p++ = x;
1063 break;
1064 }
1065 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001066 PyErr_Format(PyExc_ValueError,
1067 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001068 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 goto failed;
1070 }
1071 if (strcmp(errors, "replace") == 0) {
1072 *p++ = '?';
1073 } else if (strcmp(errors, "ignore") == 0)
1074 /* do nothing */;
1075 else {
1076 PyErr_Format(PyExc_ValueError,
1077 "decoding error; unknown "
1078 "error handling code: %.400s",
1079 errors);
1080 goto failed;
1081 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001082 /* skip \x */
1083 if (s < end && Py_ISXDIGIT(s[0]))
1084 s++; /* and a hexdigit */
1085 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 default:
1087 *p++ = '\\';
1088 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001089 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 UTF-8 bytes may follow. */
1091 }
1092 }
1093 if (p-buf < newlen)
1094 _PyBytes_Resize(&v, p - buf);
1095 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_DECREF(v);
1098 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099}
1100
1101/* -------------------------------------------------------------------- */
1102/* object api */
1103
1104Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001105PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 if (!PyBytes_Check(op)) {
1108 PyErr_Format(PyExc_TypeError,
1109 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1110 return -1;
1111 }
1112 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113}
1114
1115char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001116PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (!PyBytes_Check(op)) {
1119 PyErr_Format(PyExc_TypeError,
1120 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1121 return NULL;
1122 }
1123 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124}
1125
1126int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001127PyBytes_AsStringAndSize(PyObject *obj,
1128 char **s,
1129 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001130{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 if (s == NULL) {
1132 PyErr_BadInternalCall();
1133 return -1;
1134 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 if (!PyBytes_Check(obj)) {
1137 PyErr_Format(PyExc_TypeError,
1138 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1139 return -1;
1140 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 *s = PyBytes_AS_STRING(obj);
1143 if (len != NULL)
1144 *len = PyBytes_GET_SIZE(obj);
1145 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001146 PyErr_SetString(PyExc_ValueError,
1147 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 return -1;
1149 }
1150 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001151}
Neal Norwitz6968b052007-02-27 19:02:19 +00001152
1153/* -------------------------------------------------------------------- */
1154/* Methods */
1155
Eric Smith0923d1d2009-04-16 20:16:10 +00001156#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001157
1158#include "stringlib/fastsearch.h"
1159#include "stringlib/count.h"
1160#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001161#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001162#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001163#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001164#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001165
Eric Smith0f78bff2009-11-30 01:01:42 +00001166#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001167
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168PyObject *
1169PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001170{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001171 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001172 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001173 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001175 unsigned char quote, *s, *p;
1176
1177 /* Compute size of output string */
1178 squotes = dquotes = 0;
1179 newsize = 3; /* b'' */
1180 s = (unsigned char*)op->ob_sval;
1181 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001182 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001184 case '\'': squotes++; break;
1185 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001186 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001187 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001188 default:
1189 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001190 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001191 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001192 if (newsize > PY_SSIZE_T_MAX - incr)
1193 goto overflow;
1194 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001195 }
1196 quote = '\'';
1197 if (smartquotes && squotes && !dquotes)
1198 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001199 if (squotes && quote == '\'') {
1200 if (newsize > PY_SSIZE_T_MAX - squotes)
1201 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001202 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001204
1205 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 if (v == NULL) {
1207 return NULL;
1208 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001209 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001211 *p++ = 'b', *p++ = quote;
1212 for (i = 0; i < length; i++) {
1213 unsigned char c = op->ob_sval[i];
1214 if (c == quote || c == '\\')
1215 *p++ = '\\', *p++ = c;
1216 else if (c == '\t')
1217 *p++ = '\\', *p++ = 't';
1218 else if (c == '\n')
1219 *p++ = '\\', *p++ = 'n';
1220 else if (c == '\r')
1221 *p++ = '\\', *p++ = 'r';
1222 else if (c < ' ' || c >= 0x7f) {
1223 *p++ = '\\';
1224 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001225 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1226 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001228 else
1229 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001231 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001232 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001233 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001234
1235 overflow:
1236 PyErr_SetString(PyExc_OverflowError,
1237 "bytes object is too large to make repr");
1238 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001239}
1240
Neal Norwitz6968b052007-02-27 19:02:19 +00001241static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001242bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001245}
1246
Neal Norwitz6968b052007-02-27 19:02:19 +00001247static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001248bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (Py_BytesWarningFlag) {
1251 if (PyErr_WarnEx(PyExc_BytesWarning,
1252 "str() on a bytes instance", 1))
1253 return NULL;
1254 }
1255 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001256}
1257
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001259bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001262}
Neal Norwitz6968b052007-02-27 19:02:19 +00001263
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264/* This is also used by PyBytes_Concat() */
1265static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001266bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 Py_buffer va, vb;
1269 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 va.len = -1;
1272 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001273 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1274 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1276 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1277 goto done;
1278 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 /* Optimize end cases */
1281 if (va.len == 0 && PyBytes_CheckExact(b)) {
1282 result = b;
1283 Py_INCREF(result);
1284 goto done;
1285 }
1286 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1287 result = a;
1288 Py_INCREF(result);
1289 goto done;
1290 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001292 if (va.len > PY_SSIZE_T_MAX - vb.len) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 PyErr_NoMemory();
1294 goto done;
1295 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001296
Serhiy Storchaka06cfb0c2016-07-10 20:48:43 +03001297 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (result != NULL) {
1299 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1300 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1301 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001302
1303 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 if (va.len != -1)
1305 PyBuffer_Release(&va);
1306 if (vb.len != -1)
1307 PyBuffer_Release(&vb);
1308 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309}
Neal Norwitz6968b052007-02-27 19:02:19 +00001310
1311static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001312bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001313{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001314 Py_ssize_t i;
1315 Py_ssize_t j;
1316 Py_ssize_t size;
1317 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 size_t nbytes;
1319 if (n < 0)
1320 n = 0;
1321 /* watch out for overflows: the size can overflow int,
1322 * and the # of bytes needed can overflow size_t
1323 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001324 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 PyErr_SetString(PyExc_OverflowError,
1326 "repeated bytes are too long");
1327 return NULL;
1328 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001329 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1331 Py_INCREF(a);
1332 return (PyObject *)a;
1333 }
1334 nbytes = (size_t)size;
1335 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1336 PyErr_SetString(PyExc_OverflowError,
1337 "repeated bytes are too long");
1338 return NULL;
1339 }
1340 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1341 if (op == NULL)
1342 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001343 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 op->ob_shash = -1;
1345 op->ob_sval[size] = '\0';
1346 if (Py_SIZE(a) == 1 && n > 0) {
1347 memset(op->ob_sval, a->ob_sval[0] , n);
1348 return (PyObject *) op;
1349 }
1350 i = 0;
1351 if (i < size) {
1352 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1353 i = Py_SIZE(a);
1354 }
1355 while (i < size) {
1356 j = (i <= size-i) ? i : size-i;
1357 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1358 i += j;
1359 }
1360 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001361}
1362
Guido van Rossum98297ee2007-11-06 21:34:58 +00001363static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001364bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001365{
1366 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1367 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001368 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001369 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001370 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001371 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001372 return -1;
1373 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1374 varg.buf, varg.len, 0);
1375 PyBuffer_Release(&varg);
1376 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001377 }
1378 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001379 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1380 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001381 }
1382
Antoine Pitrou0010d372010-08-15 17:12:55 +00001383 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001384}
1385
Neal Norwitz6968b052007-02-27 19:02:19 +00001386static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001387bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001388{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 if (i < 0 || i >= Py_SIZE(a)) {
1390 PyErr_SetString(PyExc_IndexError, "index out of range");
1391 return NULL;
1392 }
1393 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001394}
1395
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001396Py_LOCAL(int)
1397bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1398{
1399 int cmp;
1400 Py_ssize_t len;
1401
1402 len = Py_SIZE(a);
1403 if (Py_SIZE(b) != len)
1404 return 0;
1405
1406 if (a->ob_sval[0] != b->ob_sval[0])
1407 return 0;
1408
1409 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1410 return (cmp == 0);
1411}
1412
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001413static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001414bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001415{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 int c;
1417 Py_ssize_t len_a, len_b;
1418 Py_ssize_t min_len;
1419 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001420 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 /* Make sure both arguments are strings. */
1423 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001424 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001425 rc = PyObject_IsInstance((PyObject*)a,
1426 (PyObject*)&PyUnicode_Type);
1427 if (!rc)
1428 rc = PyObject_IsInstance((PyObject*)b,
1429 (PyObject*)&PyUnicode_Type);
1430 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001432 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001433 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001434 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001435 return NULL;
1436 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001437 else {
1438 rc = PyObject_IsInstance((PyObject*)a,
1439 (PyObject*)&PyLong_Type);
1440 if (!rc)
1441 rc = PyObject_IsInstance((PyObject*)b,
1442 (PyObject*)&PyLong_Type);
1443 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001444 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001445 if (rc) {
1446 if (PyErr_WarnEx(PyExc_BytesWarning,
1447 "Comparison between bytes and int", 1))
1448 return NULL;
1449 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001450 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 }
1452 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001454 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001456 case Py_EQ:
1457 case Py_LE:
1458 case Py_GE:
1459 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001461 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001462 case Py_NE:
1463 case Py_LT:
1464 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001466 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001467 default:
1468 PyErr_BadArgument();
1469 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 }
1471 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001472 else if (op == Py_EQ || op == Py_NE) {
1473 int eq = bytes_compare_eq(a, b);
1474 eq ^= (op == Py_NE);
1475 result = eq ? Py_True : Py_False;
1476 }
1477 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001478 len_a = Py_SIZE(a);
1479 len_b = Py_SIZE(b);
1480 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001481 if (min_len > 0) {
1482 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001483 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001484 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001486 else
1487 c = 0;
1488 if (c == 0)
1489 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1490 switch (op) {
1491 case Py_LT: c = c < 0; break;
1492 case Py_LE: c = c <= 0; break;
1493 case Py_GT: c = c > 0; break;
1494 case Py_GE: c = c >= 0; break;
1495 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001496 PyErr_BadArgument();
1497 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001498 }
1499 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 Py_INCREF(result);
1503 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001504}
1505
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001506static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001507bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001508{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001509 if (a->ob_shash == -1) {
1510 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001511 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001512 }
1513 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001514}
1515
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001516static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001517bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 if (PyIndex_Check(item)) {
1520 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1521 if (i == -1 && PyErr_Occurred())
1522 return NULL;
1523 if (i < 0)
1524 i += PyBytes_GET_SIZE(self);
1525 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1526 PyErr_SetString(PyExc_IndexError,
1527 "index out of range");
1528 return NULL;
1529 }
1530 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1531 }
1532 else if (PySlice_Check(item)) {
1533 Py_ssize_t start, stop, step, slicelength, cur, i;
1534 char* source_buf;
1535 char* result_buf;
1536 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001537
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001538 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 PyBytes_GET_SIZE(self),
1540 &start, &stop, &step, &slicelength) < 0) {
1541 return NULL;
1542 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 if (slicelength <= 0) {
1545 return PyBytes_FromStringAndSize("", 0);
1546 }
1547 else if (start == 0 && step == 1 &&
1548 slicelength == PyBytes_GET_SIZE(self) &&
1549 PyBytes_CheckExact(self)) {
1550 Py_INCREF(self);
1551 return (PyObject *)self;
1552 }
1553 else if (step == 1) {
1554 return PyBytes_FromStringAndSize(
1555 PyBytes_AS_STRING(self) + start,
1556 slicelength);
1557 }
1558 else {
1559 source_buf = PyBytes_AS_STRING(self);
1560 result = PyBytes_FromStringAndSize(NULL, slicelength);
1561 if (result == NULL)
1562 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001563
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 result_buf = PyBytes_AS_STRING(result);
1565 for (cur = start, i = 0; i < slicelength;
1566 cur += step, i++) {
1567 result_buf[i] = source_buf[cur];
1568 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 return result;
1571 }
1572 }
1573 else {
1574 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001575 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 Py_TYPE(item)->tp_name);
1577 return NULL;
1578 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001579}
1580
1581static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001582bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001583{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1585 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001586}
1587
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001588static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 (lenfunc)bytes_length, /*sq_length*/
1590 (binaryfunc)bytes_concat, /*sq_concat*/
1591 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1592 (ssizeargfunc)bytes_item, /*sq_item*/
1593 0, /*sq_slice*/
1594 0, /*sq_ass_item*/
1595 0, /*sq_ass_slice*/
1596 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001597};
1598
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001599static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 (lenfunc)bytes_length,
1601 (binaryfunc)bytes_subscript,
1602 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001603};
1604
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001605static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 (getbufferproc)bytes_buffer_getbuffer,
1607 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608};
1609
1610
/* Strip-mode selectors.  NOTE(review): presumably these choose which
   end(s) of the bytes a strip operation trims; their users are not
   visible in this part of the file -- confirm before relying on it. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1614
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001615/*[clinic input]
1616bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001617
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001618 sep: object = None
1619 The delimiter according which to split the bytes.
1620 None (the default value) means split on ASCII whitespace characters
1621 (space, tab, return, newline, formfeed, vertical tab).
1622 maxsplit: Py_ssize_t = -1
1623 Maximum number of splits to do.
1624 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001625
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001626Return a list of the sections in the bytes, using sep as the delimiter.
1627[clinic start generated code]*/
1628
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001629static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001630bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001631/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001632{
1633 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 const char *s = PyBytes_AS_STRING(self), *sub;
1635 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001636 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 if (maxsplit < 0)
1639 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001640 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001642 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 return NULL;
1644 sub = vsub.buf;
1645 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1648 PyBuffer_Release(&vsub);
1649 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001650}
1651
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001652/*[clinic input]
1653bytes.partition
1654
1655 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001656 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001657 /
1658
1659Partition the bytes into three parts using the given separator.
1660
1661This will search for the separator sep in the bytes. If the separator is found,
1662returns a 3-tuple containing the part before the separator, the separator
1663itself, and the part after it.
1664
1665If the separator is not found, returns a 3-tuple containing the original bytes
1666object and two empty bytes objects.
1667[clinic start generated code]*/
1668
Neal Norwitz6968b052007-02-27 19:02:19 +00001669static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001670bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001671/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001672{
Neal Norwitz6968b052007-02-27 19:02:19 +00001673 return stringlib_partition(
1674 (PyObject*) self,
1675 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001676 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001677 );
1678}
1679
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001680/*[clinic input]
1681bytes.rpartition
1682
1683 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001684 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001685 /
1686
1687Partition the bytes into three parts using the given separator.
1688
1689This will search for the separator sep in the bytes, starting and the end. If
1690the separator is found, returns a 3-tuple containing the part before the
1691separator, the separator itself, and the part after it.
1692
1693If the separator is not found, returns a 3-tuple containing two empty bytes
1694objects and the original bytes object.
1695[clinic start generated code]*/
1696
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001697static PyObject *
1698bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001699/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001701 return stringlib_rpartition(
1702 (PyObject*) self,
1703 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001704 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001706}
1707
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001708/*[clinic input]
1709bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001710
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711Return a list of the sections in the bytes, using sep as the delimiter.
1712
1713Splitting is done starting at the end of the bytes and working to the front.
1714[clinic start generated code]*/
1715
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001716static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001717bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001718/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001719{
1720 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 const char *s = PyBytes_AS_STRING(self), *sub;
1722 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 if (maxsplit < 0)
1726 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001727 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001729 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 return NULL;
1731 sub = vsub.buf;
1732 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1735 PyBuffer_Release(&vsub);
1736 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001737}
1738
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001739
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001740/*[clinic input]
1741bytes.join
1742
1743 iterable_of_bytes: object
1744 /
1745
1746Concatenate any number of bytes objects.
1747
1748The bytes whose method is called is inserted in between each pair.
1749
1750The result is returned as a new bytes object.
1751
1752Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1753[clinic start generated code]*/
1754
Neal Norwitz6968b052007-02-27 19:02:19 +00001755static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001756bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001757/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001758{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001759 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001760}
1761
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001762PyObject *
1763_PyBytes_Join(PyObject *sep, PyObject *x)
1764{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 assert(sep != NULL && PyBytes_Check(sep));
1766 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001767 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001768}
1769
/* Helper macro to fix up start/end slice values against a sequence of
   length `len`:
     - `end` greater than `len` is clamped to `len`;
     - a negative `start` or `end` has `len` added to it and is clamped to 0
       if it is still negative afterwards.
   `start` is NOT clamped to `len`, so callers must cope with start > end.

   `start` and `end` must be modifiable lvalues.  All three arguments are
   evaluated more than once, so they must be free of side effects.

   The body is wrapped in do { } while (0) so the expansion is a single
   statement: it stays correct inside an unbraced if/else and requires the
   usual trailing semicolon at the call site. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001784
1785Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001786bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001787{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001789 char byte;
1790 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001792 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001794 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001795
Antoine Pitrouac65d962011-10-20 23:54:17 +02001796 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1797 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001799
Antoine Pitrouac65d962011-10-20 23:54:17 +02001800 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001801 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001802 return -2;
1803
1804 sub = subbuf.buf;
1805 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001807 else {
1808 sub = &byte;
1809 sub_len = 1;
1810 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001811 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001812
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001813 ADJUST_INDICES(start, end, len);
1814 if (end - start < sub_len)
1815 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001816 else if (sub_len == 1
1817#ifndef HAVE_MEMRCHR
1818 && dir > 0
1819#endif
1820 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001821 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001822 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001823 res = stringlib_fastsearch_memchr_1char(
1824 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001825 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001826 if (res >= 0)
1827 res += start;
1828 }
1829 else {
1830 if (dir > 0)
1831 res = stringlib_find_slice(
1832 PyBytes_AS_STRING(self), len,
1833 sub, sub_len, start, end);
1834 else
1835 res = stringlib_rfind_slice(
1836 PyBytes_AS_STRING(self), len,
1837 sub, sub_len, start, end);
1838 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001839
1840 if (subobj)
1841 PyBuffer_Release(&subbuf);
1842
1843 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844}
1845
1846
1847PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001848"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001849\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001850Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001851such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001853\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001854Return -1 on failure.");
1855
Neal Norwitz6968b052007-02-27 19:02:19 +00001856static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001857bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 Py_ssize_t result = bytes_find_internal(self, args, +1);
1860 if (result == -2)
1861 return NULL;
1862 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001863}
1864
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001865
1866PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001867"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001868\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001869Like B.find() but raise ValueError when the substring is not found.");
1870
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001871static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001872bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001873{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 Py_ssize_t result = bytes_find_internal(self, args, +1);
1875 if (result == -2)
1876 return NULL;
1877 if (result == -1) {
1878 PyErr_SetString(PyExc_ValueError,
1879 "substring not found");
1880 return NULL;
1881 }
1882 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001883}
1884
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001885
1886PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001887"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001888\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001890such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001891arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001892\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001893Return -1 on failure.");
1894
Neal Norwitz6968b052007-02-27 19:02:19 +00001895static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001896bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 Py_ssize_t result = bytes_find_internal(self, args, -1);
1899 if (result == -2)
1900 return NULL;
1901 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001902}
1903
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001904
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001906"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001907\n\
1908Like B.rfind() but raise ValueError when the substring is not found.");
1909
1910static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001911bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001912{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001913 Py_ssize_t result = bytes_find_internal(self, args, -1);
1914 if (result == -2)
1915 return NULL;
1916 if (result == -1) {
1917 PyErr_SetString(PyExc_ValueError,
1918 "substring not found");
1919 return NULL;
1920 }
1921 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001922}
1923
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001924
1925Py_LOCAL_INLINE(PyObject *)
1926do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001927{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 Py_buffer vsep;
1929 char *s = PyBytes_AS_STRING(self);
1930 Py_ssize_t len = PyBytes_GET_SIZE(self);
1931 char *sep;
1932 Py_ssize_t seplen;
1933 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001935 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 return NULL;
1937 sep = vsep.buf;
1938 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 i = 0;
1941 if (striptype != RIGHTSTRIP) {
1942 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1943 i++;
1944 }
1945 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 j = len;
1948 if (striptype != LEFTSTRIP) {
1949 do {
1950 j--;
1951 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1952 j++;
1953 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001957 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1958 Py_INCREF(self);
1959 return (PyObject*)self;
1960 }
1961 else
1962 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001963}
1964
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
1966Py_LOCAL_INLINE(PyObject *)
1967do_strip(PyBytesObject *self, int striptype)
1968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 char *s = PyBytes_AS_STRING(self);
1970 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001972 i = 0;
1973 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001974 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001975 i++;
1976 }
1977 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 j = len;
1980 if (striptype != LEFTSTRIP) {
1981 do {
1982 j--;
David Malcolm96960882010-11-05 17:23:41 +00001983 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 j++;
1985 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1988 Py_INCREF(self);
1989 return (PyObject*)self;
1990 }
1991 else
1992 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993}
1994
1995
1996Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001997do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001999 if (bytes != NULL && bytes != Py_None) {
2000 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002001 }
2002 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003}
2004
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005/*[clinic input]
2006bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002007
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 self: self(type="PyBytesObject *")
2009 bytes: object = None
2010 /
2011
2012Strip leading and trailing bytes contained in the argument.
2013
2014If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2015[clinic start generated code]*/
2016
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002017static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002018bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002019/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002020{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002021 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002022}
2023
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002024/*[clinic input]
2025bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002026
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002027 self: self(type="PyBytesObject *")
2028 bytes: object = None
2029 /
2030
2031Strip leading bytes contained in the argument.
2032
2033If the argument is omitted or None, strip leading ASCII whitespace.
2034[clinic start generated code]*/
2035
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002036static PyObject *
2037bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002038/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002039{
2040 return do_argstrip(self, LEFTSTRIP, bytes);
2041}
2042
2043/*[clinic input]
2044bytes.rstrip
2045
2046 self: self(type="PyBytesObject *")
2047 bytes: object = None
2048 /
2049
2050Strip trailing bytes contained in the argument.
2051
2052If the argument is omitted or None, strip trailing ASCII whitespace.
2053[clinic start generated code]*/
2054
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002055static PyObject *
2056bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002057/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002058{
2059 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002060}
Neal Norwitz6968b052007-02-27 19:02:19 +00002061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062
2063PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002064"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002065\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002067string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002068as in slice notation.");
2069
2070static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002071bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002073 PyObject *sub_obj;
2074 const char *str = PyBytes_AS_STRING(self), *sub;
2075 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002076 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouac65d962011-10-20 23:54:17 +02002079 Py_buffer vsub;
2080 PyObject *count_obj;
2081
2082 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2083 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002085
Antoine Pitrouac65d962011-10-20 23:54:17 +02002086 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002087 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002088 return NULL;
2089
2090 sub = vsub.buf;
2091 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002093 else {
2094 sub = &byte;
2095 sub_len = 1;
2096 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002099
Antoine Pitrouac65d962011-10-20 23:54:17 +02002100 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002101 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2102 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002103
2104 if (sub_obj)
2105 PyBuffer_Release(&vsub);
2106
2107 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108}
2109
2110
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002111/*[clinic input]
2112bytes.translate
2113
2114 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002115 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002116 Translation table, which must be a bytes object of length 256.
2117 [
2118 deletechars: object
2119 ]
2120 /
2121
2122Return a copy with each character mapped by the given translation table.
2123
2124All characters occurring in the optional argument deletechars are removed.
2125The remaining characters are mapped through the given translation table.
2126[clinic start generated code]*/
2127
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002128static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002129bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2130 PyObject *deletechars)
2131/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002132{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002133 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002134 Py_buffer table_view = {NULL, NULL};
2135 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002136 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002137 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002139 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 Py_ssize_t inlen, tablen, dellen = 0;
2141 PyObject *result;
2142 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144 if (PyBytes_Check(table)) {
2145 table_chars = PyBytes_AS_STRING(table);
2146 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002147 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002148 else if (table == Py_None) {
2149 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 tablen = 256;
2151 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002152 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002153 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002154 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002155 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002156 tablen = table_view.len;
2157 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002159 if (tablen != 256) {
2160 PyErr_SetString(PyExc_ValueError,
2161 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002162 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 return NULL;
2164 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002165
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002166 if (deletechars != NULL) {
2167 if (PyBytes_Check(deletechars)) {
2168 del_table_chars = PyBytes_AS_STRING(deletechars);
2169 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002171 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002172 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002173 PyBuffer_Release(&table_view);
2174 return NULL;
2175 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002176 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002177 dellen = del_table_view.len;
2178 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 }
2180 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002181 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 dellen = 0;
2183 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002185 inlen = PyBytes_GET_SIZE(input_obj);
2186 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002187 if (result == NULL) {
2188 PyBuffer_Release(&del_table_view);
2189 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002191 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002192 output_start = output = PyBytes_AsString(result);
2193 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002194
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002195 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 /* If no deletions are required, use faster code */
2197 for (i = inlen; --i >= 0; ) {
2198 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 changed = 1;
2201 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002202 if (!changed && PyBytes_CheckExact(input_obj)) {
2203 Py_INCREF(input_obj);
2204 Py_DECREF(result);
2205 result = input_obj;
2206 }
2207 PyBuffer_Release(&del_table_view);
2208 PyBuffer_Release(&table_view);
2209 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002210 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002211
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002212 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002213 for (i = 0; i < 256; i++)
2214 trans_table[i] = Py_CHARMASK(i);
2215 } else {
2216 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002217 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002218 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002219 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002222 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002223 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002224
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002225 for (i = inlen; --i >= 0; ) {
2226 c = Py_CHARMASK(*input++);
2227 if (trans_table[c] != -1)
2228 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2229 continue;
2230 changed = 1;
2231 }
2232 if (!changed && PyBytes_CheckExact(input_obj)) {
2233 Py_DECREF(result);
2234 Py_INCREF(input_obj);
2235 return input_obj;
2236 }
2237 /* Fix the size of the resulting string */
2238 if (inlen > 0)
2239 _PyBytes_Resize(&result, output - output_start);
2240 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002241}
2242
2243
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002244/*[clinic input]
2245
2246@staticmethod
2247bytes.maketrans
2248
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002249 frm: Py_buffer
2250 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251 /
2252
2253Return a translation table useable for the bytes or bytearray translate method.
2254
2255The returned table will be one where each byte in frm is mapped to the byte at
2256the same position in to.
2257
2258The bytes objects frm and to must be of the same length.
2259[clinic start generated code]*/
2260
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002261static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002262bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002263/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002264{
2265 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002266}
2267
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002268/* find and count characters and substrings */
2269
/* Thin wrapper around memchr(): pointer to the first byte equal to `c`
   within the first `target_len` bytes of `target`, or NULL if there is
   none.  The result is cast to a non-const char pointer. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2272
2273/* String ops must return a string. */
2274/* If the object is subclass of string, create a copy */
2275Py_LOCAL(PyBytesObject *)
2276return_self(PyBytesObject *self)
2277{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002278 if (PyBytes_CheckExact(self)) {
2279 Py_INCREF(self);
2280 return self;
2281 }
2282 return (PyBytesObject *)PyBytes_FromStringAndSize(
2283 PyBytes_AS_STRING(self),
2284 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002285}
2286
2287Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002288countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 Py_ssize_t count=0;
2291 const char *start=target;
2292 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002294 while ( (start=findchar(start, end-start, c)) != NULL ) {
2295 count++;
2296 if (count >= maxcount)
2297 break;
2298 start += 1;
2299 }
2300 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002301}
2302
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002303
2304/* Algorithms for different cases of string replacement */
2305
2306/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2307Py_LOCAL(PyBytesObject *)
2308replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002309 const char *to_s, Py_ssize_t to_len,
2310 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002312 char *self_s, *result_s;
2313 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002314 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002318
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002319 /* 1 at the end plus 1 after every character;
2320 count = min(maxcount, self_len + 1) */
2321 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002322 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002323 else
2324 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2325 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002327 /* Check for overflow */
2328 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002329 assert(count > 0);
2330 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002331 PyErr_SetString(PyExc_OverflowError,
2332 "replacement bytes are too long");
2333 return NULL;
2334 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002335 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 if (! (result = (PyBytesObject *)
2338 PyBytes_FromStringAndSize(NULL, result_len)) )
2339 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 self_s = PyBytes_AS_STRING(self);
2342 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 /* Lay the first one down (guaranteed this will occur) */
2347 Py_MEMCPY(result_s, to_s, to_len);
2348 result_s += to_len;
2349 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002350
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002351 for (i=0; i<count; i++) {
2352 *result_s++ = *self_s++;
2353 Py_MEMCPY(result_s, to_s, to_len);
2354 result_s += to_len;
2355 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002357 /* Copy the rest of the original string */
2358 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002360 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361}
2362
2363/* Special case for deleting a single character */
2364/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2365Py_LOCAL(PyBytesObject *)
2366replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002367 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002369 char *self_s, *result_s;
2370 char *start, *next, *end;
2371 Py_ssize_t self_len, result_len;
2372 Py_ssize_t count;
2373 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002375 self_len = PyBytes_GET_SIZE(self);
2376 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 count = countchar(self_s, self_len, from_c, maxcount);
2379 if (count == 0) {
2380 return return_self(self);
2381 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 result_len = self_len - count; /* from_len == 1 */
2384 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 if ( (result = (PyBytesObject *)
2387 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2388 return NULL;
2389 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 start = self_s;
2392 end = self_s + self_len;
2393 while (count-- > 0) {
2394 next = findchar(start, end-start, from_c);
2395 if (next == NULL)
2396 break;
2397 Py_MEMCPY(result_s, start, next-start);
2398 result_s += (next-start);
2399 start = next+1;
2400 }
2401 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002403 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002404}
2405
2406/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2407
2408Py_LOCAL(PyBytesObject *)
2409replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 const char *from_s, Py_ssize_t from_len,
2411 Py_ssize_t maxcount) {
2412 char *self_s, *result_s;
2413 char *start, *next, *end;
2414 Py_ssize_t self_len, result_len;
2415 Py_ssize_t count, offset;
2416 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 self_len = PyBytes_GET_SIZE(self);
2419 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002421 count = stringlib_count(self_s, self_len,
2422 from_s, from_len,
2423 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002425 if (count == 0) {
2426 /* no matches */
2427 return return_self(self);
2428 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002430 result_len = self_len - (count * from_len);
2431 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002432
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002433 if ( (result = (PyBytesObject *)
2434 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2435 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 start = self_s;
2440 end = self_s + self_len;
2441 while (count-- > 0) {
2442 offset = stringlib_find(start, end-start,
2443 from_s, from_len,
2444 0);
2445 if (offset == -1)
2446 break;
2447 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 result_s += (next-start);
2452 start = next+from_len;
2453 }
2454 Py_MEMCPY(result_s, start, end-start);
2455 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002456}
2457
2458/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2459Py_LOCAL(PyBytesObject *)
2460replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 char from_c, char to_c,
2462 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002463{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 char *self_s, *result_s, *start, *end, *next;
2465 Py_ssize_t self_len;
2466 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002468 /* The result string will be the same size */
2469 self_s = PyBytes_AS_STRING(self);
2470 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 if (next == NULL) {
2475 /* No matches; return the original string */
2476 return return_self(self);
2477 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 /* Need to make a new string */
2480 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2481 if (result == NULL)
2482 return NULL;
2483 result_s = PyBytes_AS_STRING(result);
2484 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002485
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 /* change everything in-place, starting with this one */
2487 start = result_s + (next-self_s);
2488 *start = to_c;
2489 start++;
2490 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 while (--maxcount > 0) {
2493 next = findchar(start, end-start, from_c);
2494 if (next == NULL)
2495 break;
2496 *next = to_c;
2497 start = next+1;
2498 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002500 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501}
2502
2503/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2504Py_LOCAL(PyBytesObject *)
2505replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 const char *from_s, Py_ssize_t from_len,
2507 const char *to_s, Py_ssize_t to_len,
2508 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 char *result_s, *start, *end;
2511 char *self_s;
2512 Py_ssize_t self_len, offset;
2513 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002517 self_s = PyBytes_AS_STRING(self);
2518 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 offset = stringlib_find(self_s, self_len,
2521 from_s, from_len,
2522 0);
2523 if (offset == -1) {
2524 /* No matches; return the original string */
2525 return return_self(self);
2526 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 /* Need to make a new string */
2529 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2530 if (result == NULL)
2531 return NULL;
2532 result_s = PyBytes_AS_STRING(result);
2533 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 /* change everything in-place, starting with this one */
2536 start = result_s + offset;
2537 Py_MEMCPY(start, to_s, from_len);
2538 start += from_len;
2539 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 while ( --maxcount > 0) {
2542 offset = stringlib_find(start, end-start,
2543 from_s, from_len,
2544 0);
2545 if (offset==-1)
2546 break;
2547 Py_MEMCPY(start+offset, to_s, from_len);
2548 start += offset+from_len;
2549 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002552}
2553
2554/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2555Py_LOCAL(PyBytesObject *)
2556replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 char from_c,
2558 const char *to_s, Py_ssize_t to_len,
2559 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 char *self_s, *result_s;
2562 char *start, *next, *end;
2563 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002564 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002565 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002566
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 self_s = PyBytes_AS_STRING(self);
2568 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002570 count = countchar(self_s, self_len, from_c, maxcount);
2571 if (count == 0) {
2572 /* no matches, return unchanged */
2573 return return_self(self);
2574 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002576 /* use the difference between current and new, hence the "-1" */
2577 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002578 assert(count > 0);
2579 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002580 PyErr_SetString(PyExc_OverflowError,
2581 "replacement bytes are too long");
2582 return NULL;
2583 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002584 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 if ( (result = (PyBytesObject *)
2587 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2588 return NULL;
2589 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002591 start = self_s;
2592 end = self_s + self_len;
2593 while (count-- > 0) {
2594 next = findchar(start, end-start, from_c);
2595 if (next == NULL)
2596 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002597
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002598 if (next == start) {
2599 /* replace with the 'to' */
2600 Py_MEMCPY(result_s, to_s, to_len);
2601 result_s += to_len;
2602 start += 1;
2603 } else {
2604 /* copy the unchanged old then the 'to' */
2605 Py_MEMCPY(result_s, start, next-start);
2606 result_s += (next-start);
2607 Py_MEMCPY(result_s, to_s, to_len);
2608 result_s += to_len;
2609 start = next+1;
2610 }
2611 }
2612 /* Copy the remainder of the remaining string */
2613 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616}
2617
2618/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2619Py_LOCAL(PyBytesObject *)
2620replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002621 const char *from_s, Py_ssize_t from_len,
2622 const char *to_s, Py_ssize_t to_len,
2623 Py_ssize_t maxcount) {
2624 char *self_s, *result_s;
2625 char *start, *next, *end;
2626 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002627 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002630 self_s = PyBytes_AS_STRING(self);
2631 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 count = stringlib_count(self_s, self_len,
2634 from_s, from_len,
2635 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002637 if (count == 0) {
2638 /* no matches, return unchanged */
2639 return return_self(self);
2640 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002642 /* Check for overflow */
2643 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002644 assert(count > 0);
2645 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 PyErr_SetString(PyExc_OverflowError,
2647 "replacement bytes are too long");
2648 return NULL;
2649 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002650 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002652 if ( (result = (PyBytesObject *)
2653 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2654 return NULL;
2655 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002657 start = self_s;
2658 end = self_s + self_len;
2659 while (count-- > 0) {
2660 offset = stringlib_find(start, end-start,
2661 from_s, from_len,
2662 0);
2663 if (offset == -1)
2664 break;
2665 next = start+offset;
2666 if (next == start) {
2667 /* replace with the 'to' */
2668 Py_MEMCPY(result_s, to_s, to_len);
2669 result_s += to_len;
2670 start += from_len;
2671 } else {
2672 /* copy the unchanged old then the 'to' */
2673 Py_MEMCPY(result_s, start, next-start);
2674 result_s += (next-start);
2675 Py_MEMCPY(result_s, to_s, to_len);
2676 result_s += to_len;
2677 start = next+from_len;
2678 }
2679 }
2680 /* Copy the remainder of the remaining string */
2681 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002683 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684}
2685
2686
2687Py_LOCAL(PyBytesObject *)
2688replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 const char *from_s, Py_ssize_t from_len,
2690 const char *to_s, Py_ssize_t to_len,
2691 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 if (maxcount < 0) {
2694 maxcount = PY_SSIZE_T_MAX;
2695 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2696 /* nothing to do; return the original string */
2697 return return_self(self);
2698 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 if (maxcount == 0 ||
2701 (from_len == 0 && to_len == 0)) {
2702 /* nothing to do; return the original string */
2703 return return_self(self);
2704 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 if (from_len == 0) {
2709 /* insert the 'to' string everywhere. */
2710 /* >>> "Python".replace("", ".") */
2711 /* '.P.y.t.h.o.n.' */
2712 return replace_interleave(self, to_s, to_len, maxcount);
2713 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2716 /* point for an empty self string to generate a non-empty string */
2717 /* Special case so the remaining code always gets a non-empty string */
2718 if (PyBytes_GET_SIZE(self) == 0) {
2719 return return_self(self);
2720 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 if (to_len == 0) {
2723 /* delete all occurrences of 'from' string */
2724 if (from_len == 1) {
2725 return replace_delete_single_character(
2726 self, from_s[0], maxcount);
2727 } else {
2728 return replace_delete_substring(self, from_s,
2729 from_len, maxcount);
2730 }
2731 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 if (from_len == to_len) {
2736 if (from_len == 1) {
2737 return replace_single_character_in_place(
2738 self,
2739 from_s[0],
2740 to_s[0],
2741 maxcount);
2742 } else {
2743 return replace_substring_in_place(
2744 self, from_s, from_len, to_s, to_len,
2745 maxcount);
2746 }
2747 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002749 /* Otherwise use the more generic algorithms */
2750 if (from_len == 1) {
2751 return replace_single_character(self, from_s[0],
2752 to_s, to_len, maxcount);
2753 } else {
2754 /* len('from')>=2, len('to')>=1 */
2755 return replace_substring(self, from_s, from_len, to_s, to_len,
2756 maxcount);
2757 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002758}
2759
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002760
2761/*[clinic input]
2762bytes.replace
2763
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002764 old: Py_buffer
2765 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002766 count: Py_ssize_t = -1
2767 Maximum number of occurrences to replace.
2768 -1 (the default value) means replace all occurrences.
2769 /
2770
2771Return a copy with all occurrences of substring old replaced by new.
2772
2773If the optional argument count is given, only the first count occurrences are
2774replaced.
2775[clinic start generated code]*/
2776
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002777static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002778bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2779 Py_ssize_t count)
2780/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002783 (const char *)old->buf, old->len,
2784 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002785}
2786
2787/** End DALKE **/
2788
2789/* Matches the end (direction >= 0) or start (direction < 0) of self
2790 * against substr, using the start and end arguments. Returns
2791 * -1 on error, 0 if not found and 1 if found.
2792 */
2793Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002794_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002795 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002796{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002797 Py_ssize_t len = PyBytes_GET_SIZE(self);
2798 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002799 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 const char* sub;
2801 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002803 if (PyBytes_Check(substr)) {
2804 sub = PyBytes_AS_STRING(substr);
2805 slen = PyBytes_GET_SIZE(substr);
2806 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002807 else {
2808 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2809 return -1;
2810 sub = sub_view.buf;
2811 slen = sub_view.len;
2812 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002815 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002817 if (direction < 0) {
2818 /* startswith */
2819 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002820 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002821 } else {
2822 /* endswith */
2823 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002824 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002826 if (end-slen > start)
2827 start = end - slen;
2828 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002829 if (end-start < slen)
2830 goto notfound;
2831 if (memcmp(str+start, sub, slen) != 0)
2832 goto notfound;
2833
2834 PyBuffer_Release(&sub_view);
2835 return 1;
2836
2837notfound:
2838 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840}
2841
2842
2843PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002844"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002845\n\
2846Return True if B starts with the specified prefix, False otherwise.\n\
2847With optional start, test B beginning at that position.\n\
2848With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002849prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002850
2851static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002852bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002854 Py_ssize_t start = 0;
2855 Py_ssize_t end = PY_SSIZE_T_MAX;
2856 PyObject *subobj;
2857 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858
Jesus Ceaac451502011-04-20 17:09:23 +02002859 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002860 return NULL;
2861 if (PyTuple_Check(subobj)) {
2862 Py_ssize_t i;
2863 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2864 result = _bytes_tailmatch(self,
2865 PyTuple_GET_ITEM(subobj, i),
2866 start, end, -1);
2867 if (result == -1)
2868 return NULL;
2869 else if (result) {
2870 Py_RETURN_TRUE;
2871 }
2872 }
2873 Py_RETURN_FALSE;
2874 }
2875 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002876 if (result == -1) {
2877 if (PyErr_ExceptionMatches(PyExc_TypeError))
2878 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2879 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002880 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002881 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 else
2883 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884}
2885
2886
2887PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002888"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002889\n\
2890Return True if B ends with the specified suffix, False otherwise.\n\
2891With optional start, test B beginning at that position.\n\
2892With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002893suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002894
2895static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002896bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002898 Py_ssize_t start = 0;
2899 Py_ssize_t end = PY_SSIZE_T_MAX;
2900 PyObject *subobj;
2901 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002902
Jesus Ceaac451502011-04-20 17:09:23 +02002903 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002904 return NULL;
2905 if (PyTuple_Check(subobj)) {
2906 Py_ssize_t i;
2907 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2908 result = _bytes_tailmatch(self,
2909 PyTuple_GET_ITEM(subobj, i),
2910 start, end, +1);
2911 if (result == -1)
2912 return NULL;
2913 else if (result) {
2914 Py_RETURN_TRUE;
2915 }
2916 }
2917 Py_RETURN_FALSE;
2918 }
2919 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002920 if (result == -1) {
2921 if (PyErr_ExceptionMatches(PyExc_TypeError))
2922 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2923 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002925 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 else
2927 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928}
2929
2930
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002931/*[clinic input]
2932bytes.decode
2933
2934 encoding: str(c_default="NULL") = 'utf-8'
2935 The encoding with which to decode the bytes.
2936 errors: str(c_default="NULL") = 'strict'
2937 The error handling scheme to use for the handling of decoding errors.
2938 The default is 'strict' meaning that decoding errors raise a
2939 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2940 as well as any other name registered with codecs.register_error that
2941 can handle UnicodeDecodeErrors.
2942
2943Decode the bytes using the codec registered for encoding.
2944[clinic start generated code]*/
2945
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002946static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002947bytes_decode_impl(PyBytesObject*self, const char *encoding,
2948 const char *errors)
2949/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002950{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002951 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002952}
2953
Guido van Rossum20188312006-05-05 15:15:40 +00002954
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002955/*[clinic input]
2956bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002957
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002958 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002959
2960Return a list of the lines in the bytes, breaking at line boundaries.
2961
2962Line breaks are not included in the resulting list unless keepends is given and
2963true.
2964[clinic start generated code]*/
2965
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002966static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002967bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03002968/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002969{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002970 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002971 (PyObject*) self, PyBytes_AS_STRING(self),
2972 PyBytes_GET_SIZE(self), keepends
2973 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002974}
2975
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002976static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002977hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002978{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002979 if (c >= 128)
2980 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002981 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002982 return c - '0';
2983 else {
David Malcolm96960882010-11-05 17:23:41 +00002984 if (Py_ISUPPER(c))
2985 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002986 if (c >= 'a' && c <= 'f')
2987 return c - 'a' + 10;
2988 }
2989 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002990}
2991
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002992/*[clinic input]
2993@classmethod
2994bytes.fromhex
2995
2996 string: unicode
2997 /
2998
2999Create a bytes object from a string of hexadecimal numbers.
3000
3001Spaces between two numbers are accepted.
3002Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3003[clinic start generated code]*/
3004
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003005static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003006bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003007/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003008{
3009 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003010 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003011 Py_ssize_t hexlen, byteslen, i, j;
3012 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003013 void *data;
3014 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003015
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003016 assert(PyUnicode_Check(string));
3017 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003019 kind = PyUnicode_KIND(string);
3020 data = PyUnicode_DATA(string);
3021 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 byteslen = hexlen/2; /* This overestimates if there are spaces */
3024 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3025 if (!newstring)
3026 return NULL;
3027 buf = PyBytes_AS_STRING(newstring);
3028 for (i = j = 0; i < hexlen; i += 2) {
3029 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003030 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 i++;
3032 if (i >= hexlen)
3033 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003034 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3035 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 if (top == -1 || bot == -1) {
3037 PyErr_Format(PyExc_ValueError,
3038 "non-hexadecimal number found in "
3039 "fromhex() arg at position %zd", i);
3040 goto error;
3041 }
3042 buf[j++] = (top << 4) + bot;
3043 }
3044 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3045 goto error;
3046 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003047
3048 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 Py_XDECREF(newstring);
3050 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003051}
3052
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003053PyDoc_STRVAR(hex__doc__,
3054"B.hex() -> string\n\
3055\n\
3056Create a string of hexadecimal numbers from a bytes object.\n\
3057Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3058
3059static PyObject *
3060bytes_hex(PyBytesObject *self)
3061{
3062 char* argbuf = PyBytes_AS_STRING(self);
3063 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3064 return _Py_strhex(argbuf, arglen);
3065}
3066
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003067static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003068bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003069{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003070 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003071}
3072
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003073
3074static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003075bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003076 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3077 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3078 _Py_capitalize__doc__},
3079 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3080 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003081 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003082 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3083 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003084 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003085 expandtabs__doc__},
3086 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003087 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003088 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3090 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3091 _Py_isalnum__doc__},
3092 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3093 _Py_isalpha__doc__},
3094 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3095 _Py_isdigit__doc__},
3096 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3097 _Py_islower__doc__},
3098 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3099 _Py_isspace__doc__},
3100 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3101 _Py_istitle__doc__},
3102 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3103 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003104 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003105 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3106 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003107 BYTES_LSTRIP_METHODDEF
3108 BYTES_MAKETRANS_METHODDEF
3109 BYTES_PARTITION_METHODDEF
3110 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003111 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3112 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3113 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003114 BYTES_RPARTITION_METHODDEF
3115 BYTES_RSPLIT_METHODDEF
3116 BYTES_RSTRIP_METHODDEF
3117 BYTES_SPLIT_METHODDEF
3118 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003119 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3120 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003121 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003122 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3123 _Py_swapcase__doc__},
3124 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003125 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3127 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003129};
3130
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003131static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003132bytes_mod(PyObject *v, PyObject *w)
3133{
3134 if (!PyBytes_Check(v))
3135 Py_RETURN_NOTIMPLEMENTED;
3136 return _PyBytes_Format(v, w);
3137}
3138
3139static PyNumberMethods bytes_as_number = {
3140 0, /*nb_add*/
3141 0, /*nb_subtract*/
3142 0, /*nb_multiply*/
3143 bytes_mod, /*nb_remainder*/
3144};
3145
3146static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003147bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003148
3149static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003150bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003151{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003152 PyObject *x = NULL;
3153 const char *encoding = NULL;
3154 const char *errors = NULL;
3155 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003156 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003157 Py_ssize_t size;
3158 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003159 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003161 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02003162 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3164 &encoding, &errors))
3165 return NULL;
3166 if (x == NULL) {
3167 if (encoding != NULL || errors != NULL) {
3168 PyErr_SetString(PyExc_TypeError,
3169 "encoding or errors without sequence "
3170 "argument");
3171 return NULL;
3172 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003173 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003174 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003175
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003176 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003177 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003178 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003179 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003180 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003181 return NULL;
3182 }
3183 new = PyUnicode_AsEncodedString(x, encoding, errors);
3184 if (new == NULL)
3185 return NULL;
3186 assert(PyBytes_Check(new));
3187 return new;
3188 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003189
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003190 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003191 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003192 PyUnicode_Check(x) ?
3193 "string argument without an encoding" :
3194 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003195 return NULL;
3196 }
3197
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003198 /* We'd like to call PyObject_Bytes here, but we need to check for an
3199 integer argument before deferring to PyBytes_FromObject, something
3200 PyObject_Bytes doesn't do. */
3201 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3202 if (func != NULL) {
3203 new = PyObject_CallFunctionObjArgs(func, NULL);
3204 Py_DECREF(func);
3205 if (new == NULL)
3206 return NULL;
3207 if (!PyBytes_Check(new)) {
3208 PyErr_Format(PyExc_TypeError,
3209 "__bytes__ returned non-bytes (type %.200s)",
3210 Py_TYPE(new)->tp_name);
3211 Py_DECREF(new);
3212 return NULL;
3213 }
3214 return new;
3215 }
3216 else if (PyErr_Occurred())
3217 return NULL;
3218
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003219 if (PyUnicode_Check(x)) {
3220 PyErr_SetString(PyExc_TypeError,
3221 "string argument without an encoding");
3222 return NULL;
3223 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003224 /* Is it an integer? */
3225 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3226 if (size == -1 && PyErr_Occurred()) {
3227 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3228 return NULL;
3229 PyErr_Clear();
3230 }
3231 else if (size < 0) {
3232 PyErr_SetString(PyExc_ValueError, "negative count");
3233 return NULL;
3234 }
3235 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003236 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003237 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003238 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003239 return new;
3240 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003241
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003242 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003243}
3244
3245PyObject *
3246PyBytes_FromObject(PyObject *x)
3247{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003248 PyObject *new, *it;
3249 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003251 if (x == NULL) {
3252 PyErr_BadInternalCall();
3253 return NULL;
3254 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003255
3256 if (PyBytes_CheckExact(x)) {
3257 Py_INCREF(x);
3258 return x;
3259 }
3260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003261 /* Use the modern buffer interface */
3262 if (PyObject_CheckBuffer(x)) {
3263 Py_buffer view;
3264 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3265 return NULL;
3266 new = PyBytes_FromStringAndSize(NULL, view.len);
3267 if (!new)
3268 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003269 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3270 &view, view.len, 'C') < 0)
3271 goto fail;
3272 PyBuffer_Release(&view);
3273 return new;
3274 fail:
3275 Py_XDECREF(new);
3276 PyBuffer_Release(&view);
3277 return NULL;
3278 }
3279 if (PyUnicode_Check(x)) {
3280 PyErr_SetString(PyExc_TypeError,
3281 "cannot convert unicode object to bytes");
3282 return NULL;
3283 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003285 if (PyList_CheckExact(x)) {
3286 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3287 if (new == NULL)
3288 return NULL;
3289 for (i = 0; i < Py_SIZE(x); i++) {
3290 Py_ssize_t value = PyNumber_AsSsize_t(
3291 PyList_GET_ITEM(x, i), PyExc_ValueError);
3292 if (value == -1 && PyErr_Occurred()) {
3293 Py_DECREF(new);
3294 return NULL;
3295 }
3296 if (value < 0 || value >= 256) {
3297 PyErr_SetString(PyExc_ValueError,
3298 "bytes must be in range(0, 256)");
3299 Py_DECREF(new);
3300 return NULL;
3301 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003302 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003303 }
3304 return new;
3305 }
3306 if (PyTuple_CheckExact(x)) {
3307 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3308 if (new == NULL)
3309 return NULL;
3310 for (i = 0; i < Py_SIZE(x); i++) {
3311 Py_ssize_t value = PyNumber_AsSsize_t(
3312 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3313 if (value == -1 && PyErr_Occurred()) {
3314 Py_DECREF(new);
3315 return NULL;
3316 }
3317 if (value < 0 || value >= 256) {
3318 PyErr_SetString(PyExc_ValueError,
3319 "bytes must be in range(0, 256)");
3320 Py_DECREF(new);
3321 return NULL;
3322 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003323 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 }
3325 return new;
3326 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003328 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003329 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003330 if (size == -1 && PyErr_Occurred())
3331 return NULL;
3332 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3333 returning a shared empty bytes string. This required because we
3334 want to call _PyBytes_Resize() the returned object, which we can
3335 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003336 if (size == 0)
3337 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003338 new = PyBytes_FromStringAndSize(NULL, size);
3339 if (new == NULL)
3340 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003341 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003342
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003343 /* Get the iterator */
3344 it = PyObject_GetIter(x);
3345 if (it == NULL)
3346 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003348 /* Run the iterator to exhaustion */
3349 for (i = 0; ; i++) {
3350 PyObject *item;
3351 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003353 /* Get the next item */
3354 item = PyIter_Next(it);
3355 if (item == NULL) {
3356 if (PyErr_Occurred())
3357 goto error;
3358 break;
3359 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003361 /* Interpret it as an int (__index__) */
3362 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3363 Py_DECREF(item);
3364 if (value == -1 && PyErr_Occurred())
3365 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003366
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003367 /* Range check */
3368 if (value < 0 || value >= 256) {
3369 PyErr_SetString(PyExc_ValueError,
3370 "bytes must be in range(0, 256)");
3371 goto error;
3372 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003374 /* Append the byte */
3375 if (i >= size) {
3376 size = 2 * size + 1;
3377 if (_PyBytes_Resize(&new, size) < 0)
3378 goto error;
3379 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003380 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003381 }
3382 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003384 /* Clean up and return success */
3385 Py_DECREF(it);
3386 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003387
3388 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003389 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003390 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003391 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003392}
3393
3394static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003395bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003396{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003397 PyObject *tmp, *pnew;
3398 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003400 assert(PyType_IsSubtype(type, &PyBytes_Type));
3401 tmp = bytes_new(&PyBytes_Type, args, kwds);
3402 if (tmp == NULL)
3403 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02003404 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003405 n = PyBytes_GET_SIZE(tmp);
3406 pnew = type->tp_alloc(type, n);
3407 if (pnew != NULL) {
3408 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3409 PyBytes_AS_STRING(tmp), n+1);
3410 ((PyBytesObject *)pnew)->ob_shash =
3411 ((PyBytesObject *)tmp)->ob_shash;
3412 }
3413 Py_DECREF(tmp);
3414 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003415}
3416
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003417PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003418"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003419bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003420bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003421bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3422bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003423\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003424Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003425 - an iterable yielding integers in range(256)\n\
3426 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003427 - any object implementing the buffer API.\n\
3428 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003429
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003430static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003431
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003432PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003433 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3434 "bytes",
3435 PyBytesObject_SIZE,
3436 sizeof(char),
3437 bytes_dealloc, /* tp_dealloc */
3438 0, /* tp_print */
3439 0, /* tp_getattr */
3440 0, /* tp_setattr */
3441 0, /* tp_reserved */
3442 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003443 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003444 &bytes_as_sequence, /* tp_as_sequence */
3445 &bytes_as_mapping, /* tp_as_mapping */
3446 (hashfunc)bytes_hash, /* tp_hash */
3447 0, /* tp_call */
3448 bytes_str, /* tp_str */
3449 PyObject_GenericGetAttr, /* tp_getattro */
3450 0, /* tp_setattro */
3451 &bytes_as_buffer, /* tp_as_buffer */
3452 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3453 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3454 bytes_doc, /* tp_doc */
3455 0, /* tp_traverse */
3456 0, /* tp_clear */
3457 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3458 0, /* tp_weaklistoffset */
3459 bytes_iter, /* tp_iter */
3460 0, /* tp_iternext */
3461 bytes_methods, /* tp_methods */
3462 0, /* tp_members */
3463 0, /* tp_getset */
3464 &PyBaseObject_Type, /* tp_base */
3465 0, /* tp_dict */
3466 0, /* tp_descr_get */
3467 0, /* tp_descr_set */
3468 0, /* tp_dictoffset */
3469 0, /* tp_init */
3470 0, /* tp_alloc */
3471 bytes_new, /* tp_new */
3472 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003473};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003474
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003475void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003476PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003477{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003478 assert(pv != NULL);
3479 if (*pv == NULL)
3480 return;
3481 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003482 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003483 return;
3484 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003485
3486 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3487 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003488 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003489 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003490
Antoine Pitrou161d6952014-05-01 14:36:20 +02003491 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003492 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003493 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3494 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3495 Py_CLEAR(*pv);
3496 return;
3497 }
3498
3499 oldsize = PyBytes_GET_SIZE(*pv);
3500 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3501 PyErr_NoMemory();
3502 goto error;
3503 }
3504 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3505 goto error;
3506
3507 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3508 PyBuffer_Release(&wb);
3509 return;
3510
3511 error:
3512 PyBuffer_Release(&wb);
3513 Py_CLEAR(*pv);
3514 return;
3515 }
3516
3517 else {
3518 /* Multiple references, need to create new object */
3519 PyObject *v;
3520 v = bytes_concat(*pv, w);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03003521 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003522 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003523}
3524
3525void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003526PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003528 PyBytes_Concat(pv, w);
3529 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003530}
3531
3532
Ethan Furmanb95b5612015-01-23 20:05:18 -08003533/* The following function breaks the notion that bytes are immutable:
3534 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003535 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003536 as creating a new bytes object and destroying the old one, only
3537 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003538 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003539 Note that if there's not enough memory to resize the bytes object, the
3540 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003541 memory" exception is set, and -1 is returned. Else (on success) 0 is
3542 returned, and the value in *pv may or may not be the same as on input.
3543 As always, an extra byte is allocated for a trailing \0 byte (newsize
3544 does *not* include that), and a trailing \0 byte is stored.
3545*/
3546
3547int
3548_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3549{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003550 PyObject *v;
3551 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003552 v = *pv;
3553 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3554 *pv = 0;
3555 Py_DECREF(v);
3556 PyErr_BadInternalCall();
3557 return -1;
3558 }
3559 /* XXX UNREF/NEWREF interface should be more symmetrical */
3560 _Py_DEC_REFTOTAL;
3561 _Py_ForgetReference(v);
3562 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003563 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003564 if (*pv == NULL) {
3565 PyObject_Del(v);
3566 PyErr_NoMemory();
3567 return -1;
3568 }
3569 _Py_NewReference(*pv);
3570 sv = (PyBytesObject *) *pv;
3571 Py_SIZE(sv) = newsize;
3572 sv->ob_sval[newsize] = '\0';
3573 sv->ob_shash = -1; /* invalidate cached hash value */
3574 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003575}
3576
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003577void
3578PyBytes_Fini(void)
3579{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003580 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003581 for (i = 0; i < UCHAR_MAX + 1; i++)
3582 Py_CLEAR(characters[i]);
3583 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003584}
3585
Benjamin Peterson4116f362008-05-27 00:36:20 +00003586/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003587
3588typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003589 PyObject_HEAD
3590 Py_ssize_t it_index;
3591 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003592} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003593
3594static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003595striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003596{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003597 _PyObject_GC_UNTRACK(it);
3598 Py_XDECREF(it->it_seq);
3599 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003600}
3601
3602static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003603striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003604{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003605 Py_VISIT(it->it_seq);
3606 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003607}
3608
3609static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003610striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003611{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003612 PyBytesObject *seq;
3613 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003615 assert(it != NULL);
3616 seq = it->it_seq;
3617 if (seq == NULL)
3618 return NULL;
3619 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003620
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003621 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3622 item = PyLong_FromLong(
3623 (unsigned char)seq->ob_sval[it->it_index]);
3624 if (item != NULL)
3625 ++it->it_index;
3626 return item;
3627 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003629 it->it_seq = NULL;
Serhiy Storchakafbb1c5e2016-03-30 20:40:02 +03003630 Py_DECREF(seq);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003631 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003632}
3633
3634static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003635striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003636{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003637 Py_ssize_t len = 0;
3638 if (it->it_seq)
3639 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3640 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003641}
3642
3643PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003644 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003645
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003646static PyObject *
3647striter_reduce(striterobject *it)
3648{
3649 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003650 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003651 it->it_seq, it->it_index);
3652 } else {
3653 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3654 if (u == NULL)
3655 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003656 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003657 }
3658}
3659
3660PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3661
3662static PyObject *
3663striter_setstate(striterobject *it, PyObject *state)
3664{
3665 Py_ssize_t index = PyLong_AsSsize_t(state);
3666 if (index == -1 && PyErr_Occurred())
3667 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003668 if (it->it_seq != NULL) {
3669 if (index < 0)
3670 index = 0;
3671 else if (index > PyBytes_GET_SIZE(it->it_seq))
3672 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3673 it->it_index = index;
3674 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003675 Py_RETURN_NONE;
3676}
3677
3678PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3679
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003680static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003681 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3682 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003683 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3684 reduce_doc},
3685 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3686 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003687 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003688};
3689
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003690PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003691 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3692 "bytes_iterator", /* tp_name */
3693 sizeof(striterobject), /* tp_basicsize */
3694 0, /* tp_itemsize */
3695 /* methods */
3696 (destructor)striter_dealloc, /* tp_dealloc */
3697 0, /* tp_print */
3698 0, /* tp_getattr */
3699 0, /* tp_setattr */
3700 0, /* tp_reserved */
3701 0, /* tp_repr */
3702 0, /* tp_as_number */
3703 0, /* tp_as_sequence */
3704 0, /* tp_as_mapping */
3705 0, /* tp_hash */
3706 0, /* tp_call */
3707 0, /* tp_str */
3708 PyObject_GenericGetAttr, /* tp_getattro */
3709 0, /* tp_setattro */
3710 0, /* tp_as_buffer */
3711 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3712 0, /* tp_doc */
3713 (traverseproc)striter_traverse, /* tp_traverse */
3714 0, /* tp_clear */
3715 0, /* tp_richcompare */
3716 0, /* tp_weaklistoffset */
3717 PyObject_SelfIter, /* tp_iter */
3718 (iternextfunc)striter_next, /* tp_iternext */
3719 striter_methods, /* tp_methods */
3720 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003721};
3722
3723static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003724bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003725{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003726 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003728 if (!PyBytes_Check(seq)) {
3729 PyErr_BadInternalCall();
3730 return NULL;
3731 }
3732 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3733 if (it == NULL)
3734 return NULL;
3735 it->it_index = 0;
3736 Py_INCREF(seq);
3737 it->it_seq = (PyBytesObject *)seq;
3738 _PyObject_GC_TRACK(it);
3739 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003740}