blob: 258db63d6b0997ef2e77af671cfe26a96927fc5c [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030015#include "clinic/bytesobject.c.h"
16
/* Counters kept only when COUNT_ALLOCS is defined: each one is incremented
   when a cached object (the empty object / a one-byte object) is returned
   in place of a fresh allocation. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000017#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000018Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000019#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000020
/* Cache of one-byte bytes objects, indexed by the byte's value.  A slot is
   filled the first time a length-1 object with that value is created from
   data. */
Christian Heimes2c9c7a52008-05-26 13:42:13 +000021static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; assigned the first time a size-0 object is
   created. */
22static PyBytesObject *nullstring;
23
Mark Dickinsonfd24b322008-12-06 15:33:31 +000024/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
25 for a string of length n should request PyBytesObject_SIZE + n bytes.
26
27 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
28 3 bytes per string allocation on a typical system.
29*/
30#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
31
Christian Heimes2c9c7a52008-05-26 13:42:13 +000032/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000033 For PyBytes_FromString(), the parameter `str' points to a null-terminated
34 string containing exactly `size' bytes.
35
36 For PyBytes_FromStringAndSize(), the parameter `str' is
37 either NULL or else points to a string containing at least `size' bytes.
38 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
39 not have to be null-terminated. (Therefore it is safe to construct a
40 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
41 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
42 bytes (setting the last byte to the null terminating character) and you can
43 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000044 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000045 alter the data yourself, since the strings may be shared.
46
47 The PyObject member `op->ob_size', which denotes the number of "extra
48 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020049 allocated for string data, not counting the null terminating character.
50 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000051 PyBytes_FromStringAndSize()) or the length of the string in the `str'
52 parameter (for PyBytes_FromString()).
53*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020054static PyObject *
55_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000056{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020057 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020058 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000063#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000064 Py_INCREF(op);
65 return (PyObject *)op;
66 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000067
Victor Stinner049e5092014-08-17 22:20:00 +020068 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 PyErr_SetString(PyExc_OverflowError,
70 "byte string is too large");
71 return NULL;
72 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020075 if (use_calloc)
76 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
77 else
78 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 if (op == NULL)
80 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010081 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (!use_calloc)
84 op->ob_sval[size] = '\0';
85 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 if (size == 0) {
87 nullstring = op;
88 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020089 }
90 return (PyObject *) op;
91}
92
93PyObject *
94PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
95{
96 PyBytesObject *op;
97 if (size < 0) {
98 PyErr_SetString(PyExc_SystemError,
99 "Negative size passed to PyBytes_FromStringAndSize");
100 return NULL;
101 }
102 if (size == 1 && str != NULL &&
103 (op = characters[*str & UCHAR_MAX]) != NULL)
104 {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
108 Py_INCREF(op);
109 return (PyObject *)op;
110 }
111
112 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113 if (op == NULL)
114 return NULL;
115 if (str == NULL)
116 return (PyObject *) op;
117
118 Py_MEMCPY(op->ob_sval, str, size);
119 /* share short strings */
120 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 characters[*str & UCHAR_MAX] = op;
122 Py_INCREF(op);
123 }
124 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000125}
126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127PyObject *
128PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000129{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200130 size_t size;
131 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 assert(str != NULL);
134 size = strlen(str);
135 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136 PyErr_SetString(PyExc_OverflowError,
137 "byte string is too long");
138 return NULL;
139 }
140 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000141#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000143#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000144 Py_INCREF(op);
145 return (PyObject *)op;
146 }
147 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000150#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 /* Inline PyObject_NewVar */
156 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
157 if (op == NULL)
158 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100159 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 op->ob_shash = -1;
161 Py_MEMCPY(op->ob_sval, str, size+1);
162 /* share short strings */
163 if (size == 0) {
164 nullstring = op;
165 Py_INCREF(op);
166 } else if (size == 1) {
167 characters[*str & UCHAR_MAX] = op;
168 Py_INCREF(op);
169 }
170 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000171}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000172
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000173PyObject *
174PyBytes_FromFormatV(const char *format, va_list vargs)
175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 va_list count;
177 Py_ssize_t n = 0;
178 const char* f;
179 char *s;
180 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000181
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000182 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 /* step 1: figure out how large a buffer we need */
184 for (f = format; *f; f++) {
185 if (*f == '%') {
186 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000187 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
191 * they don't affect the amount of space we reserve.
192 */
193 if ((*f == 'l' || *f == 'z') &&
194 (f[1] == 'd' || f[1] == 'u'))
195 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 switch (*f) {
198 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100199 {
200 int c = va_arg(count, int);
201 if (c < 0 || c > 255) {
202 PyErr_SetString(PyExc_OverflowError,
203 "PyBytes_FromFormatV(): %c format "
204 "expects an integer in range [0; 255]");
205 return NULL;
206 }
207 n++;
208 break;
209 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000210 case '%':
211 n++;
212 break;
213 case 'd': case 'u': case 'i': case 'x':
214 (void) va_arg(count, int);
215 /* 20 bytes is enough to hold a 64-bit
216 integer. Decimal takes the most space.
217 This isn't enough for octal. */
218 n += 20;
219 break;
220 case 's':
221 s = va_arg(count, char*);
222 n += strlen(s);
223 break;
224 case 'p':
225 (void) va_arg(count, int);
226 /* maximum 64-bit pointer representation:
227 * 0xffffffffffffffff
228 * so 19 characters is enough.
229 * XXX I count 18 -- what's the extra for?
230 */
231 n += 19;
232 break;
233 default:
234 /* if we stumble upon an unknown
235 formatting code, copy the rest of
236 the format string to the output
237 string. (we cannot just skip the
238 code, since there's no way to know
239 what's in the argument list) */
240 n += strlen(p);
241 goto expand;
242 }
243 } else
244 n++;
245 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000246 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 /* step 2: fill the buffer */
248 /* Since we've analyzed how much space we need for the worst case,
249 use sprintf directly instead of the slower PyOS_snprintf. */
250 string = PyBytes_FromStringAndSize(NULL, n);
251 if (!string)
252 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 for (f = format; *f; f++) {
257 if (*f == '%') {
258 const char* p = f++;
259 Py_ssize_t i;
260 int longflag = 0;
261 int size_tflag = 0;
262 /* parse the width.precision part (we're only
263 interested in the precision value, if any) */
264 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000265 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 n = (n*10) + *f++ - '0';
267 if (*f == '.') {
268 f++;
269 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000270 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 n = (n*10) + *f++ - '0';
272 }
David Malcolm96960882010-11-05 17:23:41 +0000273 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 f++;
275 /* handle the long flag, but only for %ld and %lu.
276 others can be added when necessary. */
277 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
278 longflag = 1;
279 ++f;
280 }
281 /* handle the size_t flag. */
282 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
283 size_tflag = 1;
284 ++f;
285 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 switch (*f) {
288 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100289 {
290 int c = va_arg(vargs, int);
291 /* c has been checked for overflow in the first step */
292 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100294 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 case 'd':
296 if (longflag)
297 sprintf(s, "%ld", va_arg(vargs, long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
300 va_arg(vargs, Py_ssize_t));
301 else
302 sprintf(s, "%d", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 'u':
306 if (longflag)
307 sprintf(s, "%lu",
308 va_arg(vargs, unsigned long));
309 else if (size_tflag)
310 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
311 va_arg(vargs, size_t));
312 else
313 sprintf(s, "%u",
314 va_arg(vargs, unsigned int));
315 s += strlen(s);
316 break;
317 case 'i':
318 sprintf(s, "%i", va_arg(vargs, int));
319 s += strlen(s);
320 break;
321 case 'x':
322 sprintf(s, "%x", va_arg(vargs, int));
323 s += strlen(s);
324 break;
325 case 's':
326 p = va_arg(vargs, char*);
327 i = strlen(p);
328 if (n > 0 && i > n)
329 i = n;
330 Py_MEMCPY(s, p, i);
331 s += i;
332 break;
333 case 'p':
334 sprintf(s, "%p", va_arg(vargs, void*));
335 /* %p is ill-defined: ensure leading 0x. */
336 if (s[1] == 'X')
337 s[1] = 'x';
338 else if (s[1] != 'x') {
339 memmove(s+2, s, strlen(s)+1);
340 s[0] = '0';
341 s[1] = 'x';
342 }
343 s += strlen(s);
344 break;
345 case '%':
346 *s++ = '%';
347 break;
348 default:
349 strcpy(s, p);
350 s += strlen(s);
351 goto end;
352 }
353 } else
354 *s++ = *f;
355 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
359 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360}
361
362PyObject *
363PyBytes_FromFormat(const char *format, ...)
364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 PyObject* ret;
366 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000367
368#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000372#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 ret = PyBytes_FromFormatV(format, vargs);
374 va_end(vargs);
375 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000376}
377
Ethan Furmanb95b5612015-01-23 20:05:18 -0800378/* Helpers for formatstring */
379
380Py_LOCAL_INLINE(PyObject *)
381getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
382{
383 Py_ssize_t argidx = *p_argidx;
384 if (argidx < arglen) {
385 (*p_argidx)++;
386 if (arglen < 0)
387 return args;
388 else
389 return PyTuple_GetItem(args, argidx);
390 }
391 PyErr_SetString(PyExc_TypeError,
392 "not enough arguments for format string");
393 return NULL;
394}
395
396/* Format codes
397 * F_LJUST '-'
398 * F_SIGN '+'
399 * F_BLANK ' '
400 * F_ALT '#'
401 * F_ZERO '0'
402 */
403#define F_LJUST (1<<0)
404#define F_SIGN (1<<1)
405#define F_BLANK (1<<2)
406#define F_ALT (1<<3)
407#define F_ZERO (1<<4)
408
409/* Returns a new reference to a PyBytes object, or NULL on failure. */
410
411static PyObject *
412formatfloat(PyObject *v, int flags, int prec, int type)
413{
414 char *p;
415 PyObject *result;
416 double x;
417
418 x = PyFloat_AsDouble(v);
419 if (x == -1.0 && PyErr_Occurred()) {
420 PyErr_Format(PyExc_TypeError, "float argument required, "
421 "not %.200s", Py_TYPE(v)->tp_name);
422 return NULL;
423 }
424
425 if (prec < 0)
426 prec = 6;
427
428 p = PyOS_double_to_string(x, type, prec,
429 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
430
431 if (p == NULL)
432 return NULL;
433 result = PyBytes_FromStringAndSize(p, strlen(p));
434 PyMem_Free(p);
435 return result;
436}
437
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300438static PyObject *
439formatlong(PyObject *v, int flags, int prec, int type)
440{
441 PyObject *result, *iobj;
442 if (type == 'i')
443 type = 'd';
444 if (PyLong_Check(v))
445 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
446 if (PyNumber_Check(v)) {
447 /* make sure number is a type of integer for o, x, and X */
448 if (type == 'o' || type == 'x' || type == 'X')
449 iobj = PyNumber_Index(v);
450 else
451 iobj = PyNumber_Long(v);
452 if (iobj == NULL) {
453 if (!PyErr_ExceptionMatches(PyExc_TypeError))
454 return NULL;
455 }
456 else if (!PyLong_Check(iobj))
457 Py_CLEAR(iobj);
458 if (iobj != NULL) {
459 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
460 Py_DECREF(iobj);
461 return result;
462 }
463 }
464 PyErr_Format(PyExc_TypeError,
465 "%%%c format: %s is required, not %.200s", type,
466 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
467 : "a number",
468 Py_TYPE(v)->tp_name);
469 return NULL;
470}
471
472static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200473byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800474{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200475 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
476 *p = PyBytes_AS_STRING(arg)[0];
477 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800478 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200479 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
480 *p = PyByteArray_AS_STRING(arg)[0];
481 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800482 }
483 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300484 PyObject *iobj;
485 long ival;
486 int overflow;
487 /* make sure number is a type of integer */
488 if (PyLong_Check(arg)) {
489 ival = PyLong_AsLongAndOverflow(arg, &overflow);
490 }
491 else {
492 iobj = PyNumber_Index(arg);
493 if (iobj == NULL) {
494 if (!PyErr_ExceptionMatches(PyExc_TypeError))
495 return 0;
496 goto onError;
497 }
498 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
499 Py_DECREF(iobj);
500 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300501 if (!overflow && ival == -1 && PyErr_Occurred())
502 goto onError;
503 if (overflow || !(0 <= ival && ival <= 255)) {
504 PyErr_SetString(PyExc_OverflowError,
505 "%c arg not in range(256)");
506 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800507 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300508 *p = (char)ival;
509 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800510 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300511 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200512 PyErr_SetString(PyExc_TypeError,
513 "%c requires an integer in range(256) or a single byte");
514 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800515}
516
517static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200518format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200520 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 /* is it a bytes object? */
523 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200524 *pbuf = PyBytes_AS_STRING(v);
525 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800526 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200527 return v;
528 }
529 if (PyByteArray_Check(v)) {
530 *pbuf = PyByteArray_AS_STRING(v);
531 *plen = PyByteArray_GET_SIZE(v);
532 Py_INCREF(v);
533 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534 }
535 /* does it support __bytes__? */
536 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
537 if (func != NULL) {
538 result = PyObject_CallFunctionObjArgs(func, NULL);
539 Py_DECREF(func);
540 if (result == NULL)
541 return NULL;
542 if (!PyBytes_Check(result)) {
543 PyErr_Format(PyExc_TypeError,
544 "__bytes__ returned non-bytes (type %.200s)",
545 Py_TYPE(result)->tp_name);
546 Py_DECREF(result);
547 return NULL;
548 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(result);
550 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 return result;
552 }
553 PyErr_Format(PyExc_TypeError,
554 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
555 Py_TYPE(v)->tp_name);
556 return NULL;
557}
558
559/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
560
561 FORMATBUFLEN is the length of the buffer in which the ints &
562 chars are formatted. XXX This is a magic number. Each formatting
563 routine does bounds checking to ensure no overflow, but a better
564 solution may be to malloc a buffer of appropriate size for each
565 format. For now, the current solution is sufficient.
566*/
567#define FORMATBUFLEN (size_t)120
568
569PyObject *
570_PyBytes_Format(PyObject *format, PyObject *args)
571{
572 char *fmt, *res;
573 Py_ssize_t arglen, argidx;
574 Py_ssize_t reslen, rescnt, fmtcnt;
575 int args_owned = 0;
576 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577 PyObject *dict = NULL;
578 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
579 PyErr_BadInternalCall();
580 return NULL;
581 }
582 fmt = PyBytes_AS_STRING(format);
583 fmtcnt = PyBytes_GET_SIZE(format);
584 reslen = rescnt = fmtcnt + 100;
585 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
586 if (result == NULL)
587 return NULL;
588 res = PyBytes_AsString(result);
589 if (PyTuple_Check(args)) {
590 arglen = PyTuple_GET_SIZE(args);
591 argidx = 0;
592 }
593 else {
594 arglen = -1;
595 argidx = -2;
596 }
597 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
598 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
599 !PyByteArray_Check(args)) {
600 dict = args;
601 }
602 while (--fmtcnt >= 0) {
603 if (*fmt != '%') {
604 if (--rescnt < 0) {
605 rescnt = fmtcnt + 100;
606 reslen += rescnt;
607 if (_PyBytes_Resize(&result, reslen))
608 return NULL;
609 res = PyBytes_AS_STRING(result)
610 + reslen - rescnt;
611 --rescnt;
612 }
613 *res++ = *fmt++;
614 }
615 else {
616 /* Got a format specifier */
617 int flags = 0;
618 Py_ssize_t width = -1;
619 int prec = -1;
620 int c = '\0';
621 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800622 PyObject *v = NULL;
623 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200624 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200626 Py_ssize_t len = 0;
627 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800628
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629 fmt++;
630 if (*fmt == '(') {
631 char *keystart;
632 Py_ssize_t keylen;
633 PyObject *key;
634 int pcount = 1;
635
636 if (dict == NULL) {
637 PyErr_SetString(PyExc_TypeError,
638 "format requires a mapping");
639 goto error;
640 }
641 ++fmt;
642 --fmtcnt;
643 keystart = fmt;
644 /* Skip over balanced parentheses */
645 while (pcount > 0 && --fmtcnt >= 0) {
646 if (*fmt == ')')
647 --pcount;
648 else if (*fmt == '(')
649 ++pcount;
650 fmt++;
651 }
652 keylen = fmt - keystart - 1;
653 if (fmtcnt < 0 || pcount > 0) {
654 PyErr_SetString(PyExc_ValueError,
655 "incomplete format key");
656 goto error;
657 }
658 key = PyBytes_FromStringAndSize(keystart,
659 keylen);
660 if (key == NULL)
661 goto error;
662 if (args_owned) {
663 Py_DECREF(args);
664 args_owned = 0;
665 }
666 args = PyObject_GetItem(dict, key);
667 Py_DECREF(key);
668 if (args == NULL) {
669 goto error;
670 }
671 args_owned = 1;
672 arglen = -1;
673 argidx = -2;
674 }
675 while (--fmtcnt >= 0) {
676 switch (c = *fmt++) {
677 case '-': flags |= F_LJUST; continue;
678 case '+': flags |= F_SIGN; continue;
679 case ' ': flags |= F_BLANK; continue;
680 case '#': flags |= F_ALT; continue;
681 case '0': flags |= F_ZERO; continue;
682 }
683 break;
684 }
685 if (c == '*') {
686 v = getnextarg(args, arglen, &argidx);
687 if (v == NULL)
688 goto error;
689 if (!PyLong_Check(v)) {
690 PyErr_SetString(PyExc_TypeError,
691 "* wants int");
692 goto error;
693 }
694 width = PyLong_AsSsize_t(v);
695 if (width == -1 && PyErr_Occurred())
696 goto error;
697 if (width < 0) {
698 flags |= F_LJUST;
699 width = -width;
700 }
701 if (--fmtcnt >= 0)
702 c = *fmt++;
703 }
704 else if (c >= 0 && isdigit(c)) {
705 width = c - '0';
706 while (--fmtcnt >= 0) {
707 c = Py_CHARMASK(*fmt++);
708 if (!isdigit(c))
709 break;
710 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
711 PyErr_SetString(
712 PyExc_ValueError,
713 "width too big");
714 goto error;
715 }
716 width = width*10 + (c - '0');
717 }
718 }
719 if (c == '.') {
720 prec = 0;
721 if (--fmtcnt >= 0)
722 c = *fmt++;
723 if (c == '*') {
724 v = getnextarg(args, arglen, &argidx);
725 if (v == NULL)
726 goto error;
727 if (!PyLong_Check(v)) {
728 PyErr_SetString(
729 PyExc_TypeError,
730 "* wants int");
731 goto error;
732 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200733 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800734 if (prec == -1 && PyErr_Occurred())
735 goto error;
736 if (prec < 0)
737 prec = 0;
738 if (--fmtcnt >= 0)
739 c = *fmt++;
740 }
741 else if (c >= 0 && isdigit(c)) {
742 prec = c - '0';
743 while (--fmtcnt >= 0) {
744 c = Py_CHARMASK(*fmt++);
745 if (!isdigit(c))
746 break;
747 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
748 PyErr_SetString(
749 PyExc_ValueError,
750 "prec too big");
751 goto error;
752 }
753 prec = prec*10 + (c - '0');
754 }
755 }
756 } /* prec */
757 if (fmtcnt >= 0) {
758 if (c == 'h' || c == 'l' || c == 'L') {
759 if (--fmtcnt >= 0)
760 c = *fmt++;
761 }
762 }
763 if (fmtcnt < 0) {
764 PyErr_SetString(PyExc_ValueError,
765 "incomplete format");
766 goto error;
767 }
768 if (c != '%') {
769 v = getnextarg(args, arglen, &argidx);
770 if (v == NULL)
771 goto error;
772 }
773 sign = 0;
774 fill = ' ';
775 switch (c) {
776 case '%':
777 pbuf = "%";
778 len = 1;
779 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700780 case 'r':
781 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800782 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200783 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800784 if (temp == NULL)
785 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200786 assert(PyUnicode_IS_ASCII(temp));
787 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
788 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800789 if (prec >= 0 && len > prec)
790 len = prec;
791 break;
792 case 's':
793 // %s is only for 2/3 code; 3 only code should use %b
794 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200795 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800796 if (temp == NULL)
797 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800798 if (prec >= 0 && len > prec)
799 len = prec;
800 break;
801 case 'i':
802 case 'd':
803 case 'u':
804 case 'o':
805 case 'x':
806 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300807 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200808 if (!temp)
809 goto error;
810 assert(PyUnicode_IS_ASCII(temp));
811 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
812 len = PyUnicode_GET_LENGTH(temp);
813 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800814 if (flags & F_ZERO)
815 fill = '0';
816 break;
817 case 'e':
818 case 'E':
819 case 'f':
820 case 'F':
821 case 'g':
822 case 'G':
823 temp = formatfloat(v, flags, prec, c);
824 if (temp == NULL)
825 goto error;
826 pbuf = PyBytes_AS_STRING(temp);
827 len = PyBytes_GET_SIZE(temp);
828 sign = 1;
829 if (flags & F_ZERO)
830 fill = '0';
831 break;
832 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200833 pbuf = &onechar;
834 len = byte_converter(v, &onechar);
835 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 goto error;
837 break;
838 default:
839 PyErr_Format(PyExc_ValueError,
840 "unsupported format character '%c' (0x%x) "
841 "at index %zd",
842 c, c,
843 (Py_ssize_t)(fmt - 1 -
844 PyBytes_AsString(format)));
845 goto error;
846 }
847 if (sign) {
848 if (*pbuf == '-' || *pbuf == '+') {
849 sign = *pbuf++;
850 len--;
851 }
852 else if (flags & F_SIGN)
853 sign = '+';
854 else if (flags & F_BLANK)
855 sign = ' ';
856 else
857 sign = 0;
858 }
859 if (width < len)
860 width = len;
861 if (rescnt - (sign != 0) < width) {
862 reslen -= rescnt;
863 rescnt = width + fmtcnt + 100;
864 reslen += rescnt;
865 if (reslen < 0) {
866 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800867 Py_XDECREF(temp);
868 return PyErr_NoMemory();
869 }
870 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800871 Py_XDECREF(temp);
872 return NULL;
873 }
874 res = PyBytes_AS_STRING(result)
875 + reslen - rescnt;
876 }
877 if (sign) {
878 if (fill != ' ')
879 *res++ = sign;
880 rescnt--;
881 if (width > len)
882 width--;
883 }
884 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
885 assert(pbuf[0] == '0');
886 assert(pbuf[1] == c);
887 if (fill != ' ') {
888 *res++ = *pbuf++;
889 *res++ = *pbuf++;
890 }
891 rescnt -= 2;
892 width -= 2;
893 if (width < 0)
894 width = 0;
895 len -= 2;
896 }
897 if (width > len && !(flags & F_LJUST)) {
898 do {
899 --rescnt;
900 *res++ = fill;
901 } while (--width > len);
902 }
903 if (fill == ' ') {
904 if (sign)
905 *res++ = sign;
906 if ((flags & F_ALT) &&
907 (c == 'x' || c == 'X')) {
908 assert(pbuf[0] == '0');
909 assert(pbuf[1] == c);
910 *res++ = *pbuf++;
911 *res++ = *pbuf++;
912 }
913 }
914 Py_MEMCPY(res, pbuf, len);
915 res += len;
916 rescnt -= len;
917 while (--width >= len) {
918 --rescnt;
919 *res++ = ' ';
920 }
921 if (dict && (argidx < arglen) && c != '%') {
922 PyErr_SetString(PyExc_TypeError,
923 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 Py_XDECREF(temp);
925 goto error;
926 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 Py_XDECREF(temp);
928 } /* '%' */
929 } /* until end */
930 if (argidx < arglen && !dict) {
931 PyErr_SetString(PyExc_TypeError,
932 "not all arguments converted during bytes formatting");
933 goto error;
934 }
935 if (args_owned) {
936 Py_DECREF(args);
937 }
938 if (_PyBytes_Resize(&result, reslen - rescnt))
939 return NULL;
940 return result;
941
942 error:
943 Py_DECREF(result);
944 if (args_owned) {
945 Py_DECREF(args);
946 }
947 return NULL;
948}
949
950/* =-= */
951
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000952static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000954{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000956}
957
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958/* Unescape a backslash-escaped string. If unicode is non-zero,
959 the string is a u-literal. If recode_encoding is non-zero,
960 the string is UTF-8 encoded and should be re-encoded in the
961 specified encoding. */
962
963PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 Py_ssize_t len,
965 const char *errors,
966 Py_ssize_t unicode,
967 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 int c;
970 char *p, *buf;
971 const char *end;
972 PyObject *v;
973 Py_ssize_t newlen = recode_encoding ? 4*len:len;
974 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
975 if (v == NULL)
976 return NULL;
977 p = buf = PyBytes_AsString(v);
978 end = s + len;
979 while (s < end) {
980 if (*s != '\\') {
981 non_esc:
982 if (recode_encoding && (*s & 0x80)) {
983 PyObject *u, *w;
984 char *r;
985 const char* t;
986 Py_ssize_t rn;
987 t = s;
988 /* Decode non-ASCII bytes as UTF-8. */
989 while (t < end && (*t & 0x80)) t++;
990 u = PyUnicode_DecodeUTF8(s, t - s, errors);
991 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 /* Recode them in target encoding. */
994 w = PyUnicode_AsEncodedString(
995 u, recode_encoding, errors);
996 Py_DECREF(u);
997 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 /* Append bytes to output buffer. */
1000 assert(PyBytes_Check(w));
1001 r = PyBytes_AS_STRING(w);
1002 rn = PyBytes_GET_SIZE(w);
1003 Py_MEMCPY(p, r, rn);
1004 p += rn;
1005 Py_DECREF(w);
1006 s = t;
1007 } else {
1008 *p++ = *s++;
1009 }
1010 continue;
1011 }
1012 s++;
1013 if (s==end) {
1014 PyErr_SetString(PyExc_ValueError,
1015 "Trailing \\ in string");
1016 goto failed;
1017 }
1018 switch (*s++) {
1019 /* XXX This assumes ASCII! */
1020 case '\n': break;
1021 case '\\': *p++ = '\\'; break;
1022 case '\'': *p++ = '\''; break;
1023 case '\"': *p++ = '\"'; break;
1024 case 'b': *p++ = '\b'; break;
1025 case 'f': *p++ = '\014'; break; /* FF */
1026 case 't': *p++ = '\t'; break;
1027 case 'n': *p++ = '\n'; break;
1028 case 'r': *p++ = '\r'; break;
1029 case 'v': *p++ = '\013'; break; /* VT */
1030 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1031 case '0': case '1': case '2': case '3':
1032 case '4': case '5': case '6': case '7':
1033 c = s[-1] - '0';
1034 if (s < end && '0' <= *s && *s <= '7') {
1035 c = (c<<3) + *s++ - '0';
1036 if (s < end && '0' <= *s && *s <= '7')
1037 c = (c<<3) + *s++ - '0';
1038 }
1039 *p++ = c;
1040 break;
1041 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001042 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 unsigned int x = 0;
1044 c = Py_CHARMASK(*s);
1045 s++;
David Malcolm96960882010-11-05 17:23:41 +00001046 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001048 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 x = 10 + c - 'a';
1050 else
1051 x = 10 + c - 'A';
1052 x = x << 4;
1053 c = Py_CHARMASK(*s);
1054 s++;
David Malcolm96960882010-11-05 17:23:41 +00001055 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001057 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 x += 10 + c - 'a';
1059 else
1060 x += 10 + c - 'A';
1061 *p++ = x;
1062 break;
1063 }
1064 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001065 PyErr_Format(PyExc_ValueError,
1066 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001067 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 goto failed;
1069 }
1070 if (strcmp(errors, "replace") == 0) {
1071 *p++ = '?';
1072 } else if (strcmp(errors, "ignore") == 0)
1073 /* do nothing */;
1074 else {
1075 PyErr_Format(PyExc_ValueError,
1076 "decoding error; unknown "
1077 "error handling code: %.400s",
1078 errors);
1079 goto failed;
1080 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001081 /* skip \x */
1082 if (s < end && Py_ISXDIGIT(s[0]))
1083 s++; /* and a hexdigit */
1084 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 default:
1086 *p++ = '\\';
1087 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001088 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 UTF-8 bytes may follow. */
1090 }
1091 }
1092 if (p-buf < newlen)
1093 _PyBytes_Resize(&v, p - buf);
1094 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001095 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 Py_DECREF(v);
1097 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001098}
1099
1100/* -------------------------------------------------------------------- */
1101/* object api */
1102
1103Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001104PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001105{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 if (!PyBytes_Check(op)) {
1107 PyErr_Format(PyExc_TypeError,
1108 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1109 return -1;
1110 }
1111 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112}
1113
1114char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001115PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 if (!PyBytes_Check(op)) {
1118 PyErr_Format(PyExc_TypeError,
1119 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1120 return NULL;
1121 }
1122 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001123}
1124
1125int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001126PyBytes_AsStringAndSize(PyObject *obj,
1127 char **s,
1128 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 if (s == NULL) {
1131 PyErr_BadInternalCall();
1132 return -1;
1133 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 if (!PyBytes_Check(obj)) {
1136 PyErr_Format(PyExc_TypeError,
1137 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1138 return -1;
1139 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 *s = PyBytes_AS_STRING(obj);
1142 if (len != NULL)
1143 *len = PyBytes_GET_SIZE(obj);
1144 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001145 PyErr_SetString(PyExc_ValueError,
1146 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 return -1;
1148 }
1149 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150}
Neal Norwitz6968b052007-02-27 19:02:19 +00001151
1152/* -------------------------------------------------------------------- */
1153/* Methods */
1154
Eric Smith0923d1d2009-04-16 20:16:10 +00001155#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001156
1157#include "stringlib/fastsearch.h"
1158#include "stringlib/count.h"
1159#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001160#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001161#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001162#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001163#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001164
Eric Smith0f78bff2009-11-30 01:01:42 +00001165#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167PyObject *
1168PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001169{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001170 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001171 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001172 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001174 unsigned char quote, *s, *p;
1175
1176 /* Compute size of output string */
1177 squotes = dquotes = 0;
1178 newsize = 3; /* b'' */
1179 s = (unsigned char*)op->ob_sval;
1180 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001181 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001182 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001183 case '\'': squotes++; break;
1184 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001185 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001186 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001187 default:
1188 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001189 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001190 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001191 if (newsize > PY_SSIZE_T_MAX - incr)
1192 goto overflow;
1193 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001194 }
1195 quote = '\'';
1196 if (smartquotes && squotes && !dquotes)
1197 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001198 if (squotes && quote == '\'') {
1199 if (newsize > PY_SSIZE_T_MAX - squotes)
1200 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001201 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001203
1204 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 if (v == NULL) {
1206 return NULL;
1207 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001208 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001210 *p++ = 'b', *p++ = quote;
1211 for (i = 0; i < length; i++) {
1212 unsigned char c = op->ob_sval[i];
1213 if (c == quote || c == '\\')
1214 *p++ = '\\', *p++ = c;
1215 else if (c == '\t')
1216 *p++ = '\\', *p++ = 't';
1217 else if (c == '\n')
1218 *p++ = '\\', *p++ = 'n';
1219 else if (c == '\r')
1220 *p++ = '\\', *p++ = 'r';
1221 else if (c < ' ' || c >= 0x7f) {
1222 *p++ = '\\';
1223 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001224 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1225 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001227 else
1228 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001230 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001231 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001232 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001233
1234 overflow:
1235 PyErr_SetString(PyExc_OverflowError,
1236 "bytes object is too large to make repr");
1237 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001238}
1239
Neal Norwitz6968b052007-02-27 19:02:19 +00001240static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001241bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001244}
1245
Neal Norwitz6968b052007-02-27 19:02:19 +00001246static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001247bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (Py_BytesWarningFlag) {
1250 if (PyErr_WarnEx(PyExc_BytesWarning,
1251 "str() on a bytes instance", 1))
1252 return NULL;
1253 }
1254 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001255}
1256
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001258bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261}
Neal Norwitz6968b052007-02-27 19:02:19 +00001262
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263/* This is also used by PyBytes_Concat() */
1264static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001265bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 Py_ssize_t size;
1268 Py_buffer va, vb;
1269 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 va.len = -1;
1272 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001273 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1274 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1276 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1277 goto done;
1278 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 /* Optimize end cases */
1281 if (va.len == 0 && PyBytes_CheckExact(b)) {
1282 result = b;
1283 Py_INCREF(result);
1284 goto done;
1285 }
1286 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1287 result = a;
1288 Py_INCREF(result);
1289 goto done;
1290 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 size = va.len + vb.len;
1293 if (size < 0) {
1294 PyErr_NoMemory();
1295 goto done;
1296 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 result = PyBytes_FromStringAndSize(NULL, size);
1299 if (result != NULL) {
1300 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1301 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1302 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303
1304 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 if (va.len != -1)
1306 PyBuffer_Release(&va);
1307 if (vb.len != -1)
1308 PyBuffer_Release(&vb);
1309 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310}
Neal Norwitz6968b052007-02-27 19:02:19 +00001311
1312static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001313bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001314{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001315 Py_ssize_t i;
1316 Py_ssize_t j;
1317 Py_ssize_t size;
1318 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 size_t nbytes;
1320 if (n < 0)
1321 n = 0;
1322 /* watch out for overflows: the size can overflow int,
1323 * and the # of bytes needed can overflow size_t
1324 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001325 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 PyErr_SetString(PyExc_OverflowError,
1327 "repeated bytes are too long");
1328 return NULL;
1329 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001330 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1332 Py_INCREF(a);
1333 return (PyObject *)a;
1334 }
1335 nbytes = (size_t)size;
1336 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1337 PyErr_SetString(PyExc_OverflowError,
1338 "repeated bytes are too long");
1339 return NULL;
1340 }
1341 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1342 if (op == NULL)
1343 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001344 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 op->ob_shash = -1;
1346 op->ob_sval[size] = '\0';
1347 if (Py_SIZE(a) == 1 && n > 0) {
1348 memset(op->ob_sval, a->ob_sval[0] , n);
1349 return (PyObject *) op;
1350 }
1351 i = 0;
1352 if (i < size) {
1353 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1354 i = Py_SIZE(a);
1355 }
1356 while (i < size) {
1357 j = (i <= size-i) ? i : size-i;
1358 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1359 i += j;
1360 }
1361 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001362}
1363
Guido van Rossum98297ee2007-11-06 21:34:58 +00001364static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001365bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001366{
1367 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1368 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001369 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001370 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001371 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001372 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001373 return -1;
1374 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1375 varg.buf, varg.len, 0);
1376 PyBuffer_Release(&varg);
1377 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001378 }
1379 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001380 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1381 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001382 }
1383
Antoine Pitrou0010d372010-08-15 17:12:55 +00001384 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001385}
1386
Neal Norwitz6968b052007-02-27 19:02:19 +00001387static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001388bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 if (i < 0 || i >= Py_SIZE(a)) {
1391 PyErr_SetString(PyExc_IndexError, "index out of range");
1392 return NULL;
1393 }
1394 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001395}
1396
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001397Py_LOCAL(int)
1398bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1399{
1400 int cmp;
1401 Py_ssize_t len;
1402
1403 len = Py_SIZE(a);
1404 if (Py_SIZE(b) != len)
1405 return 0;
1406
1407 if (a->ob_sval[0] != b->ob_sval[0])
1408 return 0;
1409
1410 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1411 return (cmp == 0);
1412}
1413
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 int c;
1418 Py_ssize_t len_a, len_b;
1419 Py_ssize_t min_len;
1420 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 /* Make sure both arguments are strings. */
1423 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001424 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1425 if (PyObject_IsInstance((PyObject*)a,
1426 (PyObject*)&PyUnicode_Type) ||
1427 PyObject_IsInstance((PyObject*)b,
1428 (PyObject*)&PyUnicode_Type)) {
1429 if (PyErr_WarnEx(PyExc_BytesWarning,
1430 "Comparison between bytes and string", 1))
1431 return NULL;
1432 }
1433 else if (PyObject_IsInstance((PyObject*)a,
1434 (PyObject*)&PyLong_Type) ||
1435 PyObject_IsInstance((PyObject*)b,
1436 (PyObject*)&PyLong_Type)) {
1437 if (PyErr_WarnEx(PyExc_BytesWarning,
1438 "Comparison between bytes and int", 1))
1439 return NULL;
1440 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 }
1442 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001444 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001446 case Py_EQ:
1447 case Py_LE:
1448 case Py_GE:
1449 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001451 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001452 case Py_NE:
1453 case Py_LT:
1454 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001456 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001457 default:
1458 PyErr_BadArgument();
1459 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 }
1461 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001462 else if (op == Py_EQ || op == Py_NE) {
1463 int eq = bytes_compare_eq(a, b);
1464 eq ^= (op == Py_NE);
1465 result = eq ? Py_True : Py_False;
1466 }
1467 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001468 len_a = Py_SIZE(a);
1469 len_b = Py_SIZE(b);
1470 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001471 if (min_len > 0) {
1472 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001473 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001474 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001476 else
1477 c = 0;
1478 if (c == 0)
1479 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1480 switch (op) {
1481 case Py_LT: c = c < 0; break;
1482 case Py_LE: c = c <= 0; break;
1483 case Py_GT: c = c > 0; break;
1484 case Py_GE: c = c >= 0; break;
1485 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001486 PyErr_BadArgument();
1487 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001488 }
1489 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 Py_INCREF(result);
1493 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001494}
1495
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001496static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001497bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001498{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001499 if (a->ob_shash == -1) {
1500 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001501 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001502 }
1503 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001504}
1505
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001506static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001507bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 if (PyIndex_Check(item)) {
1510 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1511 if (i == -1 && PyErr_Occurred())
1512 return NULL;
1513 if (i < 0)
1514 i += PyBytes_GET_SIZE(self);
1515 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1516 PyErr_SetString(PyExc_IndexError,
1517 "index out of range");
1518 return NULL;
1519 }
1520 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1521 }
1522 else if (PySlice_Check(item)) {
1523 Py_ssize_t start, stop, step, slicelength, cur, i;
1524 char* source_buf;
1525 char* result_buf;
1526 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001527
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001528 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 PyBytes_GET_SIZE(self),
1530 &start, &stop, &step, &slicelength) < 0) {
1531 return NULL;
1532 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 if (slicelength <= 0) {
1535 return PyBytes_FromStringAndSize("", 0);
1536 }
1537 else if (start == 0 && step == 1 &&
1538 slicelength == PyBytes_GET_SIZE(self) &&
1539 PyBytes_CheckExact(self)) {
1540 Py_INCREF(self);
1541 return (PyObject *)self;
1542 }
1543 else if (step == 1) {
1544 return PyBytes_FromStringAndSize(
1545 PyBytes_AS_STRING(self) + start,
1546 slicelength);
1547 }
1548 else {
1549 source_buf = PyBytes_AS_STRING(self);
1550 result = PyBytes_FromStringAndSize(NULL, slicelength);
1551 if (result == NULL)
1552 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 result_buf = PyBytes_AS_STRING(result);
1555 for (cur = start, i = 0; i < slicelength;
1556 cur += step, i++) {
1557 result_buf[i] = source_buf[cur];
1558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 return result;
1561 }
1562 }
1563 else {
1564 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001565 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 Py_TYPE(item)->tp_name);
1567 return NULL;
1568 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001569}
1570
1571static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001572bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1575 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001576}
1577
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001578static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 (lenfunc)bytes_length, /*sq_length*/
1580 (binaryfunc)bytes_concat, /*sq_concat*/
1581 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1582 (ssizeargfunc)bytes_item, /*sq_item*/
1583 0, /*sq_slice*/
1584 0, /*sq_ass_item*/
1585 0, /*sq_ass_slice*/
1586 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001587};
1588
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001589static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 (lenfunc)bytes_length,
1591 (binaryfunc)bytes_subscript,
1592 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593};
1594
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001595static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001596 (getbufferproc)bytes_buffer_getbuffer,
1597 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598};
1599
1600
1601#define LEFTSTRIP 0
1602#define RIGHTSTRIP 1
1603#define BOTHSTRIP 2
1604
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001605/*[clinic input]
1606bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001608 sep: object = None
1609 The delimiter according which to split the bytes.
1610 None (the default value) means split on ASCII whitespace characters
1611 (space, tab, return, newline, formfeed, vertical tab).
1612 maxsplit: Py_ssize_t = -1
1613 Maximum number of splits to do.
1614 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001616Return a list of the sections in the bytes, using sep as the delimiter.
1617[clinic start generated code]*/
1618
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001619static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001620bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001621/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001622{
1623 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 const char *s = PyBytes_AS_STRING(self), *sub;
1625 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001626 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 if (maxsplit < 0)
1629 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001630 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001632 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 return NULL;
1634 sub = vsub.buf;
1635 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1638 PyBuffer_Release(&vsub);
1639 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001640}
1641
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001642/*[clinic input]
1643bytes.partition
1644
1645 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001646 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001647 /
1648
1649Partition the bytes into three parts using the given separator.
1650
1651This will search for the separator sep in the bytes. If the separator is found,
1652returns a 3-tuple containing the part before the separator, the separator
1653itself, and the part after it.
1654
1655If the separator is not found, returns a 3-tuple containing the original bytes
1656object and two empty bytes objects.
1657[clinic start generated code]*/
1658
Neal Norwitz6968b052007-02-27 19:02:19 +00001659static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001660bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001661/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001662{
Neal Norwitz6968b052007-02-27 19:02:19 +00001663 return stringlib_partition(
1664 (PyObject*) self,
1665 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001666 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001667 );
1668}
1669
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001670/*[clinic input]
1671bytes.rpartition
1672
1673 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001674 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001675 /
1676
1677Partition the bytes into three parts using the given separator.
1678
1679This will search for the separator sep in the bytes, starting and the end. If
1680the separator is found, returns a 3-tuple containing the part before the
1681separator, the separator itself, and the part after it.
1682
1683If the separator is not found, returns a 3-tuple containing two empty bytes
1684objects and the original bytes object.
1685[clinic start generated code]*/
1686
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001687static PyObject *
1688bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001689/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001690{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 return stringlib_rpartition(
1692 (PyObject*) self,
1693 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001694 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001696}
1697
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001698/*[clinic input]
1699bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001700
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001701Return a list of the sections in the bytes, using sep as the delimiter.
1702
1703Splitting is done starting at the end of the bytes and working to the front.
1704[clinic start generated code]*/
1705
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001706static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001707bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001708/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001709{
1710 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 const char *s = PyBytes_AS_STRING(self), *sub;
1712 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001713 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 if (maxsplit < 0)
1716 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001717 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001719 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 return NULL;
1721 sub = vsub.buf;
1722 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1725 PyBuffer_Release(&vsub);
1726 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001727}
1728
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001730/*[clinic input]
1731bytes.join
1732
1733 iterable_of_bytes: object
1734 /
1735
1736Concatenate any number of bytes objects.
1737
1738The bytes whose method is called is inserted in between each pair.
1739
1740The result is returned as a new bytes object.
1741
1742Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1743[clinic start generated code]*/
1744
Neal Norwitz6968b052007-02-27 19:02:19 +00001745static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001746bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001747/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001748{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001749 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001750}
1751
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752PyObject *
1753_PyBytes_Join(PyObject *sep, PyObject *x)
1754{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 assert(sep != NULL && PyBytes_Check(sep));
1756 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001757 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758}
1759
/* Helper macro to fix up start/end slice values in place.

   end is capped at len; a negative end or start is offset by len and then
   floored at 0.  A positive start is left untouched, so it may still exceed
   len (and therefore end) afterwards -- callers have to cope with that.

   start and end must be modifiable lvalues; every argument may be evaluated
   more than once, so pass side-effect-free expressions.

   The body is wrapped in do { } while (0) and the arguments are
   parenthesized so that "ADJUST_INDICES(...);" is exactly one statement and
   stays correct inside an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774
1775Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001776bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001779 char byte;
1780 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001782 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001784 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785
Antoine Pitrouac65d962011-10-20 23:54:17 +02001786 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1787 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789
Antoine Pitrouac65d962011-10-20 23:54:17 +02001790 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001791 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001792 return -2;
1793
1794 sub = subbuf.buf;
1795 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001797 else {
1798 sub = &byte;
1799 sub_len = 1;
1800 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001801 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001803 ADJUST_INDICES(start, end, len);
1804 if (end - start < sub_len)
1805 res = -1;
Victor Stinnerdabbfe72015-03-25 03:16:32 +01001806 /* Issue #23573: FIXME, windows has no memrchr() */
1807 else if (sub_len == 1 && dir > 0) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001808 unsigned char needle = *sub;
1809 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1810 res = stringlib_fastsearch_memchr_1char(
1811 PyBytes_AS_STRING(self) + start, end - start,
1812 needle, needle, mode);
1813 if (res >= 0)
1814 res += start;
1815 }
1816 else {
1817 if (dir > 0)
1818 res = stringlib_find_slice(
1819 PyBytes_AS_STRING(self), len,
1820 sub, sub_len, start, end);
1821 else
1822 res = stringlib_rfind_slice(
1823 PyBytes_AS_STRING(self), len,
1824 sub, sub_len, start, end);
1825 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001826
1827 if (subobj)
1828 PyBuffer_Release(&subbuf);
1829
1830 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831}
1832
1833
1834PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001835"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001836\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001837Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001838such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001840\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841Return -1 on failure.");
1842
Neal Norwitz6968b052007-02-27 19:02:19 +00001843static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001844bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001845{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 Py_ssize_t result = bytes_find_internal(self, args, +1);
1847 if (result == -2)
1848 return NULL;
1849 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001850}
1851
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852
1853PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001854"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001855\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856Like B.find() but raise ValueError when the substring is not found.");
1857
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001858static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001859bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001860{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 Py_ssize_t result = bytes_find_internal(self, args, +1);
1862 if (result == -2)
1863 return NULL;
1864 if (result == -1) {
1865 PyErr_SetString(PyExc_ValueError,
1866 "substring not found");
1867 return NULL;
1868 }
1869 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001870}
1871
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
1873PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001874"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001875\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001877such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001879\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880Return -1 on failure.");
1881
Neal Norwitz6968b052007-02-27 19:02:19 +00001882static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001883bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 Py_ssize_t result = bytes_find_internal(self, args, -1);
1886 if (result == -2)
1887 return NULL;
1888 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001889}
1890
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001891
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001893"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894\n\
1895Like B.rfind() but raise ValueError when the substring is not found.");
1896
1897static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001898bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 Py_ssize_t result = bytes_find_internal(self, args, -1);
1901 if (result == -2)
1902 return NULL;
1903 if (result == -1) {
1904 PyErr_SetString(PyExc_ValueError,
1905 "substring not found");
1906 return NULL;
1907 }
1908 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001909}
1910
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
1912Py_LOCAL_INLINE(PyObject *)
1913do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 Py_buffer vsep;
1916 char *s = PyBytes_AS_STRING(self);
1917 Py_ssize_t len = PyBytes_GET_SIZE(self);
1918 char *sep;
1919 Py_ssize_t seplen;
1920 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001922 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 return NULL;
1924 sep = vsep.buf;
1925 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 i = 0;
1928 if (striptype != RIGHTSTRIP) {
1929 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1930 i++;
1931 }
1932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 j = len;
1935 if (striptype != LEFTSTRIP) {
1936 do {
1937 j--;
1938 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1939 j++;
1940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1945 Py_INCREF(self);
1946 return (PyObject*)self;
1947 }
1948 else
1949 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001950}
1951
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
1953Py_LOCAL_INLINE(PyObject *)
1954do_strip(PyBytesObject *self, int striptype)
1955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 char *s = PyBytes_AS_STRING(self);
1957 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 i = 0;
1960 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001961 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 i++;
1963 }
1964 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 j = len;
1967 if (striptype != LEFTSTRIP) {
1968 do {
1969 j--;
David Malcolm96960882010-11-05 17:23:41 +00001970 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 j++;
1972 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1975 Py_INCREF(self);
1976 return (PyObject*)self;
1977 }
1978 else
1979 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980}
1981
1982
1983Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001984do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986 if (bytes != NULL && bytes != Py_None) {
1987 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001988 }
1989 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990}
1991
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001992/*[clinic input]
1993bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001995 self: self(type="PyBytesObject *")
1996 bytes: object = None
1997 /
1998
1999Strip leading and trailing bytes contained in the argument.
2000
2001If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2002[clinic start generated code]*/
2003
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002004static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002006/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002007{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002009}
2010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011/*[clinic input]
2012bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002014 self: self(type="PyBytesObject *")
2015 bytes: object = None
2016 /
2017
2018Strip leading bytes contained in the argument.
2019
2020If the argument is omitted or None, strip leading ASCII whitespace.
2021[clinic start generated code]*/
2022
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023static PyObject *
2024bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002025/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026{
2027 return do_argstrip(self, LEFTSTRIP, bytes);
2028}
2029
2030/*[clinic input]
2031bytes.rstrip
2032
2033 self: self(type="PyBytesObject *")
2034 bytes: object = None
2035 /
2036
2037Strip trailing bytes contained in the argument.
2038
2039If the argument is omitted or None, strip trailing ASCII whitespace.
2040[clinic start generated code]*/
2041
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042static PyObject *
2043bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002044/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002045{
2046 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002047}
Neal Norwitz6968b052007-02-27 19:02:19 +00002048
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
2050PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002051"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002052\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002054string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055as in slice notation.");
2056
2057static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002058bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 PyObject *sub_obj;
2061 const char *str = PyBytes_AS_STRING(self), *sub;
2062 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002063 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065
Antoine Pitrouac65d962011-10-20 23:54:17 +02002066 Py_buffer vsub;
2067 PyObject *count_obj;
2068
2069 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2070 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouac65d962011-10-20 23:54:17 +02002073 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002074 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002075 return NULL;
2076
2077 sub = vsub.buf;
2078 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002080 else {
2081 sub = &byte;
2082 sub_len = 1;
2083 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouac65d962011-10-20 23:54:17 +02002087 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2089 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002090
2091 if (sub_obj)
2092 PyBuffer_Release(&vsub);
2093
2094 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095}
2096
2097
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098/*[clinic input]
2099bytes.translate
2100
2101 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002102 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002103 Translation table, which must be a bytes object of length 256.
2104 [
2105 deletechars: object
2106 ]
2107 /
2108
2109Return a copy with each character mapped by the given translation table.
2110
2111All characters occurring in the optional argument deletechars are removed.
2112The remaining characters are mapped through the given translation table.
2113[clinic start generated code]*/
2114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002116bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2117 PyObject *deletechars)
2118/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002120 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002121 Py_buffer table_view = {NULL, NULL};
2122 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002123 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002124 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002127 Py_ssize_t inlen, tablen, dellen = 0;
2128 PyObject *result;
2129 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131 if (PyBytes_Check(table)) {
2132 table_chars = PyBytes_AS_STRING(table);
2133 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002135 else if (table == Py_None) {
2136 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 tablen = 256;
2138 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002139 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002140 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002141 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002142 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002143 tablen = table_view.len;
2144 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 if (tablen != 256) {
2147 PyErr_SetString(PyExc_ValueError,
2148 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 return NULL;
2151 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002153 if (deletechars != NULL) {
2154 if (PyBytes_Check(deletechars)) {
2155 del_table_chars = PyBytes_AS_STRING(deletechars);
2156 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002158 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002159 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002160 PyBuffer_Release(&table_view);
2161 return NULL;
2162 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002163 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002164 dellen = del_table_view.len;
2165 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 }
2167 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002168 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 dellen = 0;
2170 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 inlen = PyBytes_GET_SIZE(input_obj);
2173 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002174 if (result == NULL) {
2175 PyBuffer_Release(&del_table_view);
2176 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002177 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002178 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 output_start = output = PyBytes_AsString(result);
2180 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 /* If no deletions are required, use faster code */
2184 for (i = inlen; --i >= 0; ) {
2185 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002186 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 changed = 1;
2188 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002189 if (!changed && PyBytes_CheckExact(input_obj)) {
2190 Py_INCREF(input_obj);
2191 Py_DECREF(result);
2192 result = input_obj;
2193 }
2194 PyBuffer_Release(&del_table_view);
2195 PyBuffer_Release(&table_view);
2196 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002198
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 for (i = 0; i < 256; i++)
2201 trans_table[i] = Py_CHARMASK(i);
2202 } else {
2203 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002204 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002205 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002206 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002208 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002209 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002210 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002212 for (i = inlen; --i >= 0; ) {
2213 c = Py_CHARMASK(*input++);
2214 if (trans_table[c] != -1)
2215 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2216 continue;
2217 changed = 1;
2218 }
2219 if (!changed && PyBytes_CheckExact(input_obj)) {
2220 Py_DECREF(result);
2221 Py_INCREF(input_obj);
2222 return input_obj;
2223 }
2224 /* Fix the size of the resulting string */
2225 if (inlen > 0)
2226 _PyBytes_Resize(&result, output - output_start);
2227 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002228}
2229
2230
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002231/*[clinic input]
2232
2233@staticmethod
2234bytes.maketrans
2235
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002236 frm: Py_buffer
2237 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002238 /
2239
2240Return a translation table useable for the bytes or bytearray translate method.
2241
2242The returned table will be one where each byte in frm is mapped to the byte at
2243the same position in to.
2244
2245The bytes objects frm and to must be of the same length.
2246[clinic start generated code]*/
2247
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002248static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002249bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002250/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251{
2252 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002253}
2254
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255/* find and count characters and substrings */
2256
/* Find the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL if there is
   none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2259
2260/* String ops must return a string. */
2261/* If the object is subclass of string, create a copy */
2262Py_LOCAL(PyBytesObject *)
2263return_self(PyBytesObject *self)
2264{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002265 if (PyBytes_CheckExact(self)) {
2266 Py_INCREF(self);
2267 return self;
2268 }
2269 return (PyBytesObject *)PyBytes_FromStringAndSize(
2270 PyBytes_AS_STRING(self),
2271 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002272}
2273
2274Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002275countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002276{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 Py_ssize_t count=0;
2278 const char *start=target;
2279 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002281 while ( (start=findchar(start, end-start, c)) != NULL ) {
2282 count++;
2283 if (count >= maxcount)
2284 break;
2285 start += 1;
2286 }
2287 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288}
2289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002290
2291/* Algorithms for different cases of string replacement */
2292
2293/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2294Py_LOCAL(PyBytesObject *)
2295replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 const char *to_s, Py_ssize_t to_len,
2297 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 char *self_s, *result_s;
2300 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002301 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002305
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002306 /* 1 at the end plus 1 after every character;
2307 count = min(maxcount, self_len + 1) */
2308 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002309 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002310 else
2311 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2312 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 /* Check for overflow */
2315 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002316 assert(count > 0);
2317 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 PyErr_SetString(PyExc_OverflowError,
2319 "replacement bytes are too long");
2320 return NULL;
2321 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002322 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 if (! (result = (PyBytesObject *)
2325 PyBytes_FromStringAndSize(NULL, result_len)) )
2326 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002328 self_s = PyBytes_AS_STRING(self);
2329 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002331 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002332
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 /* Lay the first one down (guaranteed this will occur) */
2334 Py_MEMCPY(result_s, to_s, to_len);
2335 result_s += to_len;
2336 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002337
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002338 for (i=0; i<count; i++) {
2339 *result_s++ = *self_s++;
2340 Py_MEMCPY(result_s, to_s, to_len);
2341 result_s += to_len;
2342 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 /* Copy the rest of the original string */
2345 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002347 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348}
2349
2350/* Special case for deleting a single character */
2351/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2352Py_LOCAL(PyBytesObject *)
2353replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002354 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 char *self_s, *result_s;
2357 char *start, *next, *end;
2358 Py_ssize_t self_len, result_len;
2359 Py_ssize_t count;
2360 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002362 self_len = PyBytes_GET_SIZE(self);
2363 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 count = countchar(self_s, self_len, from_c, maxcount);
2366 if (count == 0) {
2367 return return_self(self);
2368 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002370 result_len = self_len - count; /* from_len == 1 */
2371 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002373 if ( (result = (PyBytesObject *)
2374 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2375 return NULL;
2376 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 start = self_s;
2379 end = self_s + self_len;
2380 while (count-- > 0) {
2381 next = findchar(start, end-start, from_c);
2382 if (next == NULL)
2383 break;
2384 Py_MEMCPY(result_s, start, next-start);
2385 result_s += (next-start);
2386 start = next+1;
2387 }
2388 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002391}
2392
2393/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2394
2395Py_LOCAL(PyBytesObject *)
2396replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002397 const char *from_s, Py_ssize_t from_len,
2398 Py_ssize_t maxcount) {
2399 char *self_s, *result_s;
2400 char *start, *next, *end;
2401 Py_ssize_t self_len, result_len;
2402 Py_ssize_t count, offset;
2403 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 self_len = PyBytes_GET_SIZE(self);
2406 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 count = stringlib_count(self_s, self_len,
2409 from_s, from_len,
2410 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 if (count == 0) {
2413 /* no matches */
2414 return return_self(self);
2415 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 result_len = self_len - (count * from_len);
2418 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 if ( (result = (PyBytesObject *)
2421 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2422 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 start = self_s;
2427 end = self_s + self_len;
2428 while (count-- > 0) {
2429 offset = stringlib_find(start, end-start,
2430 from_s, from_len,
2431 0);
2432 if (offset == -1)
2433 break;
2434 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 result_s += (next-start);
2439 start = next+from_len;
2440 }
2441 Py_MEMCPY(result_s, start, end-start);
2442 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002443}
2444
2445/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2446Py_LOCAL(PyBytesObject *)
2447replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002448 char from_c, char to_c,
2449 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 char *self_s, *result_s, *start, *end, *next;
2452 Py_ssize_t self_len;
2453 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 /* The result string will be the same size */
2456 self_s = PyBytes_AS_STRING(self);
2457 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 if (next == NULL) {
2462 /* No matches; return the original string */
2463 return return_self(self);
2464 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 /* Need to make a new string */
2467 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2468 if (result == NULL)
2469 return NULL;
2470 result_s = PyBytes_AS_STRING(result);
2471 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 /* change everything in-place, starting with this one */
2474 start = result_s + (next-self_s);
2475 *start = to_c;
2476 start++;
2477 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 while (--maxcount > 0) {
2480 next = findchar(start, end-start, from_c);
2481 if (next == NULL)
2482 break;
2483 *next = to_c;
2484 start = next+1;
2485 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002488}
2489
2490/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2491Py_LOCAL(PyBytesObject *)
2492replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 const char *from_s, Py_ssize_t from_len,
2494 const char *to_s, Py_ssize_t to_len,
2495 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002496{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002497 char *result_s, *start, *end;
2498 char *self_s;
2499 Py_ssize_t self_len, offset;
2500 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 self_s = PyBytes_AS_STRING(self);
2505 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 offset = stringlib_find(self_s, self_len,
2508 from_s, from_len,
2509 0);
2510 if (offset == -1) {
2511 /* No matches; return the original string */
2512 return return_self(self);
2513 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 /* Need to make a new string */
2516 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2517 if (result == NULL)
2518 return NULL;
2519 result_s = PyBytes_AS_STRING(result);
2520 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002522 /* change everything in-place, starting with this one */
2523 start = result_s + offset;
2524 Py_MEMCPY(start, to_s, from_len);
2525 start += from_len;
2526 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 while ( --maxcount > 0) {
2529 offset = stringlib_find(start, end-start,
2530 from_s, from_len,
2531 0);
2532 if (offset==-1)
2533 break;
2534 Py_MEMCPY(start+offset, to_s, from_len);
2535 start += offset+from_len;
2536 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002539}
2540
2541/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2542Py_LOCAL(PyBytesObject *)
2543replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 char from_c,
2545 const char *to_s, Py_ssize_t to_len,
2546 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002547{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 char *self_s, *result_s;
2549 char *start, *next, *end;
2550 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002551 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002552 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 self_s = PyBytes_AS_STRING(self);
2555 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 count = countchar(self_s, self_len, from_c, maxcount);
2558 if (count == 0) {
2559 /* no matches, return unchanged */
2560 return return_self(self);
2561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 /* use the difference between current and new, hence the "-1" */
2564 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002565 assert(count > 0);
2566 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 PyErr_SetString(PyExc_OverflowError,
2568 "replacement bytes are too long");
2569 return NULL;
2570 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002571 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 if ( (result = (PyBytesObject *)
2574 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2575 return NULL;
2576 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 start = self_s;
2579 end = self_s + self_len;
2580 while (count-- > 0) {
2581 next = findchar(start, end-start, from_c);
2582 if (next == NULL)
2583 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 if (next == start) {
2586 /* replace with the 'to' */
2587 Py_MEMCPY(result_s, to_s, to_len);
2588 result_s += to_len;
2589 start += 1;
2590 } else {
2591 /* copy the unchanged old then the 'to' */
2592 Py_MEMCPY(result_s, start, next-start);
2593 result_s += (next-start);
2594 Py_MEMCPY(result_s, to_s, to_len);
2595 result_s += to_len;
2596 start = next+1;
2597 }
2598 }
2599 /* Copy the remainder of the remaining string */
2600 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002602 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603}
2604
2605/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2606Py_LOCAL(PyBytesObject *)
2607replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002608 const char *from_s, Py_ssize_t from_len,
2609 const char *to_s, Py_ssize_t to_len,
2610 Py_ssize_t maxcount) {
2611 char *self_s, *result_s;
2612 char *start, *next, *end;
2613 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002614 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002617 self_s = PyBytes_AS_STRING(self);
2618 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 count = stringlib_count(self_s, self_len,
2621 from_s, from_len,
2622 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 if (count == 0) {
2625 /* no matches, return unchanged */
2626 return return_self(self);
2627 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 /* Check for overflow */
2630 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002631 assert(count > 0);
2632 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 PyErr_SetString(PyExc_OverflowError,
2634 "replacement bytes are too long");
2635 return NULL;
2636 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002637 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 if ( (result = (PyBytesObject *)
2640 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2641 return NULL;
2642 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 start = self_s;
2645 end = self_s + self_len;
2646 while (count-- > 0) {
2647 offset = stringlib_find(start, end-start,
2648 from_s, from_len,
2649 0);
2650 if (offset == -1)
2651 break;
2652 next = start+offset;
2653 if (next == start) {
2654 /* replace with the 'to' */
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start += from_len;
2658 } else {
2659 /* copy the unchanged old then the 'to' */
2660 Py_MEMCPY(result_s, start, next-start);
2661 result_s += (next-start);
2662 Py_MEMCPY(result_s, to_s, to_len);
2663 result_s += to_len;
2664 start = next+from_len;
2665 }
2666 }
2667 /* Copy the remainder of the remaining string */
2668 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671}
2672
2673
2674Py_LOCAL(PyBytesObject *)
2675replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 const char *from_s, Py_ssize_t from_len,
2677 const char *to_s, Py_ssize_t to_len,
2678 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002679{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 if (maxcount < 0) {
2681 maxcount = PY_SSIZE_T_MAX;
2682 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2683 /* nothing to do; return the original string */
2684 return return_self(self);
2685 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 if (maxcount == 0 ||
2688 (from_len == 0 && to_len == 0)) {
2689 /* nothing to do; return the original string */
2690 return return_self(self);
2691 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 if (from_len == 0) {
2696 /* insert the 'to' string everywhere. */
2697 /* >>> "Python".replace("", ".") */
2698 /* '.P.y.t.h.o.n.' */
2699 return replace_interleave(self, to_s, to_len, maxcount);
2700 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2703 /* point for an empty self string to generate a non-empty string */
2704 /* Special case so the remaining code always gets a non-empty string */
2705 if (PyBytes_GET_SIZE(self) == 0) {
2706 return return_self(self);
2707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002709 if (to_len == 0) {
2710 /* delete all occurrences of 'from' string */
2711 if (from_len == 1) {
2712 return replace_delete_single_character(
2713 self, from_s[0], maxcount);
2714 } else {
2715 return replace_delete_substring(self, from_s,
2716 from_len, maxcount);
2717 }
2718 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 if (from_len == to_len) {
2723 if (from_len == 1) {
2724 return replace_single_character_in_place(
2725 self,
2726 from_s[0],
2727 to_s[0],
2728 maxcount);
2729 } else {
2730 return replace_substring_in_place(
2731 self, from_s, from_len, to_s, to_len,
2732 maxcount);
2733 }
2734 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002736 /* Otherwise use the more generic algorithms */
2737 if (from_len == 1) {
2738 return replace_single_character(self, from_s[0],
2739 to_s, to_len, maxcount);
2740 } else {
2741 /* len('from')>=2, len('to')>=1 */
2742 return replace_substring(self, from_s, from_len, to_s, to_len,
2743 maxcount);
2744 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002745}
2746
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002747
2748/*[clinic input]
2749bytes.replace
2750
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002751 old: Py_buffer
2752 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002753 count: Py_ssize_t = -1
2754 Maximum number of occurrences to replace.
2755 -1 (the default value) means replace all occurrences.
2756 /
2757
2758Return a copy with all occurrences of substring old replaced by new.
2759
2760If the optional argument count is given, only the first count occurrences are
2761replaced.
2762[clinic start generated code]*/
2763
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002764static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002765bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2766 Py_ssize_t count)
2767/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002768{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002770 (const char *)old->buf, old->len,
2771 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002772}
2773
2774/** End DALKE **/
2775
2776/* Matches the end (direction >= 0) or start (direction < 0) of self
2777 * against substr, using the start and end arguments. Returns
2778 * -1 on error, 0 if not found and 1 if found.
2779 */
2780Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002781_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 Py_ssize_t len = PyBytes_GET_SIZE(self);
2785 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002786 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002787 const char* sub;
2788 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 if (PyBytes_Check(substr)) {
2791 sub = PyBytes_AS_STRING(substr);
2792 slen = PyBytes_GET_SIZE(substr);
2793 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002794 else {
2795 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2796 return -1;
2797 sub = sub_view.buf;
2798 slen = sub_view.len;
2799 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 if (direction < 0) {
2805 /* startswith */
2806 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002807 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 } else {
2809 /* endswith */
2810 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002811 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 if (end-slen > start)
2814 start = end - slen;
2815 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002816 if (end-start < slen)
2817 goto notfound;
2818 if (memcmp(str+start, sub, slen) != 0)
2819 goto notfound;
2820
2821 PyBuffer_Release(&sub_view);
2822 return 1;
2823
2824notfound:
2825 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002826 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002827}
2828
2829
2830PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002831"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002832\n\
2833Return True if B starts with the specified prefix, False otherwise.\n\
2834With optional start, test B beginning at that position.\n\
2835With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002836prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837
2838static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002839bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002841 Py_ssize_t start = 0;
2842 Py_ssize_t end = PY_SSIZE_T_MAX;
2843 PyObject *subobj;
2844 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002845
Jesus Ceaac451502011-04-20 17:09:23 +02002846 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002847 return NULL;
2848 if (PyTuple_Check(subobj)) {
2849 Py_ssize_t i;
2850 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2851 result = _bytes_tailmatch(self,
2852 PyTuple_GET_ITEM(subobj, i),
2853 start, end, -1);
2854 if (result == -1)
2855 return NULL;
2856 else if (result) {
2857 Py_RETURN_TRUE;
2858 }
2859 }
2860 Py_RETURN_FALSE;
2861 }
2862 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002863 if (result == -1) {
2864 if (PyErr_ExceptionMatches(PyExc_TypeError))
2865 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2866 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002868 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 else
2870 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871}
2872
2873
2874PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002875"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002876\n\
2877Return True if B ends with the specified suffix, False otherwise.\n\
2878With optional start, test B beginning at that position.\n\
2879With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002880suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881
2882static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002883bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 Py_ssize_t start = 0;
2886 Py_ssize_t end = PY_SSIZE_T_MAX;
2887 PyObject *subobj;
2888 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002889
Jesus Ceaac451502011-04-20 17:09:23 +02002890 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 return NULL;
2892 if (PyTuple_Check(subobj)) {
2893 Py_ssize_t i;
2894 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2895 result = _bytes_tailmatch(self,
2896 PyTuple_GET_ITEM(subobj, i),
2897 start, end, +1);
2898 if (result == -1)
2899 return NULL;
2900 else if (result) {
2901 Py_RETURN_TRUE;
2902 }
2903 }
2904 Py_RETURN_FALSE;
2905 }
2906 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002907 if (result == -1) {
2908 if (PyErr_ExceptionMatches(PyExc_TypeError))
2909 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2910 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002912 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002913 else
2914 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915}
2916
2917
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002918/*[clinic input]
2919bytes.decode
2920
2921 encoding: str(c_default="NULL") = 'utf-8'
2922 The encoding with which to decode the bytes.
2923 errors: str(c_default="NULL") = 'strict'
2924 The error handling scheme to use for the handling of decoding errors.
2925 The default is 'strict' meaning that decoding errors raise a
2926 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2927 as well as any other name registered with codecs.register_error that
2928 can handle UnicodeDecodeErrors.
2929
2930Decode the bytes using the codec registered for encoding.
2931[clinic start generated code]*/
2932
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002933static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002934bytes_decode_impl(PyBytesObject*self, const char *encoding,
2935 const char *errors)
2936/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002937{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002938 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002939}
2940
Guido van Rossum20188312006-05-05 15:15:40 +00002941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002942/*[clinic input]
2943bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002944
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002945 keepends: int(py_default="False") = 0
2946
2947Return a list of the lines in the bytes, breaking at line boundaries.
2948
2949Line breaks are not included in the resulting list unless keepends is given and
2950true.
2951[clinic start generated code]*/
2952
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002953static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002954bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002955/*[clinic end generated code: output=995c3598f7833cad input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002956{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002957 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002958 (PyObject*) self, PyBytes_AS_STRING(self),
2959 PyBytes_GET_SIZE(self), keepends
2960 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002961}
2962
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002963static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002964hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002965{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002966 if (c >= 128)
2967 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002968 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 return c - '0';
2970 else {
David Malcolm96960882010-11-05 17:23:41 +00002971 if (Py_ISUPPER(c))
2972 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002973 if (c >= 'a' && c <= 'f')
2974 return c - 'a' + 10;
2975 }
2976 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002977}
2978
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002979/*[clinic input]
2980@classmethod
2981bytes.fromhex
2982
2983 string: unicode
2984 /
2985
2986Create a bytes object from a string of hexadecimal numbers.
2987
2988Spaces between two numbers are accepted.
2989Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2990[clinic start generated code]*/
2991
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002992static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002993bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002994/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002995{
2996 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002997 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 Py_ssize_t hexlen, byteslen, i, j;
2999 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003000 void *data;
3001 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003002
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003003 assert(PyUnicode_Check(string));
3004 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003006 kind = PyUnicode_KIND(string);
3007 data = PyUnicode_DATA(string);
3008 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003010 byteslen = hexlen/2; /* This overestimates if there are spaces */
3011 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3012 if (!newstring)
3013 return NULL;
3014 buf = PyBytes_AS_STRING(newstring);
3015 for (i = j = 0; i < hexlen; i += 2) {
3016 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003017 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 i++;
3019 if (i >= hexlen)
3020 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003021 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3022 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 if (top == -1 || bot == -1) {
3024 PyErr_Format(PyExc_ValueError,
3025 "non-hexadecimal number found in "
3026 "fromhex() arg at position %zd", i);
3027 goto error;
3028 }
3029 buf[j++] = (top << 4) + bot;
3030 }
3031 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3032 goto error;
3033 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003034
3035 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 Py_XDECREF(newstring);
3037 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003038}
3039
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003040static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003041bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003042{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003043 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003044}
3045
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003046
3047static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003048bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3050 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3051 _Py_capitalize__doc__},
3052 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3053 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003054 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3056 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003057 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 expandtabs__doc__},
3059 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003060 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003061 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3062 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3063 _Py_isalnum__doc__},
3064 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3065 _Py_isalpha__doc__},
3066 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3067 _Py_isdigit__doc__},
3068 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3069 _Py_islower__doc__},
3070 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3071 _Py_isspace__doc__},
3072 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3073 _Py_istitle__doc__},
3074 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3075 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003076 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003077 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3078 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003079 BYTES_LSTRIP_METHODDEF
3080 BYTES_MAKETRANS_METHODDEF
3081 BYTES_PARTITION_METHODDEF
3082 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003083 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3084 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3085 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003086 BYTES_RPARTITION_METHODDEF
3087 BYTES_RSPLIT_METHODDEF
3088 BYTES_RSTRIP_METHODDEF
3089 BYTES_SPLIT_METHODDEF
3090 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003091 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3092 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003093 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3095 _Py_swapcase__doc__},
3096 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003097 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003098 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3099 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003101};
3102
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003103static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003104bytes_mod(PyObject *v, PyObject *w)
3105{
3106 if (!PyBytes_Check(v))
3107 Py_RETURN_NOTIMPLEMENTED;
3108 return _PyBytes_Format(v, w);
3109}
3110
/* Number-protocol table for bytes.  Only nb_remainder is populated (with
   bytes_mod, i.e. the `%` operator); the three slots listed before it are
   explicitly 0 and every slot after it is left to C's implicit
   zero-initialisation of the remaining members. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
3117
3118static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003119str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3120
3121static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003122bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003123{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003124 PyObject *x = NULL;
3125 const char *encoding = NULL;
3126 const char *errors = NULL;
3127 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003128 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003129 Py_ssize_t size;
3130 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003131 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003133 if (type != &PyBytes_Type)
3134 return str_subtype_new(type, args, kwds);
3135 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3136 &encoding, &errors))
3137 return NULL;
3138 if (x == NULL) {
3139 if (encoding != NULL || errors != NULL) {
3140 PyErr_SetString(PyExc_TypeError,
3141 "encoding or errors without sequence "
3142 "argument");
3143 return NULL;
3144 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003145 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003146 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003148 if (PyUnicode_Check(x)) {
3149 /* Encode via the codec registry */
3150 if (encoding == NULL) {
3151 PyErr_SetString(PyExc_TypeError,
3152 "string argument without an encoding");
3153 return NULL;
3154 }
3155 new = PyUnicode_AsEncodedString(x, encoding, errors);
3156 if (new == NULL)
3157 return NULL;
3158 assert(PyBytes_Check(new));
3159 return new;
3160 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003161
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003162 /* If it's not unicode, there can't be encoding or errors */
3163 if (encoding != NULL || errors != NULL) {
3164 PyErr_SetString(PyExc_TypeError,
3165 "encoding or errors without a string argument");
3166 return NULL;
3167 }
3168
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003169 /* We'd like to call PyObject_Bytes here, but we need to check for an
3170 integer argument before deferring to PyBytes_FromObject, something
3171 PyObject_Bytes doesn't do. */
3172 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3173 if (func != NULL) {
3174 new = PyObject_CallFunctionObjArgs(func, NULL);
3175 Py_DECREF(func);
3176 if (new == NULL)
3177 return NULL;
3178 if (!PyBytes_Check(new)) {
3179 PyErr_Format(PyExc_TypeError,
3180 "__bytes__ returned non-bytes (type %.200s)",
3181 Py_TYPE(new)->tp_name);
3182 Py_DECREF(new);
3183 return NULL;
3184 }
3185 return new;
3186 }
3187 else if (PyErr_Occurred())
3188 return NULL;
3189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003190 /* Is it an integer? */
3191 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3192 if (size == -1 && PyErr_Occurred()) {
3193 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3194 return NULL;
3195 PyErr_Clear();
3196 }
3197 else if (size < 0) {
3198 PyErr_SetString(PyExc_ValueError, "negative count");
3199 return NULL;
3200 }
3201 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003202 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003203 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003204 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003205 return new;
3206 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003207
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003208 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003209}
3210
3211PyObject *
3212PyBytes_FromObject(PyObject *x)
3213{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003214 PyObject *new, *it;
3215 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003217 if (x == NULL) {
3218 PyErr_BadInternalCall();
3219 return NULL;
3220 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003221
3222 if (PyBytes_CheckExact(x)) {
3223 Py_INCREF(x);
3224 return x;
3225 }
3226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003227 /* Use the modern buffer interface */
3228 if (PyObject_CheckBuffer(x)) {
3229 Py_buffer view;
3230 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3231 return NULL;
3232 new = PyBytes_FromStringAndSize(NULL, view.len);
3233 if (!new)
3234 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003235 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3236 &view, view.len, 'C') < 0)
3237 goto fail;
3238 PyBuffer_Release(&view);
3239 return new;
3240 fail:
3241 Py_XDECREF(new);
3242 PyBuffer_Release(&view);
3243 return NULL;
3244 }
3245 if (PyUnicode_Check(x)) {
3246 PyErr_SetString(PyExc_TypeError,
3247 "cannot convert unicode object to bytes");
3248 return NULL;
3249 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003251 if (PyList_CheckExact(x)) {
3252 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3253 if (new == NULL)
3254 return NULL;
3255 for (i = 0; i < Py_SIZE(x); i++) {
3256 Py_ssize_t value = PyNumber_AsSsize_t(
3257 PyList_GET_ITEM(x, i), PyExc_ValueError);
3258 if (value == -1 && PyErr_Occurred()) {
3259 Py_DECREF(new);
3260 return NULL;
3261 }
3262 if (value < 0 || value >= 256) {
3263 PyErr_SetString(PyExc_ValueError,
3264 "bytes must be in range(0, 256)");
3265 Py_DECREF(new);
3266 return NULL;
3267 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003268 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003269 }
3270 return new;
3271 }
3272 if (PyTuple_CheckExact(x)) {
3273 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3274 if (new == NULL)
3275 return NULL;
3276 for (i = 0; i < Py_SIZE(x); i++) {
3277 Py_ssize_t value = PyNumber_AsSsize_t(
3278 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3279 if (value == -1 && PyErr_Occurred()) {
3280 Py_DECREF(new);
3281 return NULL;
3282 }
3283 if (value < 0 || value >= 256) {
3284 PyErr_SetString(PyExc_ValueError,
3285 "bytes must be in range(0, 256)");
3286 Py_DECREF(new);
3287 return NULL;
3288 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003289 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003290 }
3291 return new;
3292 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003294 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003295 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003296 if (size == -1 && PyErr_Occurred())
3297 return NULL;
3298 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3299 returning a shared empty bytes string. This required because we
3300 want to call _PyBytes_Resize() the returned object, which we can
3301 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003302 if (size == 0)
3303 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003304 new = PyBytes_FromStringAndSize(NULL, size);
3305 if (new == NULL)
3306 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003307 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003309 /* Get the iterator */
3310 it = PyObject_GetIter(x);
3311 if (it == NULL)
3312 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003314 /* Run the iterator to exhaustion */
3315 for (i = 0; ; i++) {
3316 PyObject *item;
3317 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003319 /* Get the next item */
3320 item = PyIter_Next(it);
3321 if (item == NULL) {
3322 if (PyErr_Occurred())
3323 goto error;
3324 break;
3325 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003327 /* Interpret it as an int (__index__) */
3328 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3329 Py_DECREF(item);
3330 if (value == -1 && PyErr_Occurred())
3331 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003332
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003333 /* Range check */
3334 if (value < 0 || value >= 256) {
3335 PyErr_SetString(PyExc_ValueError,
3336 "bytes must be in range(0, 256)");
3337 goto error;
3338 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003340 /* Append the byte */
3341 if (i >= size) {
3342 size = 2 * size + 1;
3343 if (_PyBytes_Resize(&new, size) < 0)
3344 goto error;
3345 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003346 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003347 }
3348 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003350 /* Clean up and return success */
3351 Py_DECREF(it);
3352 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003353
3354 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003355 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003356 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003357 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003358}
3359
3360static PyObject *
3361str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003363 PyObject *tmp, *pnew;
3364 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003366 assert(PyType_IsSubtype(type, &PyBytes_Type));
3367 tmp = bytes_new(&PyBytes_Type, args, kwds);
3368 if (tmp == NULL)
3369 return NULL;
3370 assert(PyBytes_CheckExact(tmp));
3371 n = PyBytes_GET_SIZE(tmp);
3372 pnew = type->tp_alloc(type, n);
3373 if (pnew != NULL) {
3374 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3375 PyBytes_AS_STRING(tmp), n+1);
3376 ((PyBytesObject *)pnew)->ob_shash =
3377 ((PyBytesObject *)tmp)->ob_shash;
3378 }
3379 Py_DECREF(tmp);
3380 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003381}
3382
/* Docstring of the bytes type (installed as tp_doc of PyBytes_Type); the
   listed call forms mirror the cases handled by bytes_new(). */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - any object implementing the buffer API.\n\
  - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003395
/* Defined with the bytes iterator further down; needed for tp_iter. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  Slots are given positionally, so their order
   must match the PyTypeObject layout; each one is labelled on its line. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003440
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003441void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003442PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003443{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003444 assert(pv != NULL);
3445 if (*pv == NULL)
3446 return;
3447 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003448 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003449 return;
3450 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003451
3452 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3453 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003454 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003455 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003456
Antoine Pitrou161d6952014-05-01 14:36:20 +02003457 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003458 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003459 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3460 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3461 Py_CLEAR(*pv);
3462 return;
3463 }
3464
3465 oldsize = PyBytes_GET_SIZE(*pv);
3466 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3467 PyErr_NoMemory();
3468 goto error;
3469 }
3470 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3471 goto error;
3472
3473 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3474 PyBuffer_Release(&wb);
3475 return;
3476
3477 error:
3478 PyBuffer_Release(&wb);
3479 Py_CLEAR(*pv);
3480 return;
3481 }
3482
3483 else {
3484 /* Multiple references, need to create new object */
3485 PyObject *v;
3486 v = bytes_concat(*pv, w);
3487 Py_DECREF(*pv);
3488 *pv = v;
3489 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003490}
3491
3492void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003493PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003494{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003495 PyBytes_Concat(pv, w);
3496 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003497}
3498
3499
Ethan Furmanb95b5612015-01-23 20:05:18 -08003500/* The following function breaks the notion that bytes are immutable:
3501 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003502 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003503 as creating a new bytes object and destroying the old one, only
3504 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003505 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003506 Note that if there's not enough memory to resize the bytes object, the
3507 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003508 memory" exception is set, and -1 is returned. Else (on success) 0 is
3509 returned, and the value in *pv may or may not be the same as on input.
3510 As always, an extra byte is allocated for a trailing \0 byte (newsize
3511 does *not* include that), and a trailing \0 byte is stored.
3512*/
3513
3514int
3515_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3516{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003517 PyObject *v;
3518 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003519 v = *pv;
3520 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3521 *pv = 0;
3522 Py_DECREF(v);
3523 PyErr_BadInternalCall();
3524 return -1;
3525 }
3526 /* XXX UNREF/NEWREF interface should be more symmetrical */
3527 _Py_DEC_REFTOTAL;
3528 _Py_ForgetReference(v);
3529 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003530 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003531 if (*pv == NULL) {
3532 PyObject_Del(v);
3533 PyErr_NoMemory();
3534 return -1;
3535 }
3536 _Py_NewReference(*pv);
3537 sv = (PyBytesObject *) *pv;
3538 Py_SIZE(sv) = newsize;
3539 sv->ob_sval[newsize] = '\0';
3540 sv->ob_shash = -1; /* invalidate cached hash value */
3541 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003542}
3543
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003544void
3545PyBytes_Fini(void)
3546{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003547 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003548 for (i = 0; i < UCHAR_MAX + 1; i++)
3549 Py_CLEAR(characters[i]);
3550 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003551}
3552
Benjamin Peterson4116f362008-05-27 00:36:20 +00003553/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003554
/* State of a bytes iterator (instances of PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003560
3561static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003562striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003563{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003564 _PyObject_GC_UNTRACK(it);
3565 Py_XDECREF(it->it_seq);
3566 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003567}
3568
3569static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003570striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003571{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003572 Py_VISIT(it->it_seq);
3573 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003574}
3575
3576static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003577striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003578{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003579 PyBytesObject *seq;
3580 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003582 assert(it != NULL);
3583 seq = it->it_seq;
3584 if (seq == NULL)
3585 return NULL;
3586 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003588 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3589 item = PyLong_FromLong(
3590 (unsigned char)seq->ob_sval[it->it_index]);
3591 if (item != NULL)
3592 ++it->it_index;
3593 return item;
3594 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003596 Py_DECREF(seq);
3597 it->it_seq = NULL;
3598 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003599}
3600
3601static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003602striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003603{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003604 Py_ssize_t len = 0;
3605 if (it->it_seq)
3606 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3607 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003608}
3609
3610PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003611 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003612
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003613static PyObject *
3614striter_reduce(striterobject *it)
3615{
3616 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003617 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003618 it->it_seq, it->it_index);
3619 } else {
3620 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3621 if (u == NULL)
3622 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003623 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003624 }
3625}
3626
3627PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3628
3629static PyObject *
3630striter_setstate(striterobject *it, PyObject *state)
3631{
3632 Py_ssize_t index = PyLong_AsSsize_t(state);
3633 if (index == -1 && PyErr_Occurred())
3634 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003635 if (it->it_seq != NULL) {
3636 if (index < 0)
3637 index = 0;
3638 else if (index > PyBytes_GET_SIZE(it->it_seq))
3639 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3640 it->it_index = index;
3641 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003642 Py_RETURN_NONE;
3643}
3644
3645PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3646
/* Methods of the bytes iterator: the length hint plus the pair of pickling
   hooks.  __setstate__ takes exactly one argument (METH_O). */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3656
/* Type object for the bytes iterator.  Slots are positional; everything
   after tp_members is left to implicit zero-initialisation. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3689
3690static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003691bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003692{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003693 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003695 if (!PyBytes_Check(seq)) {
3696 PyErr_BadInternalCall();
3697 return NULL;
3698 }
3699 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3700 if (it == NULL)
3701 return NULL;
3702 it->it_index = 0;
3703 Py_INCREF(seq);
3704 it->it_seq = (PyBytesObject *)seq;
3705 _PyObject_GC_TRACK(it);
3706 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003707}