blob: b6089dd50336f7fabc8f913dc608110a826c77ec [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030015#include "clinic/bytesobject.c.h"
16
#ifdef COUNT_ALLOCS
/* Statistics counters: incremented each time the cached empty bytes object
   (null_strings) or a cached one-byte bytes object (one_strings) is handed
   out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte value
   (`*str & UCHAR_MAX`).  Entries are filled lazily the first time a
   one-byte object is created from data; the cache holds its own reference. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* The shared empty bytes object, created lazily on the first request for a
   zero-length bytes object; the cache holds its own reference. */
static PyBytesObject *nullstring;
23
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.

   The "+ 1" reserves room for the null byte that the constructors in this
   file store at ob_sval[n], right after the n data bytes.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
31
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020054static PyObject *
55_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000056{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020057 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020058 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000063#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000064 Py_INCREF(op);
65 return (PyObject *)op;
66 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000067
Victor Stinner049e5092014-08-17 22:20:00 +020068 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 PyErr_SetString(PyExc_OverflowError,
70 "byte string is too large");
71 return NULL;
72 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020075 if (use_calloc)
76 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
77 else
78 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 if (op == NULL)
80 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010081 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020083 if (!use_calloc)
84 op->ob_sval[size] = '\0';
85 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 if (size == 0) {
87 nullstring = op;
88 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020089 }
90 return (PyObject *) op;
91}
92
93PyObject *
94PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
95{
96 PyBytesObject *op;
97 if (size < 0) {
98 PyErr_SetString(PyExc_SystemError,
99 "Negative size passed to PyBytes_FromStringAndSize");
100 return NULL;
101 }
102 if (size == 1 && str != NULL &&
103 (op = characters[*str & UCHAR_MAX]) != NULL)
104 {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
108 Py_INCREF(op);
109 return (PyObject *)op;
110 }
111
112 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
113 if (op == NULL)
114 return NULL;
115 if (str == NULL)
116 return (PyObject *) op;
117
118 Py_MEMCPY(op->ob_sval, str, size);
119 /* share short strings */
120 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 characters[*str & UCHAR_MAX] = op;
122 Py_INCREF(op);
123 }
124 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000125}
126
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000127PyObject *
128PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000129{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200130 size_t size;
131 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 assert(str != NULL);
134 size = strlen(str);
135 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
136 PyErr_SetString(PyExc_OverflowError,
137 "byte string is too long");
138 return NULL;
139 }
140 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000141#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000143#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000144 Py_INCREF(op);
145 return (PyObject *)op;
146 }
147 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000150#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 Py_INCREF(op);
152 return (PyObject *)op;
153 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 /* Inline PyObject_NewVar */
156 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
157 if (op == NULL)
158 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100159 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 op->ob_shash = -1;
161 Py_MEMCPY(op->ob_sval, str, size+1);
162 /* share short strings */
163 if (size == 0) {
164 nullstring = op;
165 Py_INCREF(op);
166 } else if (size == 1) {
167 characters[*str & UCHAR_MAX] = op;
168 Py_INCREF(op);
169 }
170 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000171}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000172
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000173PyObject *
174PyBytes_FromFormatV(const char *format, va_list vargs)
175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 va_list count;
177 Py_ssize_t n = 0;
178 const char* f;
179 char *s;
180 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000181
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000182 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 /* step 1: figure out how large a buffer we need */
184 for (f = format; *f; f++) {
185 if (*f == '%') {
186 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000187 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
191 * they don't affect the amount of space we reserve.
192 */
193 if ((*f == 'l' || *f == 'z') &&
194 (f[1] == 'd' || f[1] == 'u'))
195 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 switch (*f) {
198 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100199 {
200 int c = va_arg(count, int);
201 if (c < 0 || c > 255) {
202 PyErr_SetString(PyExc_OverflowError,
203 "PyBytes_FromFormatV(): %c format "
204 "expects an integer in range [0; 255]");
205 return NULL;
206 }
207 n++;
208 break;
209 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000210 case '%':
211 n++;
212 break;
213 case 'd': case 'u': case 'i': case 'x':
214 (void) va_arg(count, int);
215 /* 20 bytes is enough to hold a 64-bit
216 integer. Decimal takes the most space.
217 This isn't enough for octal. */
218 n += 20;
219 break;
220 case 's':
221 s = va_arg(count, char*);
222 n += strlen(s);
223 break;
224 case 'p':
225 (void) va_arg(count, int);
226 /* maximum 64-bit pointer representation:
227 * 0xffffffffffffffff
228 * so 19 characters is enough.
229 * XXX I count 18 -- what's the extra for?
230 */
231 n += 19;
232 break;
233 default:
234 /* if we stumble upon an unknown
235 formatting code, copy the rest of
236 the format string to the output
237 string. (we cannot just skip the
238 code, since there's no way to know
239 what's in the argument list) */
240 n += strlen(p);
241 goto expand;
242 }
243 } else
244 n++;
245 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000246 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 /* step 2: fill the buffer */
248 /* Since we've analyzed how much space we need for the worst case,
249 use sprintf directly instead of the slower PyOS_snprintf. */
250 string = PyBytes_FromStringAndSize(NULL, n);
251 if (!string)
252 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 for (f = format; *f; f++) {
257 if (*f == '%') {
258 const char* p = f++;
259 Py_ssize_t i;
260 int longflag = 0;
261 int size_tflag = 0;
262 /* parse the width.precision part (we're only
263 interested in the precision value, if any) */
264 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000265 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 n = (n*10) + *f++ - '0';
267 if (*f == '.') {
268 f++;
269 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000270 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 n = (n*10) + *f++ - '0';
272 }
David Malcolm96960882010-11-05 17:23:41 +0000273 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 f++;
275 /* handle the long flag, but only for %ld and %lu.
276 others can be added when necessary. */
277 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
278 longflag = 1;
279 ++f;
280 }
281 /* handle the size_t flag. */
282 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
283 size_tflag = 1;
284 ++f;
285 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 switch (*f) {
288 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100289 {
290 int c = va_arg(vargs, int);
291 /* c has been checked for overflow in the first step */
292 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100294 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 case 'd':
296 if (longflag)
297 sprintf(s, "%ld", va_arg(vargs, long));
298 else if (size_tflag)
299 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
300 va_arg(vargs, Py_ssize_t));
301 else
302 sprintf(s, "%d", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 'u':
306 if (longflag)
307 sprintf(s, "%lu",
308 va_arg(vargs, unsigned long));
309 else if (size_tflag)
310 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
311 va_arg(vargs, size_t));
312 else
313 sprintf(s, "%u",
314 va_arg(vargs, unsigned int));
315 s += strlen(s);
316 break;
317 case 'i':
318 sprintf(s, "%i", va_arg(vargs, int));
319 s += strlen(s);
320 break;
321 case 'x':
322 sprintf(s, "%x", va_arg(vargs, int));
323 s += strlen(s);
324 break;
325 case 's':
326 p = va_arg(vargs, char*);
327 i = strlen(p);
328 if (n > 0 && i > n)
329 i = n;
330 Py_MEMCPY(s, p, i);
331 s += i;
332 break;
333 case 'p':
334 sprintf(s, "%p", va_arg(vargs, void*));
335 /* %p is ill-defined: ensure leading 0x. */
336 if (s[1] == 'X')
337 s[1] = 'x';
338 else if (s[1] != 'x') {
339 memmove(s+2, s, strlen(s)+1);
340 s[0] = '0';
341 s[1] = 'x';
342 }
343 s += strlen(s);
344 break;
345 case '%':
346 *s++ = '%';
347 break;
348 default:
349 strcpy(s, p);
350 s += strlen(s);
351 goto end;
352 }
353 } else
354 *s++ = *f;
355 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000356
357 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
359 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000360}
361
362PyObject *
363PyBytes_FromFormat(const char *format, ...)
364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 PyObject* ret;
366 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000367
368#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000372#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000373 ret = PyBytes_FromFormatV(format, vargs);
374 va_end(vargs);
375 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000376}
377
Ethan Furmanb95b5612015-01-23 20:05:18 -0800378/* Helpers for formatstring */
379
380Py_LOCAL_INLINE(PyObject *)
381getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
382{
383 Py_ssize_t argidx = *p_argidx;
384 if (argidx < arglen) {
385 (*p_argidx)++;
386 if (arglen < 0)
387 return args;
388 else
389 return PyTuple_GetItem(args, argidx);
390 }
391 PyErr_SetString(PyExc_TypeError,
392 "not enough arguments for format string");
393 return NULL;
394}
395
/* Format codes
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 *
 * Bit flags OR-ed into `flags' by _PyBytes_Format() as it meets the
 * corresponding flag character after a '%'.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
408
409/* Returns a new reference to a PyBytes object, or NULL on failure. */
410
411static PyObject *
412formatfloat(PyObject *v, int flags, int prec, int type)
413{
414 char *p;
415 PyObject *result;
416 double x;
417
418 x = PyFloat_AsDouble(v);
419 if (x == -1.0 && PyErr_Occurred()) {
420 PyErr_Format(PyExc_TypeError, "float argument required, "
421 "not %.200s", Py_TYPE(v)->tp_name);
422 return NULL;
423 }
424
425 if (prec < 0)
426 prec = 6;
427
428 p = PyOS_double_to_string(x, type, prec,
429 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
430
431 if (p == NULL)
432 return NULL;
433 result = PyBytes_FromStringAndSize(p, strlen(p));
434 PyMem_Free(p);
435 return result;
436}
437
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300438static PyObject *
439formatlong(PyObject *v, int flags, int prec, int type)
440{
441 PyObject *result, *iobj;
442 if (type == 'i')
443 type = 'd';
444 if (PyLong_Check(v))
445 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
446 if (PyNumber_Check(v)) {
447 /* make sure number is a type of integer for o, x, and X */
448 if (type == 'o' || type == 'x' || type == 'X')
449 iobj = PyNumber_Index(v);
450 else
451 iobj = PyNumber_Long(v);
452 if (iobj == NULL) {
453 if (!PyErr_ExceptionMatches(PyExc_TypeError))
454 return NULL;
455 }
456 else if (!PyLong_Check(iobj))
457 Py_CLEAR(iobj);
458 if (iobj != NULL) {
459 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
460 Py_DECREF(iobj);
461 return result;
462 }
463 }
464 PyErr_Format(PyExc_TypeError,
465 "%%%c format: %s is required, not %.200s", type,
466 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
467 : "a number",
468 Py_TYPE(v)->tp_name);
469 return NULL;
470}
471
472static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200473byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800474{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200475 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
476 *p = PyBytes_AS_STRING(arg)[0];
477 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800478 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200479 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
480 *p = PyByteArray_AS_STRING(arg)[0];
481 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800482 }
483 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300484 PyObject *iobj;
485 long ival;
486 int overflow;
487 /* make sure number is a type of integer */
488 if (PyLong_Check(arg)) {
489 ival = PyLong_AsLongAndOverflow(arg, &overflow);
490 }
491 else {
492 iobj = PyNumber_Index(arg);
493 if (iobj == NULL) {
494 if (!PyErr_ExceptionMatches(PyExc_TypeError))
495 return 0;
496 goto onError;
497 }
498 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
499 Py_DECREF(iobj);
500 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300501 if (!overflow && ival == -1 && PyErr_Occurred())
502 goto onError;
503 if (overflow || !(0 <= ival && ival <= 255)) {
504 PyErr_SetString(PyExc_OverflowError,
505 "%c arg not in range(256)");
506 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800507 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300508 *p = (char)ival;
509 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800510 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300511 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200512 PyErr_SetString(PyExc_TypeError,
513 "%c requires an integer in range(256) or a single byte");
514 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800515}
516
517static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200518format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800519{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200520 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800521 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 /* is it a bytes object? */
523 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200524 *pbuf = PyBytes_AS_STRING(v);
525 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800526 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200527 return v;
528 }
529 if (PyByteArray_Check(v)) {
530 *pbuf = PyByteArray_AS_STRING(v);
531 *plen = PyByteArray_GET_SIZE(v);
532 Py_INCREF(v);
533 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534 }
535 /* does it support __bytes__? */
536 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
537 if (func != NULL) {
538 result = PyObject_CallFunctionObjArgs(func, NULL);
539 Py_DECREF(func);
540 if (result == NULL)
541 return NULL;
542 if (!PyBytes_Check(result)) {
543 PyErr_Format(PyExc_TypeError,
544 "__bytes__ returned non-bytes (type %.200s)",
545 Py_TYPE(result)->tp_name);
546 Py_DECREF(result);
547 return NULL;
548 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200549 *pbuf = PyBytes_AS_STRING(result);
550 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800551 return result;
552 }
553 PyErr_Format(PyExc_TypeError,
554 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
555 Py_TYPE(v)->tp_name);
556 return NULL;
557}
558
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   NOTE(review): no use of FORMATBUFLEN is visible in the formatting code
   that follows -- confirm whether the macro is still needed.
*/
#define FORMATBUFLEN (size_t)120
568
569PyObject *
570_PyBytes_Format(PyObject *format, PyObject *args)
571{
572 char *fmt, *res;
573 Py_ssize_t arglen, argidx;
574 Py_ssize_t reslen, rescnt, fmtcnt;
575 int args_owned = 0;
576 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577 PyObject *dict = NULL;
578 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
579 PyErr_BadInternalCall();
580 return NULL;
581 }
582 fmt = PyBytes_AS_STRING(format);
583 fmtcnt = PyBytes_GET_SIZE(format);
584 reslen = rescnt = fmtcnt + 100;
585 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
586 if (result == NULL)
587 return NULL;
588 res = PyBytes_AsString(result);
589 if (PyTuple_Check(args)) {
590 arglen = PyTuple_GET_SIZE(args);
591 argidx = 0;
592 }
593 else {
594 arglen = -1;
595 argidx = -2;
596 }
597 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
598 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
599 !PyByteArray_Check(args)) {
600 dict = args;
601 }
602 while (--fmtcnt >= 0) {
603 if (*fmt != '%') {
604 if (--rescnt < 0) {
605 rescnt = fmtcnt + 100;
606 reslen += rescnt;
607 if (_PyBytes_Resize(&result, reslen))
608 return NULL;
609 res = PyBytes_AS_STRING(result)
610 + reslen - rescnt;
611 --rescnt;
612 }
613 *res++ = *fmt++;
614 }
615 else {
616 /* Got a format specifier */
617 int flags = 0;
618 Py_ssize_t width = -1;
619 int prec = -1;
620 int c = '\0';
621 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800622 PyObject *v = NULL;
623 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200624 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200626 Py_ssize_t len = 0;
627 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800628
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629 fmt++;
630 if (*fmt == '(') {
631 char *keystart;
632 Py_ssize_t keylen;
633 PyObject *key;
634 int pcount = 1;
635
636 if (dict == NULL) {
637 PyErr_SetString(PyExc_TypeError,
638 "format requires a mapping");
639 goto error;
640 }
641 ++fmt;
642 --fmtcnt;
643 keystart = fmt;
644 /* Skip over balanced parentheses */
645 while (pcount > 0 && --fmtcnt >= 0) {
646 if (*fmt == ')')
647 --pcount;
648 else if (*fmt == '(')
649 ++pcount;
650 fmt++;
651 }
652 keylen = fmt - keystart - 1;
653 if (fmtcnt < 0 || pcount > 0) {
654 PyErr_SetString(PyExc_ValueError,
655 "incomplete format key");
656 goto error;
657 }
658 key = PyBytes_FromStringAndSize(keystart,
659 keylen);
660 if (key == NULL)
661 goto error;
662 if (args_owned) {
663 Py_DECREF(args);
664 args_owned = 0;
665 }
666 args = PyObject_GetItem(dict, key);
667 Py_DECREF(key);
668 if (args == NULL) {
669 goto error;
670 }
671 args_owned = 1;
672 arglen = -1;
673 argidx = -2;
674 }
675 while (--fmtcnt >= 0) {
676 switch (c = *fmt++) {
677 case '-': flags |= F_LJUST; continue;
678 case '+': flags |= F_SIGN; continue;
679 case ' ': flags |= F_BLANK; continue;
680 case '#': flags |= F_ALT; continue;
681 case '0': flags |= F_ZERO; continue;
682 }
683 break;
684 }
685 if (c == '*') {
686 v = getnextarg(args, arglen, &argidx);
687 if (v == NULL)
688 goto error;
689 if (!PyLong_Check(v)) {
690 PyErr_SetString(PyExc_TypeError,
691 "* wants int");
692 goto error;
693 }
694 width = PyLong_AsSsize_t(v);
695 if (width == -1 && PyErr_Occurred())
696 goto error;
697 if (width < 0) {
698 flags |= F_LJUST;
699 width = -width;
700 }
701 if (--fmtcnt >= 0)
702 c = *fmt++;
703 }
704 else if (c >= 0 && isdigit(c)) {
705 width = c - '0';
706 while (--fmtcnt >= 0) {
707 c = Py_CHARMASK(*fmt++);
708 if (!isdigit(c))
709 break;
710 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
711 PyErr_SetString(
712 PyExc_ValueError,
713 "width too big");
714 goto error;
715 }
716 width = width*10 + (c - '0');
717 }
718 }
719 if (c == '.') {
720 prec = 0;
721 if (--fmtcnt >= 0)
722 c = *fmt++;
723 if (c == '*') {
724 v = getnextarg(args, arglen, &argidx);
725 if (v == NULL)
726 goto error;
727 if (!PyLong_Check(v)) {
728 PyErr_SetString(
729 PyExc_TypeError,
730 "* wants int");
731 goto error;
732 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200733 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800734 if (prec == -1 && PyErr_Occurred())
735 goto error;
736 if (prec < 0)
737 prec = 0;
738 if (--fmtcnt >= 0)
739 c = *fmt++;
740 }
741 else if (c >= 0 && isdigit(c)) {
742 prec = c - '0';
743 while (--fmtcnt >= 0) {
744 c = Py_CHARMASK(*fmt++);
745 if (!isdigit(c))
746 break;
747 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
748 PyErr_SetString(
749 PyExc_ValueError,
750 "prec too big");
751 goto error;
752 }
753 prec = prec*10 + (c - '0');
754 }
755 }
756 } /* prec */
757 if (fmtcnt >= 0) {
758 if (c == 'h' || c == 'l' || c == 'L') {
759 if (--fmtcnt >= 0)
760 c = *fmt++;
761 }
762 }
763 if (fmtcnt < 0) {
764 PyErr_SetString(PyExc_ValueError,
765 "incomplete format");
766 goto error;
767 }
768 if (c != '%') {
769 v = getnextarg(args, arglen, &argidx);
770 if (v == NULL)
771 goto error;
772 }
773 sign = 0;
774 fill = ' ';
775 switch (c) {
776 case '%':
777 pbuf = "%";
778 len = 1;
779 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700780 case 'r':
781 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800782 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200783 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800784 if (temp == NULL)
785 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200786 assert(PyUnicode_IS_ASCII(temp));
787 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
788 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800789 if (prec >= 0 && len > prec)
790 len = prec;
791 break;
792 case 's':
793 // %s is only for 2/3 code; 3 only code should use %b
794 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200795 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800796 if (temp == NULL)
797 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800798 if (prec >= 0 && len > prec)
799 len = prec;
800 break;
801 case 'i':
802 case 'd':
803 case 'u':
804 case 'o':
805 case 'x':
806 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300807 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200808 if (!temp)
809 goto error;
810 assert(PyUnicode_IS_ASCII(temp));
811 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
812 len = PyUnicode_GET_LENGTH(temp);
813 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800814 if (flags & F_ZERO)
815 fill = '0';
816 break;
817 case 'e':
818 case 'E':
819 case 'f':
820 case 'F':
821 case 'g':
822 case 'G':
823 temp = formatfloat(v, flags, prec, c);
824 if (temp == NULL)
825 goto error;
826 pbuf = PyBytes_AS_STRING(temp);
827 len = PyBytes_GET_SIZE(temp);
828 sign = 1;
829 if (flags & F_ZERO)
830 fill = '0';
831 break;
832 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200833 pbuf = &onechar;
834 len = byte_converter(v, &onechar);
835 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800836 goto error;
837 break;
838 default:
839 PyErr_Format(PyExc_ValueError,
840 "unsupported format character '%c' (0x%x) "
841 "at index %zd",
842 c, c,
843 (Py_ssize_t)(fmt - 1 -
844 PyBytes_AsString(format)));
845 goto error;
846 }
847 if (sign) {
848 if (*pbuf == '-' || *pbuf == '+') {
849 sign = *pbuf++;
850 len--;
851 }
852 else if (flags & F_SIGN)
853 sign = '+';
854 else if (flags & F_BLANK)
855 sign = ' ';
856 else
857 sign = 0;
858 }
859 if (width < len)
860 width = len;
861 if (rescnt - (sign != 0) < width) {
862 reslen -= rescnt;
863 rescnt = width + fmtcnt + 100;
864 reslen += rescnt;
865 if (reslen < 0) {
866 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800867 Py_XDECREF(temp);
868 return PyErr_NoMemory();
869 }
870 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800871 Py_XDECREF(temp);
872 return NULL;
873 }
874 res = PyBytes_AS_STRING(result)
875 + reslen - rescnt;
876 }
877 if (sign) {
878 if (fill != ' ')
879 *res++ = sign;
880 rescnt--;
881 if (width > len)
882 width--;
883 }
884 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
885 assert(pbuf[0] == '0');
886 assert(pbuf[1] == c);
887 if (fill != ' ') {
888 *res++ = *pbuf++;
889 *res++ = *pbuf++;
890 }
891 rescnt -= 2;
892 width -= 2;
893 if (width < 0)
894 width = 0;
895 len -= 2;
896 }
897 if (width > len && !(flags & F_LJUST)) {
898 do {
899 --rescnt;
900 *res++ = fill;
901 } while (--width > len);
902 }
903 if (fill == ' ') {
904 if (sign)
905 *res++ = sign;
906 if ((flags & F_ALT) &&
907 (c == 'x' || c == 'X')) {
908 assert(pbuf[0] == '0');
909 assert(pbuf[1] == c);
910 *res++ = *pbuf++;
911 *res++ = *pbuf++;
912 }
913 }
914 Py_MEMCPY(res, pbuf, len);
915 res += len;
916 rescnt -= len;
917 while (--width >= len) {
918 --rescnt;
919 *res++ = ' ';
920 }
921 if (dict && (argidx < arglen) && c != '%') {
922 PyErr_SetString(PyExc_TypeError,
923 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 Py_XDECREF(temp);
925 goto error;
926 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800927 Py_XDECREF(temp);
928 } /* '%' */
929 } /* until end */
930 if (argidx < arglen && !dict) {
931 PyErr_SetString(PyExc_TypeError,
932 "not all arguments converted during bytes formatting");
933 goto error;
934 }
935 if (args_owned) {
936 Py_DECREF(args);
937 }
938 if (_PyBytes_Resize(&result, reslen - rescnt))
939 return NULL;
940 return result;
941
942 error:
943 Py_DECREF(result);
944 if (args_owned) {
945 Py_DECREF(args);
946 }
947 return NULL;
948}
949
950/* =-= */
951
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000952static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000953bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000954{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000956}
957
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000958/* Unescape a backslash-escaped string. If unicode is non-zero,
959 the string is a u-literal. If recode_encoding is non-zero,
960 the string is UTF-8 encoded and should be re-encoded in the
961 specified encoding. */
962
963PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 Py_ssize_t len,
965 const char *errors,
966 Py_ssize_t unicode,
967 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 int c;
970 char *p, *buf;
971 const char *end;
972 PyObject *v;
973 Py_ssize_t newlen = recode_encoding ? 4*len:len;
974 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
975 if (v == NULL)
976 return NULL;
977 p = buf = PyBytes_AsString(v);
978 end = s + len;
979 while (s < end) {
980 if (*s != '\\') {
981 non_esc:
982 if (recode_encoding && (*s & 0x80)) {
983 PyObject *u, *w;
984 char *r;
985 const char* t;
986 Py_ssize_t rn;
987 t = s;
988 /* Decode non-ASCII bytes as UTF-8. */
989 while (t < end && (*t & 0x80)) t++;
990 u = PyUnicode_DecodeUTF8(s, t - s, errors);
991 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 /* Recode them in target encoding. */
994 w = PyUnicode_AsEncodedString(
995 u, recode_encoding, errors);
996 Py_DECREF(u);
997 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000998
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 /* Append bytes to output buffer. */
1000 assert(PyBytes_Check(w));
1001 r = PyBytes_AS_STRING(w);
1002 rn = PyBytes_GET_SIZE(w);
1003 Py_MEMCPY(p, r, rn);
1004 p += rn;
1005 Py_DECREF(w);
1006 s = t;
1007 } else {
1008 *p++ = *s++;
1009 }
1010 continue;
1011 }
1012 s++;
1013 if (s==end) {
1014 PyErr_SetString(PyExc_ValueError,
1015 "Trailing \\ in string");
1016 goto failed;
1017 }
1018 switch (*s++) {
1019 /* XXX This assumes ASCII! */
1020 case '\n': break;
1021 case '\\': *p++ = '\\'; break;
1022 case '\'': *p++ = '\''; break;
1023 case '\"': *p++ = '\"'; break;
1024 case 'b': *p++ = '\b'; break;
1025 case 'f': *p++ = '\014'; break; /* FF */
1026 case 't': *p++ = '\t'; break;
1027 case 'n': *p++ = '\n'; break;
1028 case 'r': *p++ = '\r'; break;
1029 case 'v': *p++ = '\013'; break; /* VT */
1030 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1031 case '0': case '1': case '2': case '3':
1032 case '4': case '5': case '6': case '7':
1033 c = s[-1] - '0';
1034 if (s < end && '0' <= *s && *s <= '7') {
1035 c = (c<<3) + *s++ - '0';
1036 if (s < end && '0' <= *s && *s <= '7')
1037 c = (c<<3) + *s++ - '0';
1038 }
1039 *p++ = c;
1040 break;
1041 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001042 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 unsigned int x = 0;
1044 c = Py_CHARMASK(*s);
1045 s++;
David Malcolm96960882010-11-05 17:23:41 +00001046 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001048 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 x = 10 + c - 'a';
1050 else
1051 x = 10 + c - 'A';
1052 x = x << 4;
1053 c = Py_CHARMASK(*s);
1054 s++;
David Malcolm96960882010-11-05 17:23:41 +00001055 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001057 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 x += 10 + c - 'a';
1059 else
1060 x += 10 + c - 'A';
1061 *p++ = x;
1062 break;
1063 }
1064 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001065 PyErr_Format(PyExc_ValueError,
1066 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001067 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 goto failed;
1069 }
1070 if (strcmp(errors, "replace") == 0) {
1071 *p++ = '?';
1072 } else if (strcmp(errors, "ignore") == 0)
1073 /* do nothing */;
1074 else {
1075 PyErr_Format(PyExc_ValueError,
1076 "decoding error; unknown "
1077 "error handling code: %.400s",
1078 errors);
1079 goto failed;
1080 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001081 /* skip \x */
1082 if (s < end && Py_ISXDIGIT(s[0]))
1083 s++; /* and a hexdigit */
1084 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 default:
1086 *p++ = '\\';
1087 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001088 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 UTF-8 bytes may follow. */
1090 }
1091 }
1092 if (p-buf < newlen)
1093 _PyBytes_Resize(&v, p - buf);
1094 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001095 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 Py_DECREF(v);
1097 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001098}
1099
1100/* -------------------------------------------------------------------- */
1101/* object api */
1102
1103Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001104PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001105{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 if (!PyBytes_Check(op)) {
1107 PyErr_Format(PyExc_TypeError,
1108 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1109 return -1;
1110 }
1111 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112}
1113
1114char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001115PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 if (!PyBytes_Check(op)) {
1118 PyErr_Format(PyExc_TypeError,
1119 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1120 return NULL;
1121 }
1122 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001123}
1124
1125int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001126PyBytes_AsStringAndSize(PyObject *obj,
1127 char **s,
1128 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001129{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 if (s == NULL) {
1131 PyErr_BadInternalCall();
1132 return -1;
1133 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 if (!PyBytes_Check(obj)) {
1136 PyErr_Format(PyExc_TypeError,
1137 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1138 return -1;
1139 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 *s = PyBytes_AS_STRING(obj);
1142 if (len != NULL)
1143 *len = PyBytes_GET_SIZE(obj);
1144 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001145 PyErr_SetString(PyExc_ValueError,
1146 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 return -1;
1148 }
1149 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150}
Neal Norwitz6968b052007-02-27 19:02:19 +00001151
1152/* -------------------------------------------------------------------- */
1153/* Methods */
1154
Eric Smith0923d1d2009-04-16 20:16:10 +00001155#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001156
1157#include "stringlib/fastsearch.h"
1158#include "stringlib/count.h"
1159#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001160#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001161#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001162#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001163#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001164
Eric Smith0f78bff2009-11-30 01:01:42 +00001165#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001167PyObject *
1168PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001169{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001170 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001171 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001172 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001174 unsigned char quote, *s, *p;
1175
1176 /* Compute size of output string */
1177 squotes = dquotes = 0;
1178 newsize = 3; /* b'' */
1179 s = (unsigned char*)op->ob_sval;
1180 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001181 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001182 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001183 case '\'': squotes++; break;
1184 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001185 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001186 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001187 default:
1188 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001189 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001190 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001191 if (newsize > PY_SSIZE_T_MAX - incr)
1192 goto overflow;
1193 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001194 }
1195 quote = '\'';
1196 if (smartquotes && squotes && !dquotes)
1197 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001198 if (squotes && quote == '\'') {
1199 if (newsize > PY_SSIZE_T_MAX - squotes)
1200 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001201 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001203
1204 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 if (v == NULL) {
1206 return NULL;
1207 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001208 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001210 *p++ = 'b', *p++ = quote;
1211 for (i = 0; i < length; i++) {
1212 unsigned char c = op->ob_sval[i];
1213 if (c == quote || c == '\\')
1214 *p++ = '\\', *p++ = c;
1215 else if (c == '\t')
1216 *p++ = '\\', *p++ = 't';
1217 else if (c == '\n')
1218 *p++ = '\\', *p++ = 'n';
1219 else if (c == '\r')
1220 *p++ = '\\', *p++ = 'r';
1221 else if (c < ' ' || c >= 0x7f) {
1222 *p++ = '\\';
1223 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001224 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1225 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001227 else
1228 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001230 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001231 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001232 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001233
1234 overflow:
1235 PyErr_SetString(PyExc_OverflowError,
1236 "bytes object is too large to make repr");
1237 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001238}
1239
Neal Norwitz6968b052007-02-27 19:02:19 +00001240static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001241bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001244}
1245
Neal Norwitz6968b052007-02-27 19:02:19 +00001246static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001247bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (Py_BytesWarningFlag) {
1250 if (PyErr_WarnEx(PyExc_BytesWarning,
1251 "str() on a bytes instance", 1))
1252 return NULL;
1253 }
1254 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001255}
1256
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001258bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261}
Neal Norwitz6968b052007-02-27 19:02:19 +00001262
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263/* This is also used by PyBytes_Concat() */
1264static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001265bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001266{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 Py_ssize_t size;
1268 Py_buffer va, vb;
1269 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 va.len = -1;
1272 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001273 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1274 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1276 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1277 goto done;
1278 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 /* Optimize end cases */
1281 if (va.len == 0 && PyBytes_CheckExact(b)) {
1282 result = b;
1283 Py_INCREF(result);
1284 goto done;
1285 }
1286 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1287 result = a;
1288 Py_INCREF(result);
1289 goto done;
1290 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 size = va.len + vb.len;
1293 if (size < 0) {
1294 PyErr_NoMemory();
1295 goto done;
1296 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001297
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 result = PyBytes_FromStringAndSize(NULL, size);
1299 if (result != NULL) {
1300 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1301 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1302 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001303
1304 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 if (va.len != -1)
1306 PyBuffer_Release(&va);
1307 if (vb.len != -1)
1308 PyBuffer_Release(&vb);
1309 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001310}
Neal Norwitz6968b052007-02-27 19:02:19 +00001311
1312static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001313bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001314{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001315 Py_ssize_t i;
1316 Py_ssize_t j;
1317 Py_ssize_t size;
1318 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 size_t nbytes;
1320 if (n < 0)
1321 n = 0;
1322 /* watch out for overflows: the size can overflow int,
1323 * and the # of bytes needed can overflow size_t
1324 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001325 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 PyErr_SetString(PyExc_OverflowError,
1327 "repeated bytes are too long");
1328 return NULL;
1329 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001330 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1332 Py_INCREF(a);
1333 return (PyObject *)a;
1334 }
1335 nbytes = (size_t)size;
1336 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1337 PyErr_SetString(PyExc_OverflowError,
1338 "repeated bytes are too long");
1339 return NULL;
1340 }
1341 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1342 if (op == NULL)
1343 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001344 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 op->ob_shash = -1;
1346 op->ob_sval[size] = '\0';
1347 if (Py_SIZE(a) == 1 && n > 0) {
1348 memset(op->ob_sval, a->ob_sval[0] , n);
1349 return (PyObject *) op;
1350 }
1351 i = 0;
1352 if (i < size) {
1353 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1354 i = Py_SIZE(a);
1355 }
1356 while (i < size) {
1357 j = (i <= size-i) ? i : size-i;
1358 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1359 i += j;
1360 }
1361 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001362}
1363
Guido van Rossum98297ee2007-11-06 21:34:58 +00001364static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001365bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001366{
1367 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1368 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001369 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001370 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001371 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001372 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001373 return -1;
1374 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1375 varg.buf, varg.len, 0);
1376 PyBuffer_Release(&varg);
1377 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001378 }
1379 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001380 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1381 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001382 }
1383
Antoine Pitrou0010d372010-08-15 17:12:55 +00001384 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001385}
1386
Neal Norwitz6968b052007-02-27 19:02:19 +00001387static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001388bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 if (i < 0 || i >= Py_SIZE(a)) {
1391 PyErr_SetString(PyExc_IndexError, "index out of range");
1392 return NULL;
1393 }
1394 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001395}
1396
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001397Py_LOCAL(int)
1398bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1399{
1400 int cmp;
1401 Py_ssize_t len;
1402
1403 len = Py_SIZE(a);
1404 if (Py_SIZE(b) != len)
1405 return 0;
1406
1407 if (a->ob_sval[0] != b->ob_sval[0])
1408 return 0;
1409
1410 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1411 return (cmp == 0);
1412}
1413
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001415bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 int c;
1418 Py_ssize_t len_a, len_b;
1419 Py_ssize_t min_len;
1420 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 /* Make sure both arguments are strings. */
1423 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001424 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1425 if (PyObject_IsInstance((PyObject*)a,
1426 (PyObject*)&PyUnicode_Type) ||
1427 PyObject_IsInstance((PyObject*)b,
1428 (PyObject*)&PyUnicode_Type)) {
1429 if (PyErr_WarnEx(PyExc_BytesWarning,
1430 "Comparison between bytes and string", 1))
1431 return NULL;
1432 }
1433 else if (PyObject_IsInstance((PyObject*)a,
1434 (PyObject*)&PyLong_Type) ||
1435 PyObject_IsInstance((PyObject*)b,
1436 (PyObject*)&PyLong_Type)) {
1437 if (PyErr_WarnEx(PyExc_BytesWarning,
1438 "Comparison between bytes and int", 1))
1439 return NULL;
1440 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 }
1442 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001444 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001446 case Py_EQ:
1447 case Py_LE:
1448 case Py_GE:
1449 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001451 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001452 case Py_NE:
1453 case Py_LT:
1454 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001456 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001457 default:
1458 PyErr_BadArgument();
1459 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 }
1461 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001462 else if (op == Py_EQ || op == Py_NE) {
1463 int eq = bytes_compare_eq(a, b);
1464 eq ^= (op == Py_NE);
1465 result = eq ? Py_True : Py_False;
1466 }
1467 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001468 len_a = Py_SIZE(a);
1469 len_b = Py_SIZE(b);
1470 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001471 if (min_len > 0) {
1472 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001473 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001474 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001476 else
1477 c = 0;
1478 if (c == 0)
1479 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1480 switch (op) {
1481 case Py_LT: c = c < 0; break;
1482 case Py_LE: c = c <= 0; break;
1483 case Py_GT: c = c > 0; break;
1484 case Py_GE: c = c >= 0; break;
1485 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001486 PyErr_BadArgument();
1487 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001488 }
1489 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 Py_INCREF(result);
1493 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001494}
1495
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001496static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001497bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001498{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001499 if (a->ob_shash == -1) {
1500 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001501 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001502 }
1503 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001504}
1505
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001506static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001507bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001508{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 if (PyIndex_Check(item)) {
1510 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1511 if (i == -1 && PyErr_Occurred())
1512 return NULL;
1513 if (i < 0)
1514 i += PyBytes_GET_SIZE(self);
1515 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1516 PyErr_SetString(PyExc_IndexError,
1517 "index out of range");
1518 return NULL;
1519 }
1520 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1521 }
1522 else if (PySlice_Check(item)) {
1523 Py_ssize_t start, stop, step, slicelength, cur, i;
1524 char* source_buf;
1525 char* result_buf;
1526 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001527
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001528 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 PyBytes_GET_SIZE(self),
1530 &start, &stop, &step, &slicelength) < 0) {
1531 return NULL;
1532 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 if (slicelength <= 0) {
1535 return PyBytes_FromStringAndSize("", 0);
1536 }
1537 else if (start == 0 && step == 1 &&
1538 slicelength == PyBytes_GET_SIZE(self) &&
1539 PyBytes_CheckExact(self)) {
1540 Py_INCREF(self);
1541 return (PyObject *)self;
1542 }
1543 else if (step == 1) {
1544 return PyBytes_FromStringAndSize(
1545 PyBytes_AS_STRING(self) + start,
1546 slicelength);
1547 }
1548 else {
1549 source_buf = PyBytes_AS_STRING(self);
1550 result = PyBytes_FromStringAndSize(NULL, slicelength);
1551 if (result == NULL)
1552 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 result_buf = PyBytes_AS_STRING(result);
1555 for (cur = start, i = 0; i < slicelength;
1556 cur += step, i++) {
1557 result_buf[i] = source_buf[cur];
1558 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 return result;
1561 }
1562 }
1563 else {
1564 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001565 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 Py_TYPE(item)->tp_name);
1567 return NULL;
1568 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001569}
1570
1571static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001572bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1575 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001576}
1577
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001578static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 (lenfunc)bytes_length, /*sq_length*/
1580 (binaryfunc)bytes_concat, /*sq_concat*/
1581 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1582 (ssizeargfunc)bytes_item, /*sq_item*/
1583 0, /*sq_slice*/
1584 0, /*sq_ass_item*/
1585 0, /*sq_ass_slice*/
1586 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001587};
1588
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001589static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 (lenfunc)bytes_length,
1591 (binaryfunc)bytes_subscript,
1592 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001593};
1594
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001595static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001596 (getbufferproc)bytes_buffer_getbuffer,
1597 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001598};
1599
1600
1601#define LEFTSTRIP 0
1602#define RIGHTSTRIP 1
1603#define BOTHSTRIP 2
1604
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001605/*[clinic input]
1606bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001607
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001608 sep: object = None
1609 The delimiter according which to split the bytes.
1610 None (the default value) means split on ASCII whitespace characters
1611 (space, tab, return, newline, formfeed, vertical tab).
1612 maxsplit: Py_ssize_t = -1
1613 Maximum number of splits to do.
1614 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001615
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001616Return a list of the sections in the bytes, using sep as the delimiter.
1617[clinic start generated code]*/
1618
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001619static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001620bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001621/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001622{
1623 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 const char *s = PyBytes_AS_STRING(self), *sub;
1625 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001626 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 if (maxsplit < 0)
1629 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001630 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001632 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 return NULL;
1634 sub = vsub.buf;
1635 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1638 PyBuffer_Release(&vsub);
1639 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001640}
1641
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001642/*[clinic input]
1643bytes.partition
1644
1645 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001646 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001647 /
1648
1649Partition the bytes into three parts using the given separator.
1650
1651This will search for the separator sep in the bytes. If the separator is found,
1652returns a 3-tuple containing the part before the separator, the separator
1653itself, and the part after it.
1654
1655If the separator is not found, returns a 3-tuple containing the original bytes
1656object and two empty bytes objects.
1657[clinic start generated code]*/
1658
Neal Norwitz6968b052007-02-27 19:02:19 +00001659static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001660bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001661/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001662{
Neal Norwitz6968b052007-02-27 19:02:19 +00001663 return stringlib_partition(
1664 (PyObject*) self,
1665 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001666 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001667 );
1668}
1669
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001670/*[clinic input]
1671bytes.rpartition
1672
1673 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001674 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001675 /
1676
1677Partition the bytes into three parts using the given separator.
1678
1679This will search for the separator sep in the bytes, starting and the end. If
1680the separator is found, returns a 3-tuple containing the part before the
1681separator, the separator itself, and the part after it.
1682
1683If the separator is not found, returns a 3-tuple containing two empty bytes
1684objects and the original bytes object.
1685[clinic start generated code]*/
1686
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001687static PyObject *
1688bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001689/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001690{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 return stringlib_rpartition(
1692 (PyObject*) self,
1693 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001694 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001696}
1697
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001698/*[clinic input]
1699bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001700
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001701Return a list of the sections in the bytes, using sep as the delimiter.
1702
1703Splitting is done starting at the end of the bytes and working to the front.
1704[clinic start generated code]*/
1705
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001706static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001707bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001708/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001709{
1710 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 const char *s = PyBytes_AS_STRING(self), *sub;
1712 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001713 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 if (maxsplit < 0)
1716 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001717 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001719 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 return NULL;
1721 sub = vsub.buf;
1722 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1725 PyBuffer_Release(&vsub);
1726 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001727}
1728
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001729
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001730/*[clinic input]
1731bytes.join
1732
1733 iterable_of_bytes: object
1734 /
1735
1736Concatenate any number of bytes objects.
1737
1738The bytes whose method is called is inserted in between each pair.
1739
1740The result is returned as a new bytes object.
1741
1742Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1743[clinic start generated code]*/
1744
Neal Norwitz6968b052007-02-27 19:02:19 +00001745static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001746bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001747/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001748{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001749 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001750}
1751
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001752PyObject *
1753_PyBytes_Join(PyObject *sep, PyObject *x)
1754{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001755 assert(sep != NULL && PyBytes_Check(sep));
1756 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001757 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001758}
1759
/* Helper macro to fix up start/end slice values against a length.

   - end greater than len is clamped to len; a negative end has len added
     and is then clamped to 0.
   - a negative start has len added and is then clamped to 0; a start
     greater than len is left untouched.

   start and end must be modifiable lvalues.  Every argument may be
   evaluated more than once, so do not pass expressions with side effects.
   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement (e.g. in an unbraced if/else); the invocation must be
   followed by a semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001774
1775Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001776bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001777{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001778 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001779 char byte;
1780 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001782 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001784 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001785
Antoine Pitrouac65d962011-10-20 23:54:17 +02001786 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1787 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001789
Antoine Pitrouac65d962011-10-20 23:54:17 +02001790 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001791 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001792 return -2;
1793
1794 sub = subbuf.buf;
1795 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001797 else {
1798 sub = &byte;
1799 sub_len = 1;
1800 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001801 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001802
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001803 ADJUST_INDICES(start, end, len);
1804 if (end - start < sub_len)
1805 res = -1;
Victor Stinnerdabbfe72015-03-25 03:16:32 +01001806 /* Issue #23573: FIXME, windows has no memrchr() */
1807 else if (sub_len == 1 && dir > 0) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001808 unsigned char needle = *sub;
1809 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1810 res = stringlib_fastsearch_memchr_1char(
1811 PyBytes_AS_STRING(self) + start, end - start,
1812 needle, needle, mode);
1813 if (res >= 0)
1814 res += start;
1815 }
1816 else {
1817 if (dir > 0)
1818 res = stringlib_find_slice(
1819 PyBytes_AS_STRING(self), len,
1820 sub, sub_len, start, end);
1821 else
1822 res = stringlib_rfind_slice(
1823 PyBytes_AS_STRING(self), len,
1824 sub, sub_len, start, end);
1825 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001826
1827 if (subobj)
1828 PyBuffer_Release(&subbuf);
1829
1830 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831}
1832
1833
1834PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001835"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001836\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001837Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001838such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001840\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841Return -1 on failure.");
1842
Neal Norwitz6968b052007-02-27 19:02:19 +00001843static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001844bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001845{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 Py_ssize_t result = bytes_find_internal(self, args, +1);
1847 if (result == -2)
1848 return NULL;
1849 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001850}
1851
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852
1853PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001854"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001855\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856Like B.find() but raise ValueError when the substring is not found.");
1857
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001858static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001859bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001860{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 Py_ssize_t result = bytes_find_internal(self, args, +1);
1862 if (result == -2)
1863 return NULL;
1864 if (result == -1) {
1865 PyErr_SetString(PyExc_ValueError,
1866 "substring not found");
1867 return NULL;
1868 }
1869 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001870}
1871
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
1873PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001874"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001875\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001877such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001879\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880Return -1 on failure.");
1881
Neal Norwitz6968b052007-02-27 19:02:19 +00001882static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001883bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 Py_ssize_t result = bytes_find_internal(self, args, -1);
1886 if (result == -2)
1887 return NULL;
1888 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001889}
1890
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001891
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001893"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894\n\
1895Like B.rfind() but raise ValueError when the substring is not found.");
1896
1897static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001898bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 Py_ssize_t result = bytes_find_internal(self, args, -1);
1901 if (result == -2)
1902 return NULL;
1903 if (result == -1) {
1904 PyErr_SetString(PyExc_ValueError,
1905 "substring not found");
1906 return NULL;
1907 }
1908 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001909}
1910
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
1912Py_LOCAL_INLINE(PyObject *)
1913do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 Py_buffer vsep;
1916 char *s = PyBytes_AS_STRING(self);
1917 Py_ssize_t len = PyBytes_GET_SIZE(self);
1918 char *sep;
1919 Py_ssize_t seplen;
1920 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001922 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 return NULL;
1924 sep = vsep.buf;
1925 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 i = 0;
1928 if (striptype != RIGHTSTRIP) {
1929 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1930 i++;
1931 }
1932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 j = len;
1935 if (striptype != LEFTSTRIP) {
1936 do {
1937 j--;
1938 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1939 j++;
1940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1945 Py_INCREF(self);
1946 return (PyObject*)self;
1947 }
1948 else
1949 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001950}
1951
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
1953Py_LOCAL_INLINE(PyObject *)
1954do_strip(PyBytesObject *self, int striptype)
1955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 char *s = PyBytes_AS_STRING(self);
1957 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 i = 0;
1960 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001961 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 i++;
1963 }
1964 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 j = len;
1967 if (striptype != LEFTSTRIP) {
1968 do {
1969 j--;
David Malcolm96960882010-11-05 17:23:41 +00001970 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 j++;
1972 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1975 Py_INCREF(self);
1976 return (PyObject*)self;
1977 }
1978 else
1979 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980}
1981
1982
1983Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001984do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986 if (bytes != NULL && bytes != Py_None) {
1987 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001988 }
1989 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990}
1991
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001992/*[clinic input]
1993bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001995 self: self(type="PyBytesObject *")
1996 bytes: object = None
1997 /
1998
1999Strip leading and trailing bytes contained in the argument.
2000
2001If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2002[clinic start generated code]*/
2003
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002004static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002006/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002007{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002009}
2010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011/*[clinic input]
2012bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002014 self: self(type="PyBytesObject *")
2015 bytes: object = None
2016 /
2017
2018Strip leading bytes contained in the argument.
2019
2020If the argument is omitted or None, strip leading ASCII whitespace.
2021[clinic start generated code]*/
2022
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023static PyObject *
2024bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002025/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026{
2027 return do_argstrip(self, LEFTSTRIP, bytes);
2028}
2029
2030/*[clinic input]
2031bytes.rstrip
2032
2033 self: self(type="PyBytesObject *")
2034 bytes: object = None
2035 /
2036
2037Strip trailing bytes contained in the argument.
2038
2039If the argument is omitted or None, strip trailing ASCII whitespace.
2040[clinic start generated code]*/
2041
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042static PyObject *
2043bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002044/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002045{
2046 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002047}
Neal Norwitz6968b052007-02-27 19:02:19 +00002048
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
2050PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002051"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002052\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002054string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055as in slice notation.");
2056
2057static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002058bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 PyObject *sub_obj;
2061 const char *str = PyBytes_AS_STRING(self), *sub;
2062 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002063 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065
Antoine Pitrouac65d962011-10-20 23:54:17 +02002066 Py_buffer vsub;
2067 PyObject *count_obj;
2068
2069 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2070 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouac65d962011-10-20 23:54:17 +02002073 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002074 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002075 return NULL;
2076
2077 sub = vsub.buf;
2078 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002080 else {
2081 sub = &byte;
2082 sub_len = 1;
2083 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouac65d962011-10-20 23:54:17 +02002087 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2089 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002090
2091 if (sub_obj)
2092 PyBuffer_Release(&vsub);
2093
2094 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095}
2096
2097
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098/*[clinic input]
2099bytes.translate
2100
2101 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002102 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002103 Translation table, which must be a bytes object of length 256.
2104 [
2105 deletechars: object
2106 ]
2107 /
2108
2109Return a copy with each character mapped by the given translation table.
2110
2111All characters occurring in the optional argument deletechars are removed.
2112The remaining characters are mapped through the given translation table.
2113[clinic start generated code]*/
2114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115static PyObject *
2116bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002117/*[clinic end generated code: output=0ddd2cef4f4918f2 input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002118{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002119 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002120 Py_buffer table_view = {NULL, NULL};
2121 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002122 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002123 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002125 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002126 Py_ssize_t inlen, tablen, dellen = 0;
2127 PyObject *result;
2128 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002129
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002130 if (PyBytes_Check(table)) {
2131 table_chars = PyBytes_AS_STRING(table);
2132 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002133 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002134 else if (table == Py_None) {
2135 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002136 tablen = 256;
2137 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002138 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002139 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002140 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002141 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002142 tablen = table_view.len;
2143 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 if (tablen != 256) {
2146 PyErr_SetString(PyExc_ValueError,
2147 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002148 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002149 return NULL;
2150 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002151
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002152 if (deletechars != NULL) {
2153 if (PyBytes_Check(deletechars)) {
2154 del_table_chars = PyBytes_AS_STRING(deletechars);
2155 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002157 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002158 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002159 PyBuffer_Release(&table_view);
2160 return NULL;
2161 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002162 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002163 dellen = del_table_view.len;
2164 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002165 }
2166 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002167 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002168 dellen = 0;
2169 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 inlen = PyBytes_GET_SIZE(input_obj);
2172 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002173 if (result == NULL) {
2174 PyBuffer_Release(&del_table_view);
2175 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002176 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002177 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 output_start = output = PyBytes_AsString(result);
2179 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002180
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002181 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 /* If no deletions are required, use faster code */
2183 for (i = inlen; --i >= 0; ) {
2184 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002185 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 changed = 1;
2187 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002188 if (!changed && PyBytes_CheckExact(input_obj)) {
2189 Py_INCREF(input_obj);
2190 Py_DECREF(result);
2191 result = input_obj;
2192 }
2193 PyBuffer_Release(&del_table_view);
2194 PyBuffer_Release(&table_view);
2195 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002197
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002198 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002199 for (i = 0; i < 256; i++)
2200 trans_table[i] = Py_CHARMASK(i);
2201 } else {
2202 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002203 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002204 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002205 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002207 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002208 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002209 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 for (i = inlen; --i >= 0; ) {
2212 c = Py_CHARMASK(*input++);
2213 if (trans_table[c] != -1)
2214 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2215 continue;
2216 changed = 1;
2217 }
2218 if (!changed && PyBytes_CheckExact(input_obj)) {
2219 Py_DECREF(result);
2220 Py_INCREF(input_obj);
2221 return input_obj;
2222 }
2223 /* Fix the size of the resulting string */
2224 if (inlen > 0)
2225 _PyBytes_Resize(&result, output - output_start);
2226 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227}
2228
2229
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002230/*[clinic input]
2231
2232@staticmethod
2233bytes.maketrans
2234
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002235 frm: Py_buffer
2236 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237 /
2238
2239Return a translation table useable for the bytes or bytearray translate method.
2240
2241The returned table will be one where each byte in frm is mapped to the byte at
2242the same position in to.
2243
2244The bytes objects frm and to must be of the same length.
2245[clinic start generated code]*/
2246
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002247static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002248bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002249/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002250{
2251 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002252}
2253
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002254/* find and count characters and substrings */
2255
/* Thin wrapper around memchr(): pointer (as char *) to the first byte
   equal to ch within the first buf_len bytes of buf, or NULL if absent. */
#define findchar(buf, buf_len, ch) \
    ((char *)memchr((const void *)(buf), (ch), (buf_len)))
2258
2259/* String ops must return a string. */
2260/* If the object is subclass of string, create a copy */
2261Py_LOCAL(PyBytesObject *)
2262return_self(PyBytesObject *self)
2263{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002264 if (PyBytes_CheckExact(self)) {
2265 Py_INCREF(self);
2266 return self;
2267 }
2268 return (PyBytesObject *)PyBytes_FromStringAndSize(
2269 PyBytes_AS_STRING(self),
2270 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002271}
2272
2273Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002274countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002275{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002276 Py_ssize_t count=0;
2277 const char *start=target;
2278 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002280 while ( (start=findchar(start, end-start, c)) != NULL ) {
2281 count++;
2282 if (count >= maxcount)
2283 break;
2284 start += 1;
2285 }
2286 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002287}
2288
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
2290/* Algorithms for different cases of string replacement */
2291
2292/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2293Py_LOCAL(PyBytesObject *)
2294replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 const char *to_s, Py_ssize_t to_len,
2296 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002298 char *self_s, *result_s;
2299 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002300 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002301 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002303 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002304
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002305 /* 1 at the end plus 1 after every character;
2306 count = min(maxcount, self_len + 1) */
2307 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002309 else
2310 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2311 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 /* Check for overflow */
2314 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002315 assert(count > 0);
2316 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002317 PyErr_SetString(PyExc_OverflowError,
2318 "replacement bytes are too long");
2319 return NULL;
2320 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002321 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 if (! (result = (PyBytesObject *)
2324 PyBytes_FromStringAndSize(NULL, result_len)) )
2325 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002327 self_s = PyBytes_AS_STRING(self);
2328 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002331
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002332 /* Lay the first one down (guaranteed this will occur) */
2333 Py_MEMCPY(result_s, to_s, to_len);
2334 result_s += to_len;
2335 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 for (i=0; i<count; i++) {
2338 *result_s++ = *self_s++;
2339 Py_MEMCPY(result_s, to_s, to_len);
2340 result_s += to_len;
2341 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002342
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002343 /* Copy the rest of the original string */
2344 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002346 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347}
2348
2349/* Special case for deleting a single character */
2350/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2351Py_LOCAL(PyBytesObject *)
2352replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002353 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002355 char *self_s, *result_s;
2356 char *start, *next, *end;
2357 Py_ssize_t self_len, result_len;
2358 Py_ssize_t count;
2359 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 self_len = PyBytes_GET_SIZE(self);
2362 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002364 count = countchar(self_s, self_len, from_c, maxcount);
2365 if (count == 0) {
2366 return return_self(self);
2367 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002369 result_len = self_len - count; /* from_len == 1 */
2370 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002371
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002372 if ( (result = (PyBytesObject *)
2373 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2374 return NULL;
2375 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002377 start = self_s;
2378 end = self_s + self_len;
2379 while (count-- > 0) {
2380 next = findchar(start, end-start, from_c);
2381 if (next == NULL)
2382 break;
2383 Py_MEMCPY(result_s, start, next-start);
2384 result_s += (next-start);
2385 start = next+1;
2386 }
2387 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002389 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002390}
2391
2392/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2393
2394Py_LOCAL(PyBytesObject *)
2395replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002396 const char *from_s, Py_ssize_t from_len,
2397 Py_ssize_t maxcount) {
2398 char *self_s, *result_s;
2399 char *start, *next, *end;
2400 Py_ssize_t self_len, result_len;
2401 Py_ssize_t count, offset;
2402 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 self_len = PyBytes_GET_SIZE(self);
2405 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002407 count = stringlib_count(self_s, self_len,
2408 from_s, from_len,
2409 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002411 if (count == 0) {
2412 /* no matches */
2413 return return_self(self);
2414 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002416 result_len = self_len - (count * from_len);
2417 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002418
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 if ( (result = (PyBytesObject *)
2420 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2421 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002423 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002425 start = self_s;
2426 end = self_s + self_len;
2427 while (count-- > 0) {
2428 offset = stringlib_find(start, end-start,
2429 from_s, from_len,
2430 0);
2431 if (offset == -1)
2432 break;
2433 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 result_s += (next-start);
2438 start = next+from_len;
2439 }
2440 Py_MEMCPY(result_s, start, end-start);
2441 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002442}
2443
2444/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2445Py_LOCAL(PyBytesObject *)
2446replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002447 char from_c, char to_c,
2448 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002450 char *self_s, *result_s, *start, *end, *next;
2451 Py_ssize_t self_len;
2452 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 /* The result string will be the same size */
2455 self_s = PyBytes_AS_STRING(self);
2456 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 if (next == NULL) {
2461 /* No matches; return the original string */
2462 return return_self(self);
2463 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 /* Need to make a new string */
2466 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2467 if (result == NULL)
2468 return NULL;
2469 result_s = PyBytes_AS_STRING(result);
2470 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 /* change everything in-place, starting with this one */
2473 start = result_s + (next-self_s);
2474 *start = to_c;
2475 start++;
2476 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002478 while (--maxcount > 0) {
2479 next = findchar(start, end-start, from_c);
2480 if (next == NULL)
2481 break;
2482 *next = to_c;
2483 start = next+1;
2484 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002485
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002486 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487}
2488
2489/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2490Py_LOCAL(PyBytesObject *)
2491replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 const char *from_s, Py_ssize_t from_len,
2493 const char *to_s, Py_ssize_t to_len,
2494 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 char *result_s, *start, *end;
2497 char *self_s;
2498 Py_ssize_t self_len, offset;
2499 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002502
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002503 self_s = PyBytes_AS_STRING(self);
2504 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 offset = stringlib_find(self_s, self_len,
2507 from_s, from_len,
2508 0);
2509 if (offset == -1) {
2510 /* No matches; return the original string */
2511 return return_self(self);
2512 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 /* Need to make a new string */
2515 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2516 if (result == NULL)
2517 return NULL;
2518 result_s = PyBytes_AS_STRING(result);
2519 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002521 /* change everything in-place, starting with this one */
2522 start = result_s + offset;
2523 Py_MEMCPY(start, to_s, from_len);
2524 start += from_len;
2525 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 while ( --maxcount > 0) {
2528 offset = stringlib_find(start, end-start,
2529 from_s, from_len,
2530 0);
2531 if (offset==-1)
2532 break;
2533 Py_MEMCPY(start+offset, to_s, from_len);
2534 start += offset+from_len;
2535 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002537 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002538}
2539
2540/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2541Py_LOCAL(PyBytesObject *)
2542replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 char from_c,
2544 const char *to_s, Py_ssize_t to_len,
2545 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002546{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002547 char *self_s, *result_s;
2548 char *start, *next, *end;
2549 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002550 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002551 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 self_s = PyBytes_AS_STRING(self);
2554 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 count = countchar(self_s, self_len, from_c, maxcount);
2557 if (count == 0) {
2558 /* no matches, return unchanged */
2559 return return_self(self);
2560 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002562 /* use the difference between current and new, hence the "-1" */
2563 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002564 assert(count > 0);
2565 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 PyErr_SetString(PyExc_OverflowError,
2567 "replacement bytes are too long");
2568 return NULL;
2569 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002570 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 if ( (result = (PyBytesObject *)
2573 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2574 return NULL;
2575 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 start = self_s;
2578 end = self_s + self_len;
2579 while (count-- > 0) {
2580 next = findchar(start, end-start, from_c);
2581 if (next == NULL)
2582 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 if (next == start) {
2585 /* replace with the 'to' */
2586 Py_MEMCPY(result_s, to_s, to_len);
2587 result_s += to_len;
2588 start += 1;
2589 } else {
2590 /* copy the unchanged old then the 'to' */
2591 Py_MEMCPY(result_s, start, next-start);
2592 result_s += (next-start);
2593 Py_MEMCPY(result_s, to_s, to_len);
2594 result_s += to_len;
2595 start = next+1;
2596 }
2597 }
2598 /* Copy the remainder of the remaining string */
2599 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002601 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002602}
2603
2604/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2605Py_LOCAL(PyBytesObject *)
2606replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002607 const char *from_s, Py_ssize_t from_len,
2608 const char *to_s, Py_ssize_t to_len,
2609 Py_ssize_t maxcount) {
2610 char *self_s, *result_s;
2611 char *start, *next, *end;
2612 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002613 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002614 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002616 self_s = PyBytes_AS_STRING(self);
2617 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002619 count = stringlib_count(self_s, self_len,
2620 from_s, from_len,
2621 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002623 if (count == 0) {
2624 /* no matches, return unchanged */
2625 return return_self(self);
2626 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 /* Check for overflow */
2629 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002630 assert(count > 0);
2631 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 PyErr_SetString(PyExc_OverflowError,
2633 "replacement bytes are too long");
2634 return NULL;
2635 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002636 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 if ( (result = (PyBytesObject *)
2639 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2640 return NULL;
2641 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 start = self_s;
2644 end = self_s + self_len;
2645 while (count-- > 0) {
2646 offset = stringlib_find(start, end-start,
2647 from_s, from_len,
2648 0);
2649 if (offset == -1)
2650 break;
2651 next = start+offset;
2652 if (next == start) {
2653 /* replace with the 'to' */
2654 Py_MEMCPY(result_s, to_s, to_len);
2655 result_s += to_len;
2656 start += from_len;
2657 } else {
2658 /* copy the unchanged old then the 'to' */
2659 Py_MEMCPY(result_s, start, next-start);
2660 result_s += (next-start);
2661 Py_MEMCPY(result_s, to_s, to_len);
2662 result_s += to_len;
2663 start = next+from_len;
2664 }
2665 }
2666 /* Copy the remainder of the remaining string */
2667 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670}
2671
2672
2673Py_LOCAL(PyBytesObject *)
2674replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 const char *from_s, Py_ssize_t from_len,
2676 const char *to_s, Py_ssize_t to_len,
2677 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002678{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002679 if (maxcount < 0) {
2680 maxcount = PY_SSIZE_T_MAX;
2681 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2682 /* nothing to do; return the original string */
2683 return return_self(self);
2684 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 if (maxcount == 0 ||
2687 (from_len == 0 && to_len == 0)) {
2688 /* nothing to do; return the original string */
2689 return return_self(self);
2690 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 if (from_len == 0) {
2695 /* insert the 'to' string everywhere. */
2696 /* >>> "Python".replace("", ".") */
2697 /* '.P.y.t.h.o.n.' */
2698 return replace_interleave(self, to_s, to_len, maxcount);
2699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2702 /* point for an empty self string to generate a non-empty string */
2703 /* Special case so the remaining code always gets a non-empty string */
2704 if (PyBytes_GET_SIZE(self) == 0) {
2705 return return_self(self);
2706 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 if (to_len == 0) {
2709 /* delete all occurrences of 'from' string */
2710 if (from_len == 1) {
2711 return replace_delete_single_character(
2712 self, from_s[0], maxcount);
2713 } else {
2714 return replace_delete_substring(self, from_s,
2715 from_len, maxcount);
2716 }
2717 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002719 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 if (from_len == to_len) {
2722 if (from_len == 1) {
2723 return replace_single_character_in_place(
2724 self,
2725 from_s[0],
2726 to_s[0],
2727 maxcount);
2728 } else {
2729 return replace_substring_in_place(
2730 self, from_s, from_len, to_s, to_len,
2731 maxcount);
2732 }
2733 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 /* Otherwise use the more generic algorithms */
2736 if (from_len == 1) {
2737 return replace_single_character(self, from_s[0],
2738 to_s, to_len, maxcount);
2739 } else {
2740 /* len('from')>=2, len('to')>=1 */
2741 return replace_substring(self, from_s, from_len, to_s, to_len,
2742 maxcount);
2743 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744}
2745
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002746
2747/*[clinic input]
2748bytes.replace
2749
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002750 old: Py_buffer
2751 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002752 count: Py_ssize_t = -1
2753 Maximum number of occurrences to replace.
2754 -1 (the default value) means replace all occurrences.
2755 /
2756
2757Return a copy with all occurrences of substring old replaced by new.
2758
2759If the optional argument count is given, only the first count occurrences are
2760replaced.
2761[clinic start generated code]*/
2762
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002763static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002764bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002765/*[clinic end generated code: output=3fe052c3c60cffc2 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002766{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002767 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002768 (const char *)old->buf, old->len,
2769 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770}
2771
2772/** End DALKE **/
2773
2774/* Matches the end (direction >= 0) or start (direction < 0) of self
2775 * against substr, using the start and end arguments. Returns
2776 * -1 on error, 0 if not found and 1 if found.
2777 */
2778Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002779_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002780 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 Py_ssize_t len = PyBytes_GET_SIZE(self);
2783 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002784 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002785 const char* sub;
2786 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 if (PyBytes_Check(substr)) {
2789 sub = PyBytes_AS_STRING(substr);
2790 slen = PyBytes_GET_SIZE(substr);
2791 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002792 else {
2793 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2794 return -1;
2795 sub = sub_view.buf;
2796 slen = sub_view.len;
2797 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002799
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 if (direction < 0) {
2803 /* startswith */
2804 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002805 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 } else {
2807 /* endswith */
2808 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002809 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 if (end-slen > start)
2812 start = end - slen;
2813 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002814 if (end-start < slen)
2815 goto notfound;
2816 if (memcmp(str+start, sub, slen) != 0)
2817 goto notfound;
2818
2819 PyBuffer_Release(&sub_view);
2820 return 1;
2821
2822notfound:
2823 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002825}
2826
2827
2828PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002829"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830\n\
2831Return True if B starts with the specified prefix, False otherwise.\n\
2832With optional start, test B beginning at that position.\n\
2833With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002834prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002835
2836static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002837bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 Py_ssize_t start = 0;
2840 Py_ssize_t end = PY_SSIZE_T_MAX;
2841 PyObject *subobj;
2842 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002843
Jesus Ceaac451502011-04-20 17:09:23 +02002844 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 return NULL;
2846 if (PyTuple_Check(subobj)) {
2847 Py_ssize_t i;
2848 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2849 result = _bytes_tailmatch(self,
2850 PyTuple_GET_ITEM(subobj, i),
2851 start, end, -1);
2852 if (result == -1)
2853 return NULL;
2854 else if (result) {
2855 Py_RETURN_TRUE;
2856 }
2857 }
2858 Py_RETURN_FALSE;
2859 }
2860 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002861 if (result == -1) {
2862 if (PyErr_ExceptionMatches(PyExc_TypeError))
2863 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2864 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002865 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002866 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 else
2868 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002869}
2870
2871
2872PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002873"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002874\n\
2875Return True if B ends with the specified suffix, False otherwise.\n\
2876With optional start, test B beginning at that position.\n\
2877With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002878suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002879
2880static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002881bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002883 Py_ssize_t start = 0;
2884 Py_ssize_t end = PY_SSIZE_T_MAX;
2885 PyObject *subobj;
2886 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887
Jesus Ceaac451502011-04-20 17:09:23 +02002888 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002889 return NULL;
2890 if (PyTuple_Check(subobj)) {
2891 Py_ssize_t i;
2892 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2893 result = _bytes_tailmatch(self,
2894 PyTuple_GET_ITEM(subobj, i),
2895 start, end, +1);
2896 if (result == -1)
2897 return NULL;
2898 else if (result) {
2899 Py_RETURN_TRUE;
2900 }
2901 }
2902 Py_RETURN_FALSE;
2903 }
2904 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002905 if (result == -1) {
2906 if (PyErr_ExceptionMatches(PyExc_TypeError))
2907 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2908 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002909 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002910 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 else
2912 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002913}
2914
2915
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002916/*[clinic input]
2917bytes.decode
2918
2919 encoding: str(c_default="NULL") = 'utf-8'
2920 The encoding with which to decode the bytes.
2921 errors: str(c_default="NULL") = 'strict'
2922 The error handling scheme to use for the handling of decoding errors.
2923 The default is 'strict' meaning that decoding errors raise a
2924 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2925 as well as any other name registered with codecs.register_error that
2926 can handle UnicodeDecodeErrors.
2927
2928Decode the bytes using the codec registered for encoding.
2929[clinic start generated code]*/
2930
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002931static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002932bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002933/*[clinic end generated code: output=8038751c823b9038 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002934{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002935 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002936}
2937
Guido van Rossum20188312006-05-05 15:15:40 +00002938
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002939/*[clinic input]
2940bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002942 keepends: int(py_default="False") = 0
2943
2944Return a list of the lines in the bytes, breaking at line boundaries.
2945
2946Line breaks are not included in the resulting list unless keepends is given and
2947true.
2948[clinic start generated code]*/
2949
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002950static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002951bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002952/*[clinic end generated code: output=995c3598f7833cad input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002953{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002954 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002955 (PyObject*) self, PyBytes_AS_STRING(self),
2956 PyBytes_GET_SIZE(self), keepends
2957 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002958}
2959
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002960static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002961hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002962{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 if (c >= 128)
2964 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002965 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002966 return c - '0';
2967 else {
David Malcolm96960882010-11-05 17:23:41 +00002968 if (Py_ISUPPER(c))
2969 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002970 if (c >= 'a' && c <= 'f')
2971 return c - 'a' + 10;
2972 }
2973 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002974}
2975
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002976/*[clinic input]
2977@classmethod
2978bytes.fromhex
2979
2980 string: unicode
2981 /
2982
2983Create a bytes object from a string of hexadecimal numbers.
2984
2985Spaces between two numbers are accepted.
2986Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2987[clinic start generated code]*/
2988
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002989static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002990bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002991/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002992{
2993 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002994 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002995 Py_ssize_t hexlen, byteslen, i, j;
2996 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002997 void *data;
2998 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002999
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003000 assert(PyUnicode_Check(string));
3001 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003002 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003003 kind = PyUnicode_KIND(string);
3004 data = PyUnicode_DATA(string);
3005 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003007 byteslen = hexlen/2; /* This overestimates if there are spaces */
3008 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3009 if (!newstring)
3010 return NULL;
3011 buf = PyBytes_AS_STRING(newstring);
3012 for (i = j = 0; i < hexlen; i += 2) {
3013 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003014 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003015 i++;
3016 if (i >= hexlen)
3017 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003018 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3019 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003020 if (top == -1 || bot == -1) {
3021 PyErr_Format(PyExc_ValueError,
3022 "non-hexadecimal number found in "
3023 "fromhex() arg at position %zd", i);
3024 goto error;
3025 }
3026 buf[j++] = (top << 4) + bot;
3027 }
3028 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3029 goto error;
3030 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003031
3032 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003033 Py_XDECREF(newstring);
3034 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003035}
3036
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003037static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003038bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003039{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003040 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003041}
3042
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003043
3044static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003045bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003046 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3047 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3048 _Py_capitalize__doc__},
3049 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3050 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003051 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3053 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003054 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 expandtabs__doc__},
3056 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003057 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003058 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3059 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3060 _Py_isalnum__doc__},
3061 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3062 _Py_isalpha__doc__},
3063 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3064 _Py_isdigit__doc__},
3065 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3066 _Py_islower__doc__},
3067 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3068 _Py_isspace__doc__},
3069 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3070 _Py_istitle__doc__},
3071 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3072 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003073 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3075 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003076 BYTES_LSTRIP_METHODDEF
3077 BYTES_MAKETRANS_METHODDEF
3078 BYTES_PARTITION_METHODDEF
3079 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3081 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3082 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003083 BYTES_RPARTITION_METHODDEF
3084 BYTES_RSPLIT_METHODDEF
3085 BYTES_RSTRIP_METHODDEF
3086 BYTES_SPLIT_METHODDEF
3087 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003088 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3089 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003090 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003091 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3092 _Py_swapcase__doc__},
3093 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003094 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003095 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3096 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003097 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003098};
3099
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003100static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003101bytes_mod(PyObject *v, PyObject *w)
3102{
3103 if (!PyBytes_Check(v))
3104 Py_RETURN_NOTIMPLEMENTED;
3105 return _PyBytes_Format(v, w);
3106}
3107
3108static PyNumberMethods bytes_as_number = {
3109 0, /*nb_add*/
3110 0, /*nb_subtract*/
3111 0, /*nb_multiply*/
3112 bytes_mod, /*nb_remainder*/
3113};
3114
3115static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003116str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3117
3118static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003119bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003120{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003121 PyObject *x = NULL;
3122 const char *encoding = NULL;
3123 const char *errors = NULL;
3124 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003125 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003126 Py_ssize_t size;
3127 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003128 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003130 if (type != &PyBytes_Type)
3131 return str_subtype_new(type, args, kwds);
3132 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3133 &encoding, &errors))
3134 return NULL;
3135 if (x == NULL) {
3136 if (encoding != NULL || errors != NULL) {
3137 PyErr_SetString(PyExc_TypeError,
3138 "encoding or errors without sequence "
3139 "argument");
3140 return NULL;
3141 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003142 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003143 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003145 if (PyUnicode_Check(x)) {
3146 /* Encode via the codec registry */
3147 if (encoding == NULL) {
3148 PyErr_SetString(PyExc_TypeError,
3149 "string argument without an encoding");
3150 return NULL;
3151 }
3152 new = PyUnicode_AsEncodedString(x, encoding, errors);
3153 if (new == NULL)
3154 return NULL;
3155 assert(PyBytes_Check(new));
3156 return new;
3157 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003158
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003159 /* If it's not unicode, there can't be encoding or errors */
3160 if (encoding != NULL || errors != NULL) {
3161 PyErr_SetString(PyExc_TypeError,
3162 "encoding or errors without a string argument");
3163 return NULL;
3164 }
3165
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003166 /* We'd like to call PyObject_Bytes here, but we need to check for an
3167 integer argument before deferring to PyBytes_FromObject, something
3168 PyObject_Bytes doesn't do. */
3169 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3170 if (func != NULL) {
3171 new = PyObject_CallFunctionObjArgs(func, NULL);
3172 Py_DECREF(func);
3173 if (new == NULL)
3174 return NULL;
3175 if (!PyBytes_Check(new)) {
3176 PyErr_Format(PyExc_TypeError,
3177 "__bytes__ returned non-bytes (type %.200s)",
3178 Py_TYPE(new)->tp_name);
3179 Py_DECREF(new);
3180 return NULL;
3181 }
3182 return new;
3183 }
3184 else if (PyErr_Occurred())
3185 return NULL;
3186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003187 /* Is it an integer? */
3188 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3189 if (size == -1 && PyErr_Occurred()) {
3190 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3191 return NULL;
3192 PyErr_Clear();
3193 }
3194 else if (size < 0) {
3195 PyErr_SetString(PyExc_ValueError, "negative count");
3196 return NULL;
3197 }
3198 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003199 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003200 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003201 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003202 return new;
3203 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003204
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003205 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003206}
3207
3208PyObject *
3209PyBytes_FromObject(PyObject *x)
3210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003211 PyObject *new, *it;
3212 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003214 if (x == NULL) {
3215 PyErr_BadInternalCall();
3216 return NULL;
3217 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003218
3219 if (PyBytes_CheckExact(x)) {
3220 Py_INCREF(x);
3221 return x;
3222 }
3223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003224 /* Use the modern buffer interface */
3225 if (PyObject_CheckBuffer(x)) {
3226 Py_buffer view;
3227 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3228 return NULL;
3229 new = PyBytes_FromStringAndSize(NULL, view.len);
3230 if (!new)
3231 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003232 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3233 &view, view.len, 'C') < 0)
3234 goto fail;
3235 PyBuffer_Release(&view);
3236 return new;
3237 fail:
3238 Py_XDECREF(new);
3239 PyBuffer_Release(&view);
3240 return NULL;
3241 }
3242 if (PyUnicode_Check(x)) {
3243 PyErr_SetString(PyExc_TypeError,
3244 "cannot convert unicode object to bytes");
3245 return NULL;
3246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003248 if (PyList_CheckExact(x)) {
3249 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3250 if (new == NULL)
3251 return NULL;
3252 for (i = 0; i < Py_SIZE(x); i++) {
3253 Py_ssize_t value = PyNumber_AsSsize_t(
3254 PyList_GET_ITEM(x, i), PyExc_ValueError);
3255 if (value == -1 && PyErr_Occurred()) {
3256 Py_DECREF(new);
3257 return NULL;
3258 }
3259 if (value < 0 || value >= 256) {
3260 PyErr_SetString(PyExc_ValueError,
3261 "bytes must be in range(0, 256)");
3262 Py_DECREF(new);
3263 return NULL;
3264 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003265 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003266 }
3267 return new;
3268 }
3269 if (PyTuple_CheckExact(x)) {
3270 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3271 if (new == NULL)
3272 return NULL;
3273 for (i = 0; i < Py_SIZE(x); i++) {
3274 Py_ssize_t value = PyNumber_AsSsize_t(
3275 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3276 if (value == -1 && PyErr_Occurred()) {
3277 Py_DECREF(new);
3278 return NULL;
3279 }
3280 if (value < 0 || value >= 256) {
3281 PyErr_SetString(PyExc_ValueError,
3282 "bytes must be in range(0, 256)");
3283 Py_DECREF(new);
3284 return NULL;
3285 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003286 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003287 }
3288 return new;
3289 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003291 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003292 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003293 if (size == -1 && PyErr_Occurred())
3294 return NULL;
3295 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3296 returning a shared empty bytes string. This required because we
3297 want to call _PyBytes_Resize() the returned object, which we can
3298 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003299 if (size == 0)
3300 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003301 new = PyBytes_FromStringAndSize(NULL, size);
3302 if (new == NULL)
3303 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003304 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003306 /* Get the iterator */
3307 it = PyObject_GetIter(x);
3308 if (it == NULL)
3309 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003310
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003311 /* Run the iterator to exhaustion */
3312 for (i = 0; ; i++) {
3313 PyObject *item;
3314 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003316 /* Get the next item */
3317 item = PyIter_Next(it);
3318 if (item == NULL) {
3319 if (PyErr_Occurred())
3320 goto error;
3321 break;
3322 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 /* Interpret it as an int (__index__) */
3325 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3326 Py_DECREF(item);
3327 if (value == -1 && PyErr_Occurred())
3328 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003330 /* Range check */
3331 if (value < 0 || value >= 256) {
3332 PyErr_SetString(PyExc_ValueError,
3333 "bytes must be in range(0, 256)");
3334 goto error;
3335 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003337 /* Append the byte */
3338 if (i >= size) {
3339 size = 2 * size + 1;
3340 if (_PyBytes_Resize(&new, size) < 0)
3341 goto error;
3342 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003343 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003344 }
3345 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003347 /* Clean up and return success */
3348 Py_DECREF(it);
3349 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003350
3351 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003352 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003353 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003354 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003355}
3356
3357static PyObject *
3358str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3359{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003360 PyObject *tmp, *pnew;
3361 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003363 assert(PyType_IsSubtype(type, &PyBytes_Type));
3364 tmp = bytes_new(&PyBytes_Type, args, kwds);
3365 if (tmp == NULL)
3366 return NULL;
3367 assert(PyBytes_CheckExact(tmp));
3368 n = PyBytes_GET_SIZE(tmp);
3369 pnew = type->tp_alloc(type, n);
3370 if (pnew != NULL) {
3371 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3372 PyBytes_AS_STRING(tmp), n+1);
3373 ((PyBytesObject *)pnew)->ob_shash =
3374 ((PyBytesObject *)tmp)->ob_shash;
3375 }
3376 Py_DECREF(tmp);
3377 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003378}
3379
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003380PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003381"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003382bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003383bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003384bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3385bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003386\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003387Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003388 - an iterable yielding integers in range(256)\n\
3389 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003390 - any object implementing the buffer API.\n\
3391 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003392
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003393static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003394
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003395PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003396 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3397 "bytes",
3398 PyBytesObject_SIZE,
3399 sizeof(char),
3400 bytes_dealloc, /* tp_dealloc */
3401 0, /* tp_print */
3402 0, /* tp_getattr */
3403 0, /* tp_setattr */
3404 0, /* tp_reserved */
3405 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003406 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003407 &bytes_as_sequence, /* tp_as_sequence */
3408 &bytes_as_mapping, /* tp_as_mapping */
3409 (hashfunc)bytes_hash, /* tp_hash */
3410 0, /* tp_call */
3411 bytes_str, /* tp_str */
3412 PyObject_GenericGetAttr, /* tp_getattro */
3413 0, /* tp_setattro */
3414 &bytes_as_buffer, /* tp_as_buffer */
3415 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3416 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3417 bytes_doc, /* tp_doc */
3418 0, /* tp_traverse */
3419 0, /* tp_clear */
3420 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3421 0, /* tp_weaklistoffset */
3422 bytes_iter, /* tp_iter */
3423 0, /* tp_iternext */
3424 bytes_methods, /* tp_methods */
3425 0, /* tp_members */
3426 0, /* tp_getset */
3427 &PyBaseObject_Type, /* tp_base */
3428 0, /* tp_dict */
3429 0, /* tp_descr_get */
3430 0, /* tp_descr_set */
3431 0, /* tp_dictoffset */
3432 0, /* tp_init */
3433 0, /* tp_alloc */
3434 bytes_new, /* tp_new */
3435 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003436};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003437
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003438void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003439PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003440{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003441 assert(pv != NULL);
3442 if (*pv == NULL)
3443 return;
3444 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003445 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003446 return;
3447 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003448
3449 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3450 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003451 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003452 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003453
Antoine Pitrou161d6952014-05-01 14:36:20 +02003454 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003455 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003456 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3457 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3458 Py_CLEAR(*pv);
3459 return;
3460 }
3461
3462 oldsize = PyBytes_GET_SIZE(*pv);
3463 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3464 PyErr_NoMemory();
3465 goto error;
3466 }
3467 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3468 goto error;
3469
3470 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3471 PyBuffer_Release(&wb);
3472 return;
3473
3474 error:
3475 PyBuffer_Release(&wb);
3476 Py_CLEAR(*pv);
3477 return;
3478 }
3479
3480 else {
3481 /* Multiple references, need to create new object */
3482 PyObject *v;
3483 v = bytes_concat(*pv, w);
3484 Py_DECREF(*pv);
3485 *pv = v;
3486 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003487}
3488
3489void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003490PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003491{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003492 PyBytes_Concat(pv, w);
3493 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003494}
3495
3496
/* The following function breaks the notion that bytes are immutable:
   it changes the size of a bytes object.  We get away with this only if
   there is exactly one reference to the object (the function insists on a
   reference count of 1).  You can also think of it as creating a new bytes
   object and destroying the old one, only more efficiently.  In any case,
   don't use this if the bytes object may already be known to some other
   part of the code...
   Note that if there's not enough memory to resize the bytes object, the
   original bytes object at *pv is deallocated, *pv is set to NULL, an "out
   of memory" exception is set, and -1 is returned.  Else (on success) 0 is
   returned, and the value in *pv may or may not be the same as on input.
   As always, an extra byte is allocated for a trailing \0 byte (newsize
   does *not* include that), and a trailing \0 byte is stored.
*/
3510
3511int
3512_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3513{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003514 PyObject *v;
3515 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003516 v = *pv;
3517 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3518 *pv = 0;
3519 Py_DECREF(v);
3520 PyErr_BadInternalCall();
3521 return -1;
3522 }
3523 /* XXX UNREF/NEWREF interface should be more symmetrical */
3524 _Py_DEC_REFTOTAL;
3525 _Py_ForgetReference(v);
3526 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003527 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003528 if (*pv == NULL) {
3529 PyObject_Del(v);
3530 PyErr_NoMemory();
3531 return -1;
3532 }
3533 _Py_NewReference(*pv);
3534 sv = (PyBytesObject *) *pv;
3535 Py_SIZE(sv) = newsize;
3536 sv->ob_sval[newsize] = '\0';
3537 sv->ob_shash = -1; /* invalidate cached hash value */
3538 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003539}
3540
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003541void
3542PyBytes_Fini(void)
3543{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003544 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003545 for (i = 0; i < UCHAR_MAX + 1; i++)
3546 Py_CLEAR(characters[i]);
3547 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003548}
3549
Benjamin Peterson4116f362008-05-27 00:36:20 +00003550/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003551
3552typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003553 PyObject_HEAD
3554 Py_ssize_t it_index;
3555 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003556} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003557
3558static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003559striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003561 _PyObject_GC_UNTRACK(it);
3562 Py_XDECREF(it->it_seq);
3563 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003564}
3565
3566static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003567striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003568{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003569 Py_VISIT(it->it_seq);
3570 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003571}
3572
3573static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003574striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003575{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003576 PyBytesObject *seq;
3577 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003579 assert(it != NULL);
3580 seq = it->it_seq;
3581 if (seq == NULL)
3582 return NULL;
3583 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003585 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3586 item = PyLong_FromLong(
3587 (unsigned char)seq->ob_sval[it->it_index]);
3588 if (item != NULL)
3589 ++it->it_index;
3590 return item;
3591 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003593 Py_DECREF(seq);
3594 it->it_seq = NULL;
3595 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003596}
3597
3598static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003599striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003600{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003601 Py_ssize_t len = 0;
3602 if (it->it_seq)
3603 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3604 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003605}
3606
3607PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003608 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003609
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003610static PyObject *
3611striter_reduce(striterobject *it)
3612{
3613 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003614 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003615 it->it_seq, it->it_index);
3616 } else {
3617 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3618 if (u == NULL)
3619 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003620 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003621 }
3622}
3623
3624PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3625
3626static PyObject *
3627striter_setstate(striterobject *it, PyObject *state)
3628{
3629 Py_ssize_t index = PyLong_AsSsize_t(state);
3630 if (index == -1 && PyErr_Occurred())
3631 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003632 if (it->it_seq != NULL) {
3633 if (index < 0)
3634 index = 0;
3635 else if (index > PyBytes_GET_SIZE(it->it_seq))
3636 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3637 it->it_index = index;
3638 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003639 Py_RETURN_NONE;
3640}
3641
3642PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3643
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003644static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003645 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3646 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003647 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3648 reduce_doc},
3649 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3650 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003651 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003652};
3653
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003654PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003655 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3656 "bytes_iterator", /* tp_name */
3657 sizeof(striterobject), /* tp_basicsize */
3658 0, /* tp_itemsize */
3659 /* methods */
3660 (destructor)striter_dealloc, /* tp_dealloc */
3661 0, /* tp_print */
3662 0, /* tp_getattr */
3663 0, /* tp_setattr */
3664 0, /* tp_reserved */
3665 0, /* tp_repr */
3666 0, /* tp_as_number */
3667 0, /* tp_as_sequence */
3668 0, /* tp_as_mapping */
3669 0, /* tp_hash */
3670 0, /* tp_call */
3671 0, /* tp_str */
3672 PyObject_GenericGetAttr, /* tp_getattro */
3673 0, /* tp_setattro */
3674 0, /* tp_as_buffer */
3675 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3676 0, /* tp_doc */
3677 (traverseproc)striter_traverse, /* tp_traverse */
3678 0, /* tp_clear */
3679 0, /* tp_richcompare */
3680 0, /* tp_weaklistoffset */
3681 PyObject_SelfIter, /* tp_iter */
3682 (iternextfunc)striter_next, /* tp_iternext */
3683 striter_methods, /* tp_methods */
3684 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003685};
3686
3687static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003688bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003689{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003690 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003691
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003692 if (!PyBytes_Check(seq)) {
3693 PyErr_BadInternalCall();
3694 return NULL;
3695 }
3696 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3697 if (it == NULL)
3698 return NULL;
3699 it->it_index = 0;
3700 Py_INCREF(seq);
3701 it->it_seq = (PyBytesObject *)seq;
3702 _PyObject_GC_TRACK(it);
3703 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003704}