blob: d2b52c77b3329b71852799672a24ad963234a589 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time a constructor hands out the cached
   empty bytes object (null_strings) or a cached one-byte bytes object
   (one_strings) instead of allocating a new one. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte value
   (`*str & UCHAR_MAX').  Entries are filled lazily by the constructors. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty object is built. */
static PyBytesObject *nullstring;

/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the size of the resulting object is strlen(str).

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
/* Format codes
 *
 * Bit flags OR-ed into the `flags' word by _PyBytes_Format() while it
 * scans the characters that follow '%'.  Each flag corresponds to one
 * flag character of the format specification:
 *
 * F_LJUST      '-'   (also set when a '*' width argument is negative)
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
412static PyObject *
413formatfloat(PyObject *v, int flags, int prec, int type)
414{
415 char *p;
416 PyObject *result;
417 double x;
418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
434 result = PyBytes_FromStringAndSize(p, strlen(p));
435 PyMem_Free(p);
436 return result;
437}
438
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300439static PyObject *
440formatlong(PyObject *v, int flags, int prec, int type)
441{
442 PyObject *result, *iobj;
443 if (type == 'i')
444 type = 'd';
445 if (PyLong_Check(v))
446 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
447 if (PyNumber_Check(v)) {
448 /* make sure number is a type of integer for o, x, and X */
449 if (type == 'o' || type == 'x' || type == 'X')
450 iobj = PyNumber_Index(v);
451 else
452 iobj = PyNumber_Long(v);
453 if (iobj == NULL) {
454 if (!PyErr_ExceptionMatches(PyExc_TypeError))
455 return NULL;
456 }
457 else if (!PyLong_Check(iobj))
458 Py_CLEAR(iobj);
459 if (iobj != NULL) {
460 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
461 Py_DECREF(iobj);
462 return result;
463 }
464 }
465 PyErr_Format(PyExc_TypeError,
466 "%%%c format: %s is required, not %.200s", type,
467 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
468 : "a number",
469 Py_TYPE(v)->tp_name);
470 return NULL;
471}
472
473static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200474byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800475{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200476 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
477 *p = PyBytes_AS_STRING(arg)[0];
478 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800479 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200480 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
481 *p = PyByteArray_AS_STRING(arg)[0];
482 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800483 }
484 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300485 PyObject *iobj;
486 long ival;
487 int overflow;
488 /* make sure number is a type of integer */
489 if (PyLong_Check(arg)) {
490 ival = PyLong_AsLongAndOverflow(arg, &overflow);
491 }
492 else {
493 iobj = PyNumber_Index(arg);
494 if (iobj == NULL) {
495 if (!PyErr_ExceptionMatches(PyExc_TypeError))
496 return 0;
497 goto onError;
498 }
499 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
500 Py_DECREF(iobj);
501 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300502 if (!overflow && ival == -1 && PyErr_Occurred())
503 goto onError;
504 if (overflow || !(0 <= ival && ival <= 255)) {
505 PyErr_SetString(PyExc_OverflowError,
506 "%c arg not in range(256)");
507 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800508 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300509 *p = (char)ival;
510 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800511 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300512 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200513 PyErr_SetString(PyExc_TypeError,
514 "%c requires an integer in range(256) or a single byte");
515 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800516}
517
518static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200519format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200521 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 /* is it a bytes object? */
524 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 *pbuf = PyBytes_AS_STRING(v);
526 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800527 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200528 return v;
529 }
530 if (PyByteArray_Check(v)) {
531 *pbuf = PyByteArray_AS_STRING(v);
532 *plen = PyByteArray_GET_SIZE(v);
533 Py_INCREF(v);
534 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 }
536 /* does it support __bytes__? */
537 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
538 if (func != NULL) {
539 result = PyObject_CallFunctionObjArgs(func, NULL);
540 Py_DECREF(func);
541 if (result == NULL)
542 return NULL;
543 if (!PyBytes_Check(result)) {
544 PyErr_Format(PyExc_TypeError,
545 "__bytes__ returned non-bytes (type %.200s)",
546 Py_TYPE(result)->tp_name);
547 Py_DECREF(result);
548 return NULL;
549 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200550 *pbuf = PyBytes_AS_STRING(result);
551 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 return result;
553 }
554 PyErr_Format(PyExc_TypeError,
555 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
556 Py_TYPE(v)->tp_name);
557 return NULL;
558}
559
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   NOTE(review): _PyBytes_Format() below and the format helpers above it
   build their output in PyBytes / str objects and do not reference
   FORMATBUFLEN; the macro may be a leftover -- confirm against the rest
   of the file before relying on this description.
*/
#define FORMATBUFLEN (size_t)120
569
570PyObject *
571_PyBytes_Format(PyObject *format, PyObject *args)
572{
573 char *fmt, *res;
574 Py_ssize_t arglen, argidx;
575 Py_ssize_t reslen, rescnt, fmtcnt;
576 int args_owned = 0;
577 PyObject *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578 PyObject *dict = NULL;
579 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
580 PyErr_BadInternalCall();
581 return NULL;
582 }
583 fmt = PyBytes_AS_STRING(format);
584 fmtcnt = PyBytes_GET_SIZE(format);
585 reslen = rescnt = fmtcnt + 100;
586 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
587 if (result == NULL)
588 return NULL;
589 res = PyBytes_AsString(result);
590 if (PyTuple_Check(args)) {
591 arglen = PyTuple_GET_SIZE(args);
592 argidx = 0;
593 }
594 else {
595 arglen = -1;
596 argidx = -2;
597 }
598 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
599 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
600 !PyByteArray_Check(args)) {
601 dict = args;
602 }
603 while (--fmtcnt >= 0) {
604 if (*fmt != '%') {
605 if (--rescnt < 0) {
606 rescnt = fmtcnt + 100;
607 reslen += rescnt;
608 if (_PyBytes_Resize(&result, reslen))
609 return NULL;
610 res = PyBytes_AS_STRING(result)
611 + reslen - rescnt;
612 --rescnt;
613 }
614 *res++ = *fmt++;
615 }
616 else {
617 /* Got a format specifier */
618 int flags = 0;
619 Py_ssize_t width = -1;
620 int prec = -1;
621 int c = '\0';
622 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800623 PyObject *v = NULL;
624 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200625 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800626 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200627 Py_ssize_t len = 0;
628 char onechar; /* For byte_converter() */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800629
Ethan Furmanb95b5612015-01-23 20:05:18 -0800630 fmt++;
631 if (*fmt == '(') {
632 char *keystart;
633 Py_ssize_t keylen;
634 PyObject *key;
635 int pcount = 1;
636
637 if (dict == NULL) {
638 PyErr_SetString(PyExc_TypeError,
639 "format requires a mapping");
640 goto error;
641 }
642 ++fmt;
643 --fmtcnt;
644 keystart = fmt;
645 /* Skip over balanced parentheses */
646 while (pcount > 0 && --fmtcnt >= 0) {
647 if (*fmt == ')')
648 --pcount;
649 else if (*fmt == '(')
650 ++pcount;
651 fmt++;
652 }
653 keylen = fmt - keystart - 1;
654 if (fmtcnt < 0 || pcount > 0) {
655 PyErr_SetString(PyExc_ValueError,
656 "incomplete format key");
657 goto error;
658 }
659 key = PyBytes_FromStringAndSize(keystart,
660 keylen);
661 if (key == NULL)
662 goto error;
663 if (args_owned) {
664 Py_DECREF(args);
665 args_owned = 0;
666 }
667 args = PyObject_GetItem(dict, key);
668 Py_DECREF(key);
669 if (args == NULL) {
670 goto error;
671 }
672 args_owned = 1;
673 arglen = -1;
674 argidx = -2;
675 }
676 while (--fmtcnt >= 0) {
677 switch (c = *fmt++) {
678 case '-': flags |= F_LJUST; continue;
679 case '+': flags |= F_SIGN; continue;
680 case ' ': flags |= F_BLANK; continue;
681 case '#': flags |= F_ALT; continue;
682 case '0': flags |= F_ZERO; continue;
683 }
684 break;
685 }
686 if (c == '*') {
687 v = getnextarg(args, arglen, &argidx);
688 if (v == NULL)
689 goto error;
690 if (!PyLong_Check(v)) {
691 PyErr_SetString(PyExc_TypeError,
692 "* wants int");
693 goto error;
694 }
695 width = PyLong_AsSsize_t(v);
696 if (width == -1 && PyErr_Occurred())
697 goto error;
698 if (width < 0) {
699 flags |= F_LJUST;
700 width = -width;
701 }
702 if (--fmtcnt >= 0)
703 c = *fmt++;
704 }
705 else if (c >= 0 && isdigit(c)) {
706 width = c - '0';
707 while (--fmtcnt >= 0) {
708 c = Py_CHARMASK(*fmt++);
709 if (!isdigit(c))
710 break;
711 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
712 PyErr_SetString(
713 PyExc_ValueError,
714 "width too big");
715 goto error;
716 }
717 width = width*10 + (c - '0');
718 }
719 }
720 if (c == '.') {
721 prec = 0;
722 if (--fmtcnt >= 0)
723 c = *fmt++;
724 if (c == '*') {
725 v = getnextarg(args, arglen, &argidx);
726 if (v == NULL)
727 goto error;
728 if (!PyLong_Check(v)) {
729 PyErr_SetString(
730 PyExc_TypeError,
731 "* wants int");
732 goto error;
733 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200734 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800735 if (prec == -1 && PyErr_Occurred())
736 goto error;
737 if (prec < 0)
738 prec = 0;
739 if (--fmtcnt >= 0)
740 c = *fmt++;
741 }
742 else if (c >= 0 && isdigit(c)) {
743 prec = c - '0';
744 while (--fmtcnt >= 0) {
745 c = Py_CHARMASK(*fmt++);
746 if (!isdigit(c))
747 break;
748 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
749 PyErr_SetString(
750 PyExc_ValueError,
751 "prec too big");
752 goto error;
753 }
754 prec = prec*10 + (c - '0');
755 }
756 }
757 } /* prec */
758 if (fmtcnt >= 0) {
759 if (c == 'h' || c == 'l' || c == 'L') {
760 if (--fmtcnt >= 0)
761 c = *fmt++;
762 }
763 }
764 if (fmtcnt < 0) {
765 PyErr_SetString(PyExc_ValueError,
766 "incomplete format");
767 goto error;
768 }
769 if (c != '%') {
770 v = getnextarg(args, arglen, &argidx);
771 if (v == NULL)
772 goto error;
773 }
774 sign = 0;
775 fill = ' ';
776 switch (c) {
777 case '%':
778 pbuf = "%";
779 len = 1;
780 break;
Ethan Furman62e977f2015-03-11 08:17:00 -0700781 case 'r':
782 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800783 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200784 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800785 if (temp == NULL)
786 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200787 assert(PyUnicode_IS_ASCII(temp));
788 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
789 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800790 if (prec >= 0 && len > prec)
791 len = prec;
792 break;
793 case 's':
794 // %s is only for 2/3 code; 3 only code should use %b
795 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200796 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800797 if (temp == NULL)
798 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800799 if (prec >= 0 && len > prec)
800 len = prec;
801 break;
802 case 'i':
803 case 'd':
804 case 'u':
805 case 'o':
806 case 'x':
807 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300808 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200809 if (!temp)
810 goto error;
811 assert(PyUnicode_IS_ASCII(temp));
812 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
813 len = PyUnicode_GET_LENGTH(temp);
814 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800815 if (flags & F_ZERO)
816 fill = '0';
817 break;
818 case 'e':
819 case 'E':
820 case 'f':
821 case 'F':
822 case 'g':
823 case 'G':
824 temp = formatfloat(v, flags, prec, c);
825 if (temp == NULL)
826 goto error;
827 pbuf = PyBytes_AS_STRING(temp);
828 len = PyBytes_GET_SIZE(temp);
829 sign = 1;
830 if (flags & F_ZERO)
831 fill = '0';
832 break;
833 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 pbuf = &onechar;
835 len = byte_converter(v, &onechar);
836 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 goto error;
838 break;
839 default:
840 PyErr_Format(PyExc_ValueError,
841 "unsupported format character '%c' (0x%x) "
842 "at index %zd",
843 c, c,
844 (Py_ssize_t)(fmt - 1 -
845 PyBytes_AsString(format)));
846 goto error;
847 }
848 if (sign) {
849 if (*pbuf == '-' || *pbuf == '+') {
850 sign = *pbuf++;
851 len--;
852 }
853 else if (flags & F_SIGN)
854 sign = '+';
855 else if (flags & F_BLANK)
856 sign = ' ';
857 else
858 sign = 0;
859 }
860 if (width < len)
861 width = len;
862 if (rescnt - (sign != 0) < width) {
863 reslen -= rescnt;
864 rescnt = width + fmtcnt + 100;
865 reslen += rescnt;
866 if (reslen < 0) {
867 Py_DECREF(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800868 Py_XDECREF(temp);
869 return PyErr_NoMemory();
870 }
871 if (_PyBytes_Resize(&result, reslen)) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800872 Py_XDECREF(temp);
873 return NULL;
874 }
875 res = PyBytes_AS_STRING(result)
876 + reslen - rescnt;
877 }
878 if (sign) {
879 if (fill != ' ')
880 *res++ = sign;
881 rescnt--;
882 if (width > len)
883 width--;
884 }
885 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
886 assert(pbuf[0] == '0');
887 assert(pbuf[1] == c);
888 if (fill != ' ') {
889 *res++ = *pbuf++;
890 *res++ = *pbuf++;
891 }
892 rescnt -= 2;
893 width -= 2;
894 if (width < 0)
895 width = 0;
896 len -= 2;
897 }
898 if (width > len && !(flags & F_LJUST)) {
899 do {
900 --rescnt;
901 *res++ = fill;
902 } while (--width > len);
903 }
904 if (fill == ' ') {
905 if (sign)
906 *res++ = sign;
907 if ((flags & F_ALT) &&
908 (c == 'x' || c == 'X')) {
909 assert(pbuf[0] == '0');
910 assert(pbuf[1] == c);
911 *res++ = *pbuf++;
912 *res++ = *pbuf++;
913 }
914 }
915 Py_MEMCPY(res, pbuf, len);
916 res += len;
917 rescnt -= len;
918 while (--width >= len) {
919 --rescnt;
920 *res++ = ' ';
921 }
922 if (dict && (argidx < arglen) && c != '%') {
923 PyErr_SetString(PyExc_TypeError,
924 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800925 Py_XDECREF(temp);
926 goto error;
927 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 Py_XDECREF(temp);
929 } /* '%' */
930 } /* until end */
931 if (argidx < arglen && !dict) {
932 PyErr_SetString(PyExc_TypeError,
933 "not all arguments converted during bytes formatting");
934 goto error;
935 }
936 if (args_owned) {
937 Py_DECREF(args);
938 }
939 if (_PyBytes_Resize(&result, reslen - rescnt))
940 return NULL;
941 return result;
942
943 error:
944 Py_DECREF(result);
945 if (args_owned) {
946 Py_DECREF(args);
947 }
948 return NULL;
949}
950
951/* =-= */
952
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000953static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +0000954bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000957}
958
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000959/* Unescape a backslash-escaped string. If unicode is non-zero,
960 the string is a u-literal. If recode_encoding is non-zero,
961 the string is UTF-8 encoded and should be re-encoded in the
962 specified encoding. */
963
964PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 Py_ssize_t len,
966 const char *errors,
967 Py_ssize_t unicode,
968 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 int c;
971 char *p, *buf;
972 const char *end;
973 PyObject *v;
974 Py_ssize_t newlen = recode_encoding ? 4*len:len;
975 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
976 if (v == NULL)
977 return NULL;
978 p = buf = PyBytes_AsString(v);
979 end = s + len;
980 while (s < end) {
981 if (*s != '\\') {
982 non_esc:
983 if (recode_encoding && (*s & 0x80)) {
984 PyObject *u, *w;
985 char *r;
986 const char* t;
987 Py_ssize_t rn;
988 t = s;
989 /* Decode non-ASCII bytes as UTF-8. */
990 while (t < end && (*t & 0x80)) t++;
991 u = PyUnicode_DecodeUTF8(s, t - s, errors);
992 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 /* Recode them in target encoding. */
995 w = PyUnicode_AsEncodedString(
996 u, recode_encoding, errors);
997 Py_DECREF(u);
998 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 /* Append bytes to output buffer. */
1001 assert(PyBytes_Check(w));
1002 r = PyBytes_AS_STRING(w);
1003 rn = PyBytes_GET_SIZE(w);
1004 Py_MEMCPY(p, r, rn);
1005 p += rn;
1006 Py_DECREF(w);
1007 s = t;
1008 } else {
1009 *p++ = *s++;
1010 }
1011 continue;
1012 }
1013 s++;
1014 if (s==end) {
1015 PyErr_SetString(PyExc_ValueError,
1016 "Trailing \\ in string");
1017 goto failed;
1018 }
1019 switch (*s++) {
1020 /* XXX This assumes ASCII! */
1021 case '\n': break;
1022 case '\\': *p++ = '\\'; break;
1023 case '\'': *p++ = '\''; break;
1024 case '\"': *p++ = '\"'; break;
1025 case 'b': *p++ = '\b'; break;
1026 case 'f': *p++ = '\014'; break; /* FF */
1027 case 't': *p++ = '\t'; break;
1028 case 'n': *p++ = '\n'; break;
1029 case 'r': *p++ = '\r'; break;
1030 case 'v': *p++ = '\013'; break; /* VT */
1031 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1032 case '0': case '1': case '2': case '3':
1033 case '4': case '5': case '6': case '7':
1034 c = s[-1] - '0';
1035 if (s < end && '0' <= *s && *s <= '7') {
1036 c = (c<<3) + *s++ - '0';
1037 if (s < end && '0' <= *s && *s <= '7')
1038 c = (c<<3) + *s++ - '0';
1039 }
1040 *p++ = c;
1041 break;
1042 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001043 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 unsigned int x = 0;
1045 c = Py_CHARMASK(*s);
1046 s++;
David Malcolm96960882010-11-05 17:23:41 +00001047 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001049 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 x = 10 + c - 'a';
1051 else
1052 x = 10 + c - 'A';
1053 x = x << 4;
1054 c = Py_CHARMASK(*s);
1055 s++;
David Malcolm96960882010-11-05 17:23:41 +00001056 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001058 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 x += 10 + c - 'a';
1060 else
1061 x += 10 + c - 'A';
1062 *p++ = x;
1063 break;
1064 }
1065 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001066 PyErr_Format(PyExc_ValueError,
1067 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001068 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 goto failed;
1070 }
1071 if (strcmp(errors, "replace") == 0) {
1072 *p++ = '?';
1073 } else if (strcmp(errors, "ignore") == 0)
1074 /* do nothing */;
1075 else {
1076 PyErr_Format(PyExc_ValueError,
1077 "decoding error; unknown "
1078 "error handling code: %.400s",
1079 errors);
1080 goto failed;
1081 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001082 /* skip \x */
1083 if (s < end && Py_ISXDIGIT(s[0]))
1084 s++; /* and a hexdigit */
1085 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 default:
1087 *p++ = '\\';
1088 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001089 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 UTF-8 bytes may follow. */
1091 }
1092 }
1093 if (p-buf < newlen)
1094 _PyBytes_Resize(&v, p - buf);
1095 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 Py_DECREF(v);
1098 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001099}
1100
1101/* -------------------------------------------------------------------- */
1102/* object api */
1103
1104Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001105PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001106{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 if (!PyBytes_Check(op)) {
1108 PyErr_Format(PyExc_TypeError,
1109 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1110 return -1;
1111 }
1112 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001113}
1114
1115char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001116PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (!PyBytes_Check(op)) {
1119 PyErr_Format(PyExc_TypeError,
1120 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1121 return NULL;
1122 }
1123 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001124}
1125
1126int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001127PyBytes_AsStringAndSize(PyObject *obj,
1128 char **s,
1129 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001130{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 if (s == NULL) {
1132 PyErr_BadInternalCall();
1133 return -1;
1134 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 if (!PyBytes_Check(obj)) {
1137 PyErr_Format(PyExc_TypeError,
1138 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1139 return -1;
1140 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 *s = PyBytes_AS_STRING(obj);
1143 if (len != NULL)
1144 *len = PyBytes_GET_SIZE(obj);
1145 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001146 PyErr_SetString(PyExc_ValueError,
1147 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 return -1;
1149 }
1150 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001151}
Neal Norwitz6968b052007-02-27 19:02:19 +00001152
1153/* -------------------------------------------------------------------- */
1154/* Methods */
1155
Eric Smith0923d1d2009-04-16 20:16:10 +00001156#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001157
1158#include "stringlib/fastsearch.h"
1159#include "stringlib/count.h"
1160#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001161#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001162#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001163#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001164#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001165
Eric Smith0f78bff2009-11-30 01:01:42 +00001166#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001167
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168PyObject *
1169PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001170{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001171 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001172 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001173 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001175 unsigned char quote, *s, *p;
1176
1177 /* Compute size of output string */
1178 squotes = dquotes = 0;
1179 newsize = 3; /* b'' */
1180 s = (unsigned char*)op->ob_sval;
1181 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001182 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001183 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001184 case '\'': squotes++; break;
1185 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001186 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001187 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001188 default:
1189 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001190 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001191 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001192 if (newsize > PY_SSIZE_T_MAX - incr)
1193 goto overflow;
1194 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001195 }
1196 quote = '\'';
1197 if (smartquotes && squotes && !dquotes)
1198 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001199 if (squotes && quote == '\'') {
1200 if (newsize > PY_SSIZE_T_MAX - squotes)
1201 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001202 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001204
1205 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 if (v == NULL) {
1207 return NULL;
1208 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001209 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001211 *p++ = 'b', *p++ = quote;
1212 for (i = 0; i < length; i++) {
1213 unsigned char c = op->ob_sval[i];
1214 if (c == quote || c == '\\')
1215 *p++ = '\\', *p++ = c;
1216 else if (c == '\t')
1217 *p++ = '\\', *p++ = 't';
1218 else if (c == '\n')
1219 *p++ = '\\', *p++ = 'n';
1220 else if (c == '\r')
1221 *p++ = '\\', *p++ = 'r';
1222 else if (c < ' ' || c >= 0x7f) {
1223 *p++ = '\\';
1224 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001225 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1226 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001228 else
1229 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001231 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001232 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001233 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001234
1235 overflow:
1236 PyErr_SetString(PyExc_OverflowError,
1237 "bytes object is too large to make repr");
1238 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001239}
1240
Neal Norwitz6968b052007-02-27 19:02:19 +00001241static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001242bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001245}
1246
Neal Norwitz6968b052007-02-27 19:02:19 +00001247static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001248bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 if (Py_BytesWarningFlag) {
1251 if (PyErr_WarnEx(PyExc_BytesWarning,
1252 "str() on a bytes instance", 1))
1253 return NULL;
1254 }
1255 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001256}
1257
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001258static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001259bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001262}
Neal Norwitz6968b052007-02-27 19:02:19 +00001263
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001264/* This is also used by PyBytes_Concat() */
1265static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001266bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001267{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 Py_ssize_t size;
1269 Py_buffer va, vb;
1270 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 va.len = -1;
1273 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001274 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1275 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1277 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1278 goto done;
1279 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 /* Optimize end cases */
1282 if (va.len == 0 && PyBytes_CheckExact(b)) {
1283 result = b;
1284 Py_INCREF(result);
1285 goto done;
1286 }
1287 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1288 result = a;
1289 Py_INCREF(result);
1290 goto done;
1291 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 size = va.len + vb.len;
1294 if (size < 0) {
1295 PyErr_NoMemory();
1296 goto done;
1297 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001298
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 result = PyBytes_FromStringAndSize(NULL, size);
1300 if (result != NULL) {
1301 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1302 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1303 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001304
1305 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 if (va.len != -1)
1307 PyBuffer_Release(&va);
1308 if (vb.len != -1)
1309 PyBuffer_Release(&vb);
1310 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311}
Neal Norwitz6968b052007-02-27 19:02:19 +00001312
1313static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001314bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001315{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001316 Py_ssize_t i;
1317 Py_ssize_t j;
1318 Py_ssize_t size;
1319 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 size_t nbytes;
1321 if (n < 0)
1322 n = 0;
1323 /* watch out for overflows: the size can overflow int,
1324 * and the # of bytes needed can overflow size_t
1325 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001326 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyErr_SetString(PyExc_OverflowError,
1328 "repeated bytes are too long");
1329 return NULL;
1330 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001331 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1333 Py_INCREF(a);
1334 return (PyObject *)a;
1335 }
1336 nbytes = (size_t)size;
1337 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1338 PyErr_SetString(PyExc_OverflowError,
1339 "repeated bytes are too long");
1340 return NULL;
1341 }
1342 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1343 if (op == NULL)
1344 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001345 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 op->ob_shash = -1;
1347 op->ob_sval[size] = '\0';
1348 if (Py_SIZE(a) == 1 && n > 0) {
1349 memset(op->ob_sval, a->ob_sval[0] , n);
1350 return (PyObject *) op;
1351 }
1352 i = 0;
1353 if (i < size) {
1354 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1355 i = Py_SIZE(a);
1356 }
1357 while (i < size) {
1358 j = (i <= size-i) ? i : size-i;
1359 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1360 i += j;
1361 }
1362 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001363}
1364
Guido van Rossum98297ee2007-11-06 21:34:58 +00001365static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001366bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001367{
1368 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1369 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001370 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001371 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001372 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001373 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001374 return -1;
1375 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1376 varg.buf, varg.len, 0);
1377 PyBuffer_Release(&varg);
1378 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001379 }
1380 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001381 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1382 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001383 }
1384
Antoine Pitrou0010d372010-08-15 17:12:55 +00001385 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001386}
1387
Neal Norwitz6968b052007-02-27 19:02:19 +00001388static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001389bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 if (i < 0 || i >= Py_SIZE(a)) {
1392 PyErr_SetString(PyExc_IndexError, "index out of range");
1393 return NULL;
1394 }
1395 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001396}
1397
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001398Py_LOCAL(int)
1399bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1400{
1401 int cmp;
1402 Py_ssize_t len;
1403
1404 len = Py_SIZE(a);
1405 if (Py_SIZE(b) != len)
1406 return 0;
1407
1408 if (a->ob_sval[0] != b->ob_sval[0])
1409 return 0;
1410
1411 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1412 return (cmp == 0);
1413}
1414
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001415static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001416bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 int c;
1419 Py_ssize_t len_a, len_b;
1420 Py_ssize_t min_len;
1421 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 /* Make sure both arguments are strings. */
1424 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001425 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1426 if (PyObject_IsInstance((PyObject*)a,
1427 (PyObject*)&PyUnicode_Type) ||
1428 PyObject_IsInstance((PyObject*)b,
1429 (PyObject*)&PyUnicode_Type)) {
1430 if (PyErr_WarnEx(PyExc_BytesWarning,
1431 "Comparison between bytes and string", 1))
1432 return NULL;
1433 }
1434 else if (PyObject_IsInstance((PyObject*)a,
1435 (PyObject*)&PyLong_Type) ||
1436 PyObject_IsInstance((PyObject*)b,
1437 (PyObject*)&PyLong_Type)) {
1438 if (PyErr_WarnEx(PyExc_BytesWarning,
1439 "Comparison between bytes and int", 1))
1440 return NULL;
1441 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 }
1443 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001445 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001447 case Py_EQ:
1448 case Py_LE:
1449 case Py_GE:
1450 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001452 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001453 case Py_NE:
1454 case Py_LT:
1455 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001457 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001458 default:
1459 PyErr_BadArgument();
1460 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 }
1462 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001463 else if (op == Py_EQ || op == Py_NE) {
1464 int eq = bytes_compare_eq(a, b);
1465 eq ^= (op == Py_NE);
1466 result = eq ? Py_True : Py_False;
1467 }
1468 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001469 len_a = Py_SIZE(a);
1470 len_b = Py_SIZE(b);
1471 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001472 if (min_len > 0) {
1473 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001474 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001475 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001477 else
1478 c = 0;
1479 if (c == 0)
1480 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1481 switch (op) {
1482 case Py_LT: c = c < 0; break;
1483 case Py_LE: c = c <= 0; break;
1484 case Py_GT: c = c > 0; break;
1485 case Py_GE: c = c >= 0; break;
1486 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001487 PyErr_BadArgument();
1488 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001489 }
1490 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 Py_INCREF(result);
1494 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001495}
1496
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001497static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001498bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001499{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001500 if (a->ob_shash == -1) {
1501 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001502 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001503 }
1504 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001505}
1506
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001507static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001508bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001509{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 if (PyIndex_Check(item)) {
1511 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1512 if (i == -1 && PyErr_Occurred())
1513 return NULL;
1514 if (i < 0)
1515 i += PyBytes_GET_SIZE(self);
1516 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1517 PyErr_SetString(PyExc_IndexError,
1518 "index out of range");
1519 return NULL;
1520 }
1521 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1522 }
1523 else if (PySlice_Check(item)) {
1524 Py_ssize_t start, stop, step, slicelength, cur, i;
1525 char* source_buf;
1526 char* result_buf;
1527 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001528
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001529 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 PyBytes_GET_SIZE(self),
1531 &start, &stop, &step, &slicelength) < 0) {
1532 return NULL;
1533 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 if (slicelength <= 0) {
1536 return PyBytes_FromStringAndSize("", 0);
1537 }
1538 else if (start == 0 && step == 1 &&
1539 slicelength == PyBytes_GET_SIZE(self) &&
1540 PyBytes_CheckExact(self)) {
1541 Py_INCREF(self);
1542 return (PyObject *)self;
1543 }
1544 else if (step == 1) {
1545 return PyBytes_FromStringAndSize(
1546 PyBytes_AS_STRING(self) + start,
1547 slicelength);
1548 }
1549 else {
1550 source_buf = PyBytes_AS_STRING(self);
1551 result = PyBytes_FromStringAndSize(NULL, slicelength);
1552 if (result == NULL)
1553 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 result_buf = PyBytes_AS_STRING(result);
1556 for (cur = start, i = 0; i < slicelength;
1557 cur += step, i++) {
1558 result_buf[i] = source_buf[cur];
1559 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 return result;
1562 }
1563 }
1564 else {
1565 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001566 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 Py_TYPE(item)->tp_name);
1568 return NULL;
1569 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001570}
1571
1572static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001573bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1576 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001577}
1578
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001579static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 (lenfunc)bytes_length, /*sq_length*/
1581 (binaryfunc)bytes_concat, /*sq_concat*/
1582 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1583 (ssizeargfunc)bytes_item, /*sq_item*/
1584 0, /*sq_slice*/
1585 0, /*sq_ass_item*/
1586 0, /*sq_ass_slice*/
1587 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001588};
1589
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001590static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 (lenfunc)bytes_length,
1592 (binaryfunc)bytes_subscript,
1593 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001594};
1595
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001596static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 (getbufferproc)bytes_buffer_getbuffer,
1598 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001599};
1600
1601
/* Strip-direction selectors.  NOTE(review): presumably consumed by the
   strip helpers further down the file -- confirm against their callers. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2
1605
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001606/*[clinic input]
1607bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001608
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001609 sep: object = None
1610 The delimiter according which to split the bytes.
1611 None (the default value) means split on ASCII whitespace characters
1612 (space, tab, return, newline, formfeed, vertical tab).
1613 maxsplit: Py_ssize_t = -1
1614 Maximum number of splits to do.
1615 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001616
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001617Return a list of the sections in the bytes, using sep as the delimiter.
1618[clinic start generated code]*/
1619
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001620static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001621bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001622/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001623{
1624 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 const char *s = PyBytes_AS_STRING(self), *sub;
1626 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001627 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 if (maxsplit < 0)
1630 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001631 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001633 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 return NULL;
1635 sub = vsub.buf;
1636 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1639 PyBuffer_Release(&vsub);
1640 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001641}
1642
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001643/*[clinic input]
1644bytes.partition
1645
1646 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001647 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001648 /
1649
1650Partition the bytes into three parts using the given separator.
1651
1652This will search for the separator sep in the bytes. If the separator is found,
1653returns a 3-tuple containing the part before the separator, the separator
1654itself, and the part after it.
1655
1656If the separator is not found, returns a 3-tuple containing the original bytes
1657object and two empty bytes objects.
1658[clinic start generated code]*/
1659
Neal Norwitz6968b052007-02-27 19:02:19 +00001660static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001661bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001662/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001663{
Neal Norwitz6968b052007-02-27 19:02:19 +00001664 return stringlib_partition(
1665 (PyObject*) self,
1666 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001667 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001668 );
1669}
1670
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001671/*[clinic input]
1672bytes.rpartition
1673
1674 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001675 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001676 /
1677
1678Partition the bytes into three parts using the given separator.
1679
1680This will search for the separator sep in the bytes, starting and the end. If
1681the separator is found, returns a 3-tuple containing the part before the
1682separator, the separator itself, and the part after it.
1683
1684If the separator is not found, returns a 3-tuple containing two empty bytes
1685objects and the original bytes object.
1686[clinic start generated code]*/
1687
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001688static PyObject *
1689bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001690/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001691{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001692 return stringlib_rpartition(
1693 (PyObject*) self,
1694 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001695 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001697}
1698
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001699/*[clinic input]
1700bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001701
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001702Return a list of the sections in the bytes, using sep as the delimiter.
1703
1704Splitting is done starting at the end of the bytes and working to the front.
1705[clinic start generated code]*/
1706
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001707static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001708bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001709/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001710{
1711 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 const char *s = PyBytes_AS_STRING(self), *sub;
1713 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001714 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 if (maxsplit < 0)
1717 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001718 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001720 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 return NULL;
1722 sub = vsub.buf;
1723 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1726 PyBuffer_Release(&vsub);
1727 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001728}
1729
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001731/*[clinic input]
1732bytes.join
1733
1734 iterable_of_bytes: object
1735 /
1736
1737Concatenate any number of bytes objects.
1738
1739The bytes whose method is called is inserted in between each pair.
1740
1741The result is returned as a new bytes object.
1742
1743Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1744[clinic start generated code]*/
1745
Neal Norwitz6968b052007-02-27 19:02:19 +00001746static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001747bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001748/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001749{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001750 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001751}
1752
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001753PyObject *
1754_PyBytes_Join(PyObject *sep, PyObject *x)
1755{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001756 assert(sep != NULL && PyBytes_Check(sep));
1757 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001758 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001759}
1760
/* Helper macro to fix up start/end slice values in place.

   `end` is clamped to `len`; a negative `end` or `start` is taken relative
   to `len` and floored at 0.  `start` is NOT clamped from above.

   `start` and `end` must be modifiable lvalues.  Every argument may be
   evaluated more than once, so pass side-effect-free expressions.

   The expansion is wrapped in do { ... } while (0) so that it forms a single
   statement: `if (cond) ADJUST_INDICES(s, e, n); else ...` guards both
   adjustments and compiles, instead of guarding only the `end` fix-up. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001775
1776Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001777bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001778{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001780 char byte;
1781 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001783 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001784 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001785 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001786
Antoine Pitrouac65d962011-10-20 23:54:17 +02001787 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1788 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001790
Antoine Pitrouac65d962011-10-20 23:54:17 +02001791 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001792 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001793 return -2;
1794
1795 sub = subbuf.buf;
1796 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001798 else {
1799 sub = &byte;
1800 sub_len = 1;
1801 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001802 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001803
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001804 ADJUST_INDICES(start, end, len);
1805 if (end - start < sub_len)
1806 res = -1;
Victor Stinnerdabbfe72015-03-25 03:16:32 +01001807 /* Issue #23573: FIXME, windows has no memrchr() */
1808 else if (sub_len == 1 && dir > 0) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001809 unsigned char needle = *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001810 res = stringlib_fastsearch_memchr_1char(
1811 PyBytes_AS_STRING(self) + start, end - start,
Christian Heimes4e259132015-04-18 05:54:02 +02001812 needle, needle, FAST_SEARCH);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001813 if (res >= 0)
1814 res += start;
1815 }
1816 else {
1817 if (dir > 0)
1818 res = stringlib_find_slice(
1819 PyBytes_AS_STRING(self), len,
1820 sub, sub_len, start, end);
1821 else
1822 res = stringlib_rfind_slice(
1823 PyBytes_AS_STRING(self), len,
1824 sub, sub_len, start, end);
1825 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001826
1827 if (subobj)
1828 PyBuffer_Release(&subbuf);
1829
1830 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831}
1832
1833
1834PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001835"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001836\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001837Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001838such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001840\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001841Return -1 on failure.");
1842
Neal Norwitz6968b052007-02-27 19:02:19 +00001843static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001844bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001845{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 Py_ssize_t result = bytes_find_internal(self, args, +1);
1847 if (result == -2)
1848 return NULL;
1849 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001850}
1851
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001852
1853PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001854"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001855\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856Like B.find() but raise ValueError when the substring is not found.");
1857
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001858static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001859bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001860{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 Py_ssize_t result = bytes_find_internal(self, args, +1);
1862 if (result == -2)
1863 return NULL;
1864 if (result == -1) {
1865 PyErr_SetString(PyExc_ValueError,
1866 "substring not found");
1867 return NULL;
1868 }
1869 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001870}
1871
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872
1873PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001874"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001875\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001876Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001877such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001879\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001880Return -1 on failure.");
1881
Neal Norwitz6968b052007-02-27 19:02:19 +00001882static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001883bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 Py_ssize_t result = bytes_find_internal(self, args, -1);
1886 if (result == -2)
1887 return NULL;
1888 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001889}
1890
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001891
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001893"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894\n\
1895Like B.rfind() but raise ValueError when the substring is not found.");
1896
1897static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001898bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 Py_ssize_t result = bytes_find_internal(self, args, -1);
1901 if (result == -2)
1902 return NULL;
1903 if (result == -1) {
1904 PyErr_SetString(PyExc_ValueError,
1905 "substring not found");
1906 return NULL;
1907 }
1908 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001909}
1910
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911
1912Py_LOCAL_INLINE(PyObject *)
1913do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001914{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 Py_buffer vsep;
1916 char *s = PyBytes_AS_STRING(self);
1917 Py_ssize_t len = PyBytes_GET_SIZE(self);
1918 char *sep;
1919 Py_ssize_t seplen;
1920 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001922 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 return NULL;
1924 sep = vsep.buf;
1925 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001927 i = 0;
1928 if (striptype != RIGHTSTRIP) {
1929 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1930 i++;
1931 }
1932 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001933
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 j = len;
1935 if (striptype != LEFTSTRIP) {
1936 do {
1937 j--;
1938 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1939 j++;
1940 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1945 Py_INCREF(self);
1946 return (PyObject*)self;
1947 }
1948 else
1949 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001950}
1951
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001952
1953Py_LOCAL_INLINE(PyObject *)
1954do_strip(PyBytesObject *self, int striptype)
1955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 char *s = PyBytes_AS_STRING(self);
1957 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 i = 0;
1960 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00001961 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 i++;
1963 }
1964 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 j = len;
1967 if (striptype != LEFTSTRIP) {
1968 do {
1969 j--;
David Malcolm96960882010-11-05 17:23:41 +00001970 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 j++;
1972 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1975 Py_INCREF(self);
1976 return (PyObject*)self;
1977 }
1978 else
1979 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001980}
1981
1982
1983Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001984do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001985{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001986 if (bytes != NULL && bytes != Py_None) {
1987 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001988 }
1989 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990}
1991
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001992/*[clinic input]
1993bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001995 self: self(type="PyBytesObject *")
1996 bytes: object = None
1997 /
1998
1999Strip leading and trailing bytes contained in the argument.
2000
2001If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2002[clinic start generated code]*/
2003
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002004static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002005bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002006/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002007{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002008 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002009}
2010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002011/*[clinic input]
2012bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002014 self: self(type="PyBytesObject *")
2015 bytes: object = None
2016 /
2017
2018Strip leading bytes contained in the argument.
2019
2020If the argument is omitted or None, strip leading ASCII whitespace.
2021[clinic start generated code]*/
2022
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002023static PyObject *
2024bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002025/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002026{
2027 return do_argstrip(self, LEFTSTRIP, bytes);
2028}
2029
2030/*[clinic input]
2031bytes.rstrip
2032
2033 self: self(type="PyBytesObject *")
2034 bytes: object = None
2035 /
2036
2037Strip trailing bytes contained in the argument.
2038
2039If the argument is omitted or None, strip trailing ASCII whitespace.
2040[clinic start generated code]*/
2041
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002042static PyObject *
2043bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002044/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002045{
2046 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002047}
Neal Norwitz6968b052007-02-27 19:02:19 +00002048
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
2050PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002051"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002052\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002053Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002054string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055as in slice notation.");
2056
2057static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002058bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 PyObject *sub_obj;
2061 const char *str = PyBytes_AS_STRING(self), *sub;
2062 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002063 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002065
Antoine Pitrouac65d962011-10-20 23:54:17 +02002066 Py_buffer vsub;
2067 PyObject *count_obj;
2068
2069 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2070 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072
Antoine Pitrouac65d962011-10-20 23:54:17 +02002073 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002074 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002075 return NULL;
2076
2077 sub = vsub.buf;
2078 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002080 else {
2081 sub = &byte;
2082 sub_len = 1;
2083 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002086
Antoine Pitrouac65d962011-10-20 23:54:17 +02002087 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002088 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2089 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002090
2091 if (sub_obj)
2092 PyBuffer_Release(&vsub);
2093
2094 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095}
2096
2097
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098/*[clinic input]
2099bytes.translate
2100
2101 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002102 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002103 Translation table, which must be a bytes object of length 256.
2104 [
2105 deletechars: object
2106 ]
2107 /
2108
2109Return a copy with each character mapped by the given translation table.
2110
2111All characters occurring in the optional argument deletechars are removed.
2112The remaining characters are mapped through the given translation table.
2113[clinic start generated code]*/
2114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002116bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2117 PyObject *deletechars)
2118/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002120 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002121 Py_buffer table_view = {NULL, NULL};
2122 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002123 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002124 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002127 Py_ssize_t inlen, tablen, dellen = 0;
2128 PyObject *result;
2129 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131 if (PyBytes_Check(table)) {
2132 table_chars = PyBytes_AS_STRING(table);
2133 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002135 else if (table == Py_None) {
2136 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002137 tablen = 256;
2138 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002139 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002140 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002141 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002142 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002143 tablen = table_view.len;
2144 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 if (tablen != 256) {
2147 PyErr_SetString(PyExc_ValueError,
2148 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002149 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002150 return NULL;
2151 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002152
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002153 if (deletechars != NULL) {
2154 if (PyBytes_Check(deletechars)) {
2155 del_table_chars = PyBytes_AS_STRING(deletechars);
2156 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002158 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002159 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002160 PyBuffer_Release(&table_view);
2161 return NULL;
2162 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002163 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002164 dellen = del_table_view.len;
2165 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 }
2167 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002168 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 dellen = 0;
2170 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 inlen = PyBytes_GET_SIZE(input_obj);
2173 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002174 if (result == NULL) {
2175 PyBuffer_Release(&del_table_view);
2176 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002177 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002178 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002179 output_start = output = PyBytes_AsString(result);
2180 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002181
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 /* If no deletions are required, use faster code */
2184 for (i = inlen; --i >= 0; ) {
2185 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002186 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 changed = 1;
2188 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002189 if (!changed && PyBytes_CheckExact(input_obj)) {
2190 Py_INCREF(input_obj);
2191 Py_DECREF(result);
2192 result = input_obj;
2193 }
2194 PyBuffer_Release(&del_table_view);
2195 PyBuffer_Release(&table_view);
2196 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002198
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 for (i = 0; i < 256; i++)
2201 trans_table[i] = Py_CHARMASK(i);
2202 } else {
2203 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002204 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002205 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002206 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002208 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002209 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002210 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002212 for (i = inlen; --i >= 0; ) {
2213 c = Py_CHARMASK(*input++);
2214 if (trans_table[c] != -1)
2215 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2216 continue;
2217 changed = 1;
2218 }
2219 if (!changed && PyBytes_CheckExact(input_obj)) {
2220 Py_DECREF(result);
2221 Py_INCREF(input_obj);
2222 return input_obj;
2223 }
2224 /* Fix the size of the resulting string */
2225 if (inlen > 0)
2226 _PyBytes_Resize(&result, output - output_start);
2227 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002228}
2229
2230
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002231/*[clinic input]
2232
2233@staticmethod
2234bytes.maketrans
2235
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002236 frm: Py_buffer
2237 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002238 /
2239
2240Return a translation table useable for the bytes or bytearray translate method.
2241
2242The returned table will be one where each byte in frm is mapped to the byte at
2243the same position in to.
2244
2245The bytes objects frm and to must be of the same length.
2246[clinic start generated code]*/
2247
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002248static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002249bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002250/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251{
2252 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002253}
2254
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255/* find and count characters and substrings */
2256
/* memchr() wrapper: pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2259
2260/* String ops must return a string. */
2261/* If the object is subclass of string, create a copy */
2262Py_LOCAL(PyBytesObject *)
2263return_self(PyBytesObject *self)
2264{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002265 if (PyBytes_CheckExact(self)) {
2266 Py_INCREF(self);
2267 return self;
2268 }
2269 return (PyBytesObject *)PyBytes_FromStringAndSize(
2270 PyBytes_AS_STRING(self),
2271 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002272}
2273
2274Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002275countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002276{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002277 Py_ssize_t count=0;
2278 const char *start=target;
2279 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002281 while ( (start=findchar(start, end-start, c)) != NULL ) {
2282 count++;
2283 if (count >= maxcount)
2284 break;
2285 start += 1;
2286 }
2287 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288}
2289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002290
2291/* Algorithms for different cases of string replacement */
2292
2293/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2294Py_LOCAL(PyBytesObject *)
2295replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 const char *to_s, Py_ssize_t to_len,
2297 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002299 char *self_s, *result_s;
2300 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002301 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002304 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002305
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002306 /* 1 at the end plus 1 after every character;
2307 count = min(maxcount, self_len + 1) */
2308 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002309 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002310 else
2311 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2312 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 /* Check for overflow */
2315 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002316 assert(count > 0);
2317 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 PyErr_SetString(PyExc_OverflowError,
2319 "replacement bytes are too long");
2320 return NULL;
2321 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002322 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 if (! (result = (PyBytesObject *)
2325 PyBytes_FromStringAndSize(NULL, result_len)) )
2326 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002328 self_s = PyBytes_AS_STRING(self);
2329 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002331 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002332
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 /* Lay the first one down (guaranteed this will occur) */
2334 Py_MEMCPY(result_s, to_s, to_len);
2335 result_s += to_len;
2336 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002337
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002338 for (i=0; i<count; i++) {
2339 *result_s++ = *self_s++;
2340 Py_MEMCPY(result_s, to_s, to_len);
2341 result_s += to_len;
2342 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 /* Copy the rest of the original string */
2345 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002347 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348}
2349
2350/* Special case for deleting a single character */
2351/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2352Py_LOCAL(PyBytesObject *)
2353replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002354 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002356 char *self_s, *result_s;
2357 char *start, *next, *end;
2358 Py_ssize_t self_len, result_len;
2359 Py_ssize_t count;
2360 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002361
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002362 self_len = PyBytes_GET_SIZE(self);
2363 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 count = countchar(self_s, self_len, from_c, maxcount);
2366 if (count == 0) {
2367 return return_self(self);
2368 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002370 result_len = self_len - count; /* from_len == 1 */
2371 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002373 if ( (result = (PyBytesObject *)
2374 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2375 return NULL;
2376 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 start = self_s;
2379 end = self_s + self_len;
2380 while (count-- > 0) {
2381 next = findchar(start, end-start, from_c);
2382 if (next == NULL)
2383 break;
2384 Py_MEMCPY(result_s, start, next-start);
2385 result_s += (next-start);
2386 start = next+1;
2387 }
2388 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002390 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002391}
2392
2393/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2394
2395Py_LOCAL(PyBytesObject *)
2396replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002397 const char *from_s, Py_ssize_t from_len,
2398 Py_ssize_t maxcount) {
2399 char *self_s, *result_s;
2400 char *start, *next, *end;
2401 Py_ssize_t self_len, result_len;
2402 Py_ssize_t count, offset;
2403 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002405 self_len = PyBytes_GET_SIZE(self);
2406 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 count = stringlib_count(self_s, self_len,
2409 from_s, from_len,
2410 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 if (count == 0) {
2413 /* no matches */
2414 return return_self(self);
2415 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 result_len = self_len - (count * from_len);
2418 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 if ( (result = (PyBytesObject *)
2421 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2422 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002424 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 start = self_s;
2427 end = self_s + self_len;
2428 while (count-- > 0) {
2429 offset = stringlib_find(start, end-start,
2430 from_s, from_len,
2431 0);
2432 if (offset == -1)
2433 break;
2434 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 result_s += (next-start);
2439 start = next+from_len;
2440 }
2441 Py_MEMCPY(result_s, start, end-start);
2442 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002443}
2444
2445/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2446Py_LOCAL(PyBytesObject *)
2447replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002448 char from_c, char to_c,
2449 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 char *self_s, *result_s, *start, *end, *next;
2452 Py_ssize_t self_len;
2453 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 /* The result string will be the same size */
2456 self_s = PyBytes_AS_STRING(self);
2457 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002459 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 if (next == NULL) {
2462 /* No matches; return the original string */
2463 return return_self(self);
2464 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002466 /* Need to make a new string */
2467 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2468 if (result == NULL)
2469 return NULL;
2470 result_s = PyBytes_AS_STRING(result);
2471 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002473 /* change everything in-place, starting with this one */
2474 start = result_s + (next-self_s);
2475 *start = to_c;
2476 start++;
2477 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 while (--maxcount > 0) {
2480 next = findchar(start, end-start, from_c);
2481 if (next == NULL)
2482 break;
2483 *next = to_c;
2484 start = next+1;
2485 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002488}
2489
2490/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2491Py_LOCAL(PyBytesObject *)
2492replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 const char *from_s, Py_ssize_t from_len,
2494 const char *to_s, Py_ssize_t to_len,
2495 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002496{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002497 char *result_s, *start, *end;
2498 char *self_s;
2499 Py_ssize_t self_len, offset;
2500 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 self_s = PyBytes_AS_STRING(self);
2505 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 offset = stringlib_find(self_s, self_len,
2508 from_s, from_len,
2509 0);
2510 if (offset == -1) {
2511 /* No matches; return the original string */
2512 return return_self(self);
2513 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 /* Need to make a new string */
2516 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2517 if (result == NULL)
2518 return NULL;
2519 result_s = PyBytes_AS_STRING(result);
2520 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002522 /* change everything in-place, starting with this one */
2523 start = result_s + offset;
2524 Py_MEMCPY(start, to_s, from_len);
2525 start += from_len;
2526 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 while ( --maxcount > 0) {
2529 offset = stringlib_find(start, end-start,
2530 from_s, from_len,
2531 0);
2532 if (offset==-1)
2533 break;
2534 Py_MEMCPY(start+offset, to_s, from_len);
2535 start += offset+from_len;
2536 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002539}
2540
2541/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2542Py_LOCAL(PyBytesObject *)
2543replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 char from_c,
2545 const char *to_s, Py_ssize_t to_len,
2546 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002547{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 char *self_s, *result_s;
2549 char *start, *next, *end;
2550 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002551 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002552 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 self_s = PyBytes_AS_STRING(self);
2555 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 count = countchar(self_s, self_len, from_c, maxcount);
2558 if (count == 0) {
2559 /* no matches, return unchanged */
2560 return return_self(self);
2561 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 /* use the difference between current and new, hence the "-1" */
2564 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002565 assert(count > 0);
2566 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002567 PyErr_SetString(PyExc_OverflowError,
2568 "replacement bytes are too long");
2569 return NULL;
2570 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002571 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 if ( (result = (PyBytesObject *)
2574 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2575 return NULL;
2576 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 start = self_s;
2579 end = self_s + self_len;
2580 while (count-- > 0) {
2581 next = findchar(start, end-start, from_c);
2582 if (next == NULL)
2583 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 if (next == start) {
2586 /* replace with the 'to' */
2587 Py_MEMCPY(result_s, to_s, to_len);
2588 result_s += to_len;
2589 start += 1;
2590 } else {
2591 /* copy the unchanged old then the 'to' */
2592 Py_MEMCPY(result_s, start, next-start);
2593 result_s += (next-start);
2594 Py_MEMCPY(result_s, to_s, to_len);
2595 result_s += to_len;
2596 start = next+1;
2597 }
2598 }
2599 /* Copy the remainder of the remaining string */
2600 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002602 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603}
2604
2605/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2606Py_LOCAL(PyBytesObject *)
2607replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002608 const char *from_s, Py_ssize_t from_len,
2609 const char *to_s, Py_ssize_t to_len,
2610 Py_ssize_t maxcount) {
2611 char *self_s, *result_s;
2612 char *start, *next, *end;
2613 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002614 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002617 self_s = PyBytes_AS_STRING(self);
2618 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 count = stringlib_count(self_s, self_len,
2621 from_s, from_len,
2622 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 if (count == 0) {
2625 /* no matches, return unchanged */
2626 return return_self(self);
2627 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 /* Check for overflow */
2630 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002631 assert(count > 0);
2632 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 PyErr_SetString(PyExc_OverflowError,
2634 "replacement bytes are too long");
2635 return NULL;
2636 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002637 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 if ( (result = (PyBytesObject *)
2640 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2641 return NULL;
2642 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 start = self_s;
2645 end = self_s + self_len;
2646 while (count-- > 0) {
2647 offset = stringlib_find(start, end-start,
2648 from_s, from_len,
2649 0);
2650 if (offset == -1)
2651 break;
2652 next = start+offset;
2653 if (next == start) {
2654 /* replace with the 'to' */
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start += from_len;
2658 } else {
2659 /* copy the unchanged old then the 'to' */
2660 Py_MEMCPY(result_s, start, next-start);
2661 result_s += (next-start);
2662 Py_MEMCPY(result_s, to_s, to_len);
2663 result_s += to_len;
2664 start = next+from_len;
2665 }
2666 }
2667 /* Copy the remainder of the remaining string */
2668 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671}
2672
2673
2674Py_LOCAL(PyBytesObject *)
2675replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 const char *from_s, Py_ssize_t from_len,
2677 const char *to_s, Py_ssize_t to_len,
2678 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002679{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 if (maxcount < 0) {
2681 maxcount = PY_SSIZE_T_MAX;
2682 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2683 /* nothing to do; return the original string */
2684 return return_self(self);
2685 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002687 if (maxcount == 0 ||
2688 (from_len == 0 && to_len == 0)) {
2689 /* nothing to do; return the original string */
2690 return return_self(self);
2691 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002695 if (from_len == 0) {
2696 /* insert the 'to' string everywhere. */
2697 /* >>> "Python".replace("", ".") */
2698 /* '.P.y.t.h.o.n.' */
2699 return replace_interleave(self, to_s, to_len, maxcount);
2700 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2703 /* point for an empty self string to generate a non-empty string */
2704 /* Special case so the remaining code always gets a non-empty string */
2705 if (PyBytes_GET_SIZE(self) == 0) {
2706 return return_self(self);
2707 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002709 if (to_len == 0) {
2710 /* delete all occurrences of 'from' string */
2711 if (from_len == 1) {
2712 return replace_delete_single_character(
2713 self, from_s[0], maxcount);
2714 } else {
2715 return replace_delete_substring(self, from_s,
2716 from_len, maxcount);
2717 }
2718 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 if (from_len == to_len) {
2723 if (from_len == 1) {
2724 return replace_single_character_in_place(
2725 self,
2726 from_s[0],
2727 to_s[0],
2728 maxcount);
2729 } else {
2730 return replace_substring_in_place(
2731 self, from_s, from_len, to_s, to_len,
2732 maxcount);
2733 }
2734 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002736 /* Otherwise use the more generic algorithms */
2737 if (from_len == 1) {
2738 return replace_single_character(self, from_s[0],
2739 to_s, to_len, maxcount);
2740 } else {
2741 /* len('from')>=2, len('to')>=1 */
2742 return replace_substring(self, from_s, from_len, to_s, to_len,
2743 maxcount);
2744 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002745}
2746
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002747
2748/*[clinic input]
2749bytes.replace
2750
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002751 old: Py_buffer
2752 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002753 count: Py_ssize_t = -1
2754 Maximum number of occurrences to replace.
2755 -1 (the default value) means replace all occurrences.
2756 /
2757
2758Return a copy with all occurrences of substring old replaced by new.
2759
2760If the optional argument count is given, only the first count occurrences are
2761replaced.
2762[clinic start generated code]*/
2763
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002764static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002765bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2766 Py_ssize_t count)
2767/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002768{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002770 (const char *)old->buf, old->len,
2771 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002772}
2773
2774/** End DALKE **/
2775
2776/* Matches the end (direction >= 0) or start (direction < 0) of self
2777 * against substr, using the start and end arguments. Returns
2778 * -1 on error, 0 if not found and 1 if found.
2779 */
2780Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002781_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002782 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 Py_ssize_t len = PyBytes_GET_SIZE(self);
2785 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002786 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002787 const char* sub;
2788 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002790 if (PyBytes_Check(substr)) {
2791 sub = PyBytes_AS_STRING(substr);
2792 slen = PyBytes_GET_SIZE(substr);
2793 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002794 else {
2795 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2796 return -1;
2797 sub = sub_view.buf;
2798 slen = sub_view.len;
2799 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002800 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 if (direction < 0) {
2805 /* startswith */
2806 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002807 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 } else {
2809 /* endswith */
2810 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002811 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 if (end-slen > start)
2814 start = end - slen;
2815 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002816 if (end-start < slen)
2817 goto notfound;
2818 if (memcmp(str+start, sub, slen) != 0)
2819 goto notfound;
2820
2821 PyBuffer_Release(&sub_view);
2822 return 1;
2823
2824notfound:
2825 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002826 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002827}
2828
2829
2830PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002831"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002832\n\
2833Return True if B starts with the specified prefix, False otherwise.\n\
2834With optional start, test B beginning at that position.\n\
2835With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002836prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837
2838static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002839bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002840{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002841 Py_ssize_t start = 0;
2842 Py_ssize_t end = PY_SSIZE_T_MAX;
2843 PyObject *subobj;
2844 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002845
Jesus Ceaac451502011-04-20 17:09:23 +02002846 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002847 return NULL;
2848 if (PyTuple_Check(subobj)) {
2849 Py_ssize_t i;
2850 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2851 result = _bytes_tailmatch(self,
2852 PyTuple_GET_ITEM(subobj, i),
2853 start, end, -1);
2854 if (result == -1)
2855 return NULL;
2856 else if (result) {
2857 Py_RETURN_TRUE;
2858 }
2859 }
2860 Py_RETURN_FALSE;
2861 }
2862 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002863 if (result == -1) {
2864 if (PyErr_ExceptionMatches(PyExc_TypeError))
2865 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2866 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002867 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002868 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 else
2870 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871}
2872
2873
2874PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002875"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002876\n\
2877Return True if B ends with the specified suffix, False otherwise.\n\
2878With optional start, test B beginning at that position.\n\
2879With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002880suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881
2882static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002883bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002885 Py_ssize_t start = 0;
2886 Py_ssize_t end = PY_SSIZE_T_MAX;
2887 PyObject *subobj;
2888 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002889
Jesus Ceaac451502011-04-20 17:09:23 +02002890 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002891 return NULL;
2892 if (PyTuple_Check(subobj)) {
2893 Py_ssize_t i;
2894 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2895 result = _bytes_tailmatch(self,
2896 PyTuple_GET_ITEM(subobj, i),
2897 start, end, +1);
2898 if (result == -1)
2899 return NULL;
2900 else if (result) {
2901 Py_RETURN_TRUE;
2902 }
2903 }
2904 Py_RETURN_FALSE;
2905 }
2906 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002907 if (result == -1) {
2908 if (PyErr_ExceptionMatches(PyExc_TypeError))
2909 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2910 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002911 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002912 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002913 else
2914 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002915}
2916
2917
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002918/*[clinic input]
2919bytes.decode
2920
2921 encoding: str(c_default="NULL") = 'utf-8'
2922 The encoding with which to decode the bytes.
2923 errors: str(c_default="NULL") = 'strict'
2924 The error handling scheme to use for the handling of decoding errors.
2925 The default is 'strict' meaning that decoding errors raise a
2926 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2927 as well as any other name registered with codecs.register_error that
2928 can handle UnicodeDecodeErrors.
2929
2930Decode the bytes using the codec registered for encoding.
2931[clinic start generated code]*/
2932
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002933static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002934bytes_decode_impl(PyBytesObject*self, const char *encoding,
2935 const char *errors)
2936/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002937{
Martin v. Löwis0efea322014-07-27 17:29:17 +02002938 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00002939}
2940
Guido van Rossum20188312006-05-05 15:15:40 +00002941
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002942/*[clinic input]
2943bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002944
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002945 keepends: int(py_default="False") = 0
2946
2947Return a list of the lines in the bytes, breaking at line boundaries.
2948
2949Line breaks are not included in the resulting list unless keepends is given and
2950true.
2951[clinic start generated code]*/
2952
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002953static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002954bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002955/*[clinic end generated code: output=995c3598f7833cad input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002956{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002957 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00002958 (PyObject*) self, PyBytes_AS_STRING(self),
2959 PyBytes_GET_SIZE(self), keepends
2960 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002961}
2962
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002963static int
Victor Stinner6430fd52011-09-29 04:02:13 +02002964hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002965{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002966 if (c >= 128)
2967 return -1;
David Malcolm96960882010-11-05 17:23:41 +00002968 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 return c - '0';
2970 else {
David Malcolm96960882010-11-05 17:23:41 +00002971 if (Py_ISUPPER(c))
2972 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002973 if (c >= 'a' && c <= 'f')
2974 return c - 'a' + 10;
2975 }
2976 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002977}
2978
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002979/*[clinic input]
2980@classmethod
2981bytes.fromhex
2982
2983 string: unicode
2984 /
2985
2986Create a bytes object from a string of hexadecimal numbers.
2987
2988Spaces between two numbers are accepted.
2989Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2990[clinic start generated code]*/
2991
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002992static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002993bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002994/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002995{
2996 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002997 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002998 Py_ssize_t hexlen, byteslen, i, j;
2999 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003000 void *data;
3001 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003002
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003003 assert(PyUnicode_Check(string));
3004 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003006 kind = PyUnicode_KIND(string);
3007 data = PyUnicode_DATA(string);
3008 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003010 byteslen = hexlen/2; /* This overestimates if there are spaces */
3011 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3012 if (!newstring)
3013 return NULL;
3014 buf = PyBytes_AS_STRING(newstring);
3015 for (i = j = 0; i < hexlen; i += 2) {
3016 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003017 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003018 i++;
3019 if (i >= hexlen)
3020 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003021 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3022 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003023 if (top == -1 || bot == -1) {
3024 PyErr_Format(PyExc_ValueError,
3025 "non-hexadecimal number found in "
3026 "fromhex() arg at position %zd", i);
3027 goto error;
3028 }
3029 buf[j++] = (top << 4) + bot;
3030 }
3031 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3032 goto error;
3033 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003034
3035 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 Py_XDECREF(newstring);
3037 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003038}
3039
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003040PyDoc_STRVAR(hex__doc__,
3041"B.hex() -> string\n\
3042\n\
3043Create a string of hexadecimal numbers from a bytes object.\n\
3044Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3045
3046static PyObject *
3047bytes_hex(PyBytesObject *self)
3048{
3049 char* argbuf = PyBytes_AS_STRING(self);
3050 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3051 return _Py_strhex(argbuf, arglen);
3052}
3053
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003054static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003055bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003056{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003057 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003058}
3059
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003060
3061static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003062bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003063 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3064 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3065 _Py_capitalize__doc__},
3066 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3067 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003068 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003069 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3070 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003071 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003072 expandtabs__doc__},
3073 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003074 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003075 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003076 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3077 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3078 _Py_isalnum__doc__},
3079 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3080 _Py_isalpha__doc__},
3081 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3082 _Py_isdigit__doc__},
3083 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3084 _Py_islower__doc__},
3085 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3086 _Py_isspace__doc__},
3087 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3088 _Py_istitle__doc__},
3089 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3090 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003091 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3093 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003094 BYTES_LSTRIP_METHODDEF
3095 BYTES_MAKETRANS_METHODDEF
3096 BYTES_PARTITION_METHODDEF
3097 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003098 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3099 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3100 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003101 BYTES_RPARTITION_METHODDEF
3102 BYTES_RSPLIT_METHODDEF
3103 BYTES_RSTRIP_METHODDEF
3104 BYTES_SPLIT_METHODDEF
3105 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003106 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3107 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003108 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003109 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3110 _Py_swapcase__doc__},
3111 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003112 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003113 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3114 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003115 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003116};
3117
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003118static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003119bytes_mod(PyObject *v, PyObject *w)
3120{
3121 if (!PyBytes_Check(v))
3122 Py_RETURN_NOTIMPLEMENTED;
3123 return _PyBytes_Format(v, w);
3124}
3125
/* Number protocol for bytes: only the remainder slot is filled in (by
   bytes_mod); the slots after nb_remainder are left zero-initialised. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
3132
3133static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003134str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3135
3136static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003137bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003139 PyObject *x = NULL;
3140 const char *encoding = NULL;
3141 const char *errors = NULL;
3142 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003143 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003144 Py_ssize_t size;
3145 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003146 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003148 if (type != &PyBytes_Type)
3149 return str_subtype_new(type, args, kwds);
3150 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3151 &encoding, &errors))
3152 return NULL;
3153 if (x == NULL) {
3154 if (encoding != NULL || errors != NULL) {
3155 PyErr_SetString(PyExc_TypeError,
3156 "encoding or errors without sequence "
3157 "argument");
3158 return NULL;
3159 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003160 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003161 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003163 if (PyUnicode_Check(x)) {
3164 /* Encode via the codec registry */
3165 if (encoding == NULL) {
3166 PyErr_SetString(PyExc_TypeError,
3167 "string argument without an encoding");
3168 return NULL;
3169 }
3170 new = PyUnicode_AsEncodedString(x, encoding, errors);
3171 if (new == NULL)
3172 return NULL;
3173 assert(PyBytes_Check(new));
3174 return new;
3175 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003176
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003177 /* If it's not unicode, there can't be encoding or errors */
3178 if (encoding != NULL || errors != NULL) {
3179 PyErr_SetString(PyExc_TypeError,
3180 "encoding or errors without a string argument");
3181 return NULL;
3182 }
3183
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003184 /* We'd like to call PyObject_Bytes here, but we need to check for an
3185 integer argument before deferring to PyBytes_FromObject, something
3186 PyObject_Bytes doesn't do. */
3187 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3188 if (func != NULL) {
3189 new = PyObject_CallFunctionObjArgs(func, NULL);
3190 Py_DECREF(func);
3191 if (new == NULL)
3192 return NULL;
3193 if (!PyBytes_Check(new)) {
3194 PyErr_Format(PyExc_TypeError,
3195 "__bytes__ returned non-bytes (type %.200s)",
3196 Py_TYPE(new)->tp_name);
3197 Py_DECREF(new);
3198 return NULL;
3199 }
3200 return new;
3201 }
3202 else if (PyErr_Occurred())
3203 return NULL;
3204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003205 /* Is it an integer? */
3206 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3207 if (size == -1 && PyErr_Occurred()) {
3208 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3209 return NULL;
3210 PyErr_Clear();
3211 }
3212 else if (size < 0) {
3213 PyErr_SetString(PyExc_ValueError, "negative count");
3214 return NULL;
3215 }
3216 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003217 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003218 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003219 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003220 return new;
3221 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003222
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003223 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003224}
3225
3226PyObject *
3227PyBytes_FromObject(PyObject *x)
3228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003229 PyObject *new, *it;
3230 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003232 if (x == NULL) {
3233 PyErr_BadInternalCall();
3234 return NULL;
3235 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003236
3237 if (PyBytes_CheckExact(x)) {
3238 Py_INCREF(x);
3239 return x;
3240 }
3241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003242 /* Use the modern buffer interface */
3243 if (PyObject_CheckBuffer(x)) {
3244 Py_buffer view;
3245 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3246 return NULL;
3247 new = PyBytes_FromStringAndSize(NULL, view.len);
3248 if (!new)
3249 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003250 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3251 &view, view.len, 'C') < 0)
3252 goto fail;
3253 PyBuffer_Release(&view);
3254 return new;
3255 fail:
3256 Py_XDECREF(new);
3257 PyBuffer_Release(&view);
3258 return NULL;
3259 }
3260 if (PyUnicode_Check(x)) {
3261 PyErr_SetString(PyExc_TypeError,
3262 "cannot convert unicode object to bytes");
3263 return NULL;
3264 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003266 if (PyList_CheckExact(x)) {
3267 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3268 if (new == NULL)
3269 return NULL;
3270 for (i = 0; i < Py_SIZE(x); i++) {
3271 Py_ssize_t value = PyNumber_AsSsize_t(
3272 PyList_GET_ITEM(x, i), PyExc_ValueError);
3273 if (value == -1 && PyErr_Occurred()) {
3274 Py_DECREF(new);
3275 return NULL;
3276 }
3277 if (value < 0 || value >= 256) {
3278 PyErr_SetString(PyExc_ValueError,
3279 "bytes must be in range(0, 256)");
3280 Py_DECREF(new);
3281 return NULL;
3282 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003283 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003284 }
3285 return new;
3286 }
3287 if (PyTuple_CheckExact(x)) {
3288 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3289 if (new == NULL)
3290 return NULL;
3291 for (i = 0; i < Py_SIZE(x); i++) {
3292 Py_ssize_t value = PyNumber_AsSsize_t(
3293 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3294 if (value == -1 && PyErr_Occurred()) {
3295 Py_DECREF(new);
3296 return NULL;
3297 }
3298 if (value < 0 || value >= 256) {
3299 PyErr_SetString(PyExc_ValueError,
3300 "bytes must be in range(0, 256)");
3301 Py_DECREF(new);
3302 return NULL;
3303 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003304 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003305 }
3306 return new;
3307 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003309 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003310 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003311 if (size == -1 && PyErr_Occurred())
3312 return NULL;
3313 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3314 returning a shared empty bytes string. This required because we
3315 want to call _PyBytes_Resize() the returned object, which we can
3316 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003317 if (size == 0)
3318 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003319 new = PyBytes_FromStringAndSize(NULL, size);
3320 if (new == NULL)
3321 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003322 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 /* Get the iterator */
3325 it = PyObject_GetIter(x);
3326 if (it == NULL)
3327 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003328
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003329 /* Run the iterator to exhaustion */
3330 for (i = 0; ; i++) {
3331 PyObject *item;
3332 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003334 /* Get the next item */
3335 item = PyIter_Next(it);
3336 if (item == NULL) {
3337 if (PyErr_Occurred())
3338 goto error;
3339 break;
3340 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003342 /* Interpret it as an int (__index__) */
3343 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3344 Py_DECREF(item);
3345 if (value == -1 && PyErr_Occurred())
3346 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003348 /* Range check */
3349 if (value < 0 || value >= 256) {
3350 PyErr_SetString(PyExc_ValueError,
3351 "bytes must be in range(0, 256)");
3352 goto error;
3353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003355 /* Append the byte */
3356 if (i >= size) {
3357 size = 2 * size + 1;
3358 if (_PyBytes_Resize(&new, size) < 0)
3359 goto error;
3360 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003361 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003362 }
3363 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003365 /* Clean up and return success */
3366 Py_DECREF(it);
3367 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003368
3369 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003370 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003371 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003372 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003373}
3374
3375static PyObject *
3376str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3377{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003378 PyObject *tmp, *pnew;
3379 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003381 assert(PyType_IsSubtype(type, &PyBytes_Type));
3382 tmp = bytes_new(&PyBytes_Type, args, kwds);
3383 if (tmp == NULL)
3384 return NULL;
3385 assert(PyBytes_CheckExact(tmp));
3386 n = PyBytes_GET_SIZE(tmp);
3387 pnew = type->tp_alloc(type, n);
3388 if (pnew != NULL) {
3389 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3390 PyBytes_AS_STRING(tmp), n+1);
3391 ((PyBytesObject *)pnew)->ob_shash =
3392 ((PyBytesObject *)tmp)->ob_shash;
3393 }
3394 Py_DECREF(tmp);
3395 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003396}
3397
/* Docstring of the bytes type (installed as tp_doc of PyBytes_Type).  It
   lists the constructor forms accepted by bytes_new(). */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - any object implementing the buffer API.\n\
 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003410
/* Forward declaration: bytes_iter() is defined after the iterator type
   further down but is needed for the tp_iter slot below. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object of the built-in bytes type.  It is a variable-size type:
   the basic size is PyBytesObject_SIZE and each item is one char. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003455
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003456void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003457PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003459 assert(pv != NULL);
3460 if (*pv == NULL)
3461 return;
3462 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003463 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003464 return;
3465 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003466
3467 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3468 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003469 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003470 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003471
Antoine Pitrou161d6952014-05-01 14:36:20 +02003472 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003473 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003474 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3475 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3476 Py_CLEAR(*pv);
3477 return;
3478 }
3479
3480 oldsize = PyBytes_GET_SIZE(*pv);
3481 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3482 PyErr_NoMemory();
3483 goto error;
3484 }
3485 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3486 goto error;
3487
3488 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3489 PyBuffer_Release(&wb);
3490 return;
3491
3492 error:
3493 PyBuffer_Release(&wb);
3494 Py_CLEAR(*pv);
3495 return;
3496 }
3497
3498 else {
3499 /* Multiple references, need to create new object */
3500 PyObject *v;
3501 v = bytes_concat(*pv, w);
3502 Py_DECREF(*pv);
3503 *pv = v;
3504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003505}
3506
3507void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003508PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003509{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003510 PyBytes_Concat(pv, w);
3511 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003512}
3513
3514
Ethan Furmanb95b5612015-01-23 20:05:18 -08003515/* The following function breaks the notion that bytes are immutable:
3516 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003517 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003518 as creating a new bytes object and destroying the old one, only
3519 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003520 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003521 Note that if there's not enough memory to resize the bytes object, the
3522 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003523 memory" exception is set, and -1 is returned. Else (on success) 0 is
3524 returned, and the value in *pv may or may not be the same as on input.
3525 As always, an extra byte is allocated for a trailing \0 byte (newsize
3526 does *not* include that), and a trailing \0 byte is stored.
3527*/
3528
3529int
3530_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3531{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003532 PyObject *v;
3533 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003534 v = *pv;
3535 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3536 *pv = 0;
3537 Py_DECREF(v);
3538 PyErr_BadInternalCall();
3539 return -1;
3540 }
3541 /* XXX UNREF/NEWREF interface should be more symmetrical */
3542 _Py_DEC_REFTOTAL;
3543 _Py_ForgetReference(v);
3544 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003545 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003546 if (*pv == NULL) {
3547 PyObject_Del(v);
3548 PyErr_NoMemory();
3549 return -1;
3550 }
3551 _Py_NewReference(*pv);
3552 sv = (PyBytesObject *) *pv;
3553 Py_SIZE(sv) = newsize;
3554 sv->ob_sval[newsize] = '\0';
3555 sv->ob_shash = -1; /* invalidate cached hash value */
3556 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003557}
3558
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003559void
3560PyBytes_Fini(void)
3561{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003562 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003563 for (i = 0; i < UCHAR_MAX + 1; i++)
3564 Py_CLEAR(characters[i]);
3565 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003566}
3567
Benjamin Peterson4116f362008-05-27 00:36:20 +00003568/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003569
/* Instance layout of the bytes iterator (PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte striter_next() yields */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003575
/* tp_dealloc of the bytes iterator: untrack it from the GC, release the
   reference to the iterated bytes object (if any), and free the iterator. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);     /* it_seq is NULL once exhausted */
    PyObject_GC_Del(it);
}
3583
/* tp_traverse of the bytes iterator: the only object reference it owns is
   it_seq. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    /* Py_VISIT relies on the parameters being named `visit` and `arg`. */
    Py_VISIT(it->it_seq);
    return 0;
}
3590
3591static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003592striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003593{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003594 PyBytesObject *seq;
3595 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003597 assert(it != NULL);
3598 seq = it->it_seq;
3599 if (seq == NULL)
3600 return NULL;
3601 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003603 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3604 item = PyLong_FromLong(
3605 (unsigned char)seq->ob_sval[it->it_index]);
3606 if (item != NULL)
3607 ++it->it_index;
3608 return item;
3609 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003611 Py_DECREF(seq);
3612 it->it_seq = NULL;
3613 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003614}
3615
3616static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003617striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003618{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003619 Py_ssize_t len = 0;
3620 if (it->it_seq)
3621 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3622 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003623}
3624
3625PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003626 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003627
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003628static PyObject *
3629striter_reduce(striterobject *it)
3630{
3631 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003632 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003633 it->it_seq, it->it_index);
3634 } else {
3635 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3636 if (u == NULL)
3637 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003638 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003639 }
3640}
3641
3642PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3643
3644static PyObject *
3645striter_setstate(striterobject *it, PyObject *state)
3646{
3647 Py_ssize_t index = PyLong_AsSsize_t(state);
3648 if (index == -1 && PyErr_Occurred())
3649 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003650 if (it->it_seq != NULL) {
3651 if (index < 0)
3652 index = 0;
3653 else if (index > PyBytes_GET_SIZE(it->it_seq))
3654 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3655 it->it_index = index;
3656 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003657 Py_RETURN_NONE;
3658}
3659
3660PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3661
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003662static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003663 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3664 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003665 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3666 reduce_doc},
3667 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3668 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003669 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003670};
3671
/* Type object of the iterator returned by bytes_iter().  Instances are
   GC-tracked (Py_TPFLAGS_HAVE_GC) and are their own iterator. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3704
3705static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003706bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003708 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003710 if (!PyBytes_Check(seq)) {
3711 PyErr_BadInternalCall();
3712 return NULL;
3713 }
3714 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3715 if (it == NULL)
3716 return NULL;
3717 it->it_index = 0;
3718 Py_INCREF(seq);
3719 it->it_seq = (PyBytesObject *)seq;
3720 _PyObject_GC_TRACK(it);
3721 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003722}