blob: b2fbc926228d8de4c90fa835634620f3f4e2d1ec [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00008#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020010/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020011class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020012[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020013/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020014
Christian Heimes2c9c7a52008-05-26 13:42:13 +000015#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000016Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000017#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000018
Christian Heimes2c9c7a52008-05-26 13:42:13 +000019static PyBytesObject *characters[UCHAR_MAX + 1];
20static PyBytesObject *nullstring;
21
Mark Dickinsonfd24b322008-12-06 15:33:31 +000022/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
23 for a string of length n should request PyBytesObject_SIZE + n bytes.
24
25 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
26 3 bytes per string allocation on a typical system.
27*/
28#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
29
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the size of the new object is the length of that string.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020052static PyObject *
53_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000054{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020055 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020056 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000059#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000061#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 Py_INCREF(op);
63 return (PyObject *)op;
64 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000065
Victor Stinner049e5092014-08-17 22:20:00 +020066 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 PyErr_SetString(PyExc_OverflowError,
68 "byte string is too large");
69 return NULL;
70 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020073 if (use_calloc)
74 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
75 else
76 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 if (op == NULL)
78 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010079 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020081 if (!use_calloc)
82 op->ob_sval[size] = '\0';
83 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (size == 0) {
85 nullstring = op;
86 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020087 }
88 return (PyObject *) op;
89}
90
91PyObject *
92PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
93{
94 PyBytesObject *op;
95 if (size < 0) {
96 PyErr_SetString(PyExc_SystemError,
97 "Negative size passed to PyBytes_FromStringAndSize");
98 return NULL;
99 }
100 if (size == 1 && str != NULL &&
101 (op = characters[*str & UCHAR_MAX]) != NULL)
102 {
103#ifdef COUNT_ALLOCS
104 one_strings++;
105#endif
106 Py_INCREF(op);
107 return (PyObject *)op;
108 }
109
110 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
111 if (op == NULL)
112 return NULL;
113 if (str == NULL)
114 return (PyObject *) op;
115
116 Py_MEMCPY(op->ob_sval, str, size);
117 /* share short strings */
118 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 characters[*str & UCHAR_MAX] = op;
120 Py_INCREF(op);
121 }
122 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000123}
124
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000125PyObject *
126PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200128 size_t size;
129 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 assert(str != NULL);
132 size = strlen(str);
133 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
134 PyErr_SetString(PyExc_OverflowError,
135 "byte string is too long");
136 return NULL;
137 }
138 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000139#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000141#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
144 }
145 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 /* Inline PyObject_NewVar */
154 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
155 if (op == NULL)
156 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100157 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 op->ob_shash = -1;
159 Py_MEMCPY(op->ob_sval, str, size+1);
160 /* share short strings */
161 if (size == 0) {
162 nullstring = op;
163 Py_INCREF(op);
164 } else if (size == 1) {
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
167 }
168 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000169}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000170
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000171PyObject *
172PyBytes_FromFormatV(const char *format, va_list vargs)
173{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000179
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000180 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 /* step 1: figure out how large a buffer we need */
182 for (f = format; *f; f++) {
183 if (*f == '%') {
184 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000185 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
189 * they don't affect the amount of space we reserve.
190 */
191 if ((*f == 'l' || *f == 'z') &&
192 (f[1] == 'd' || f[1] == 'u'))
193 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 switch (*f) {
196 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100197 {
198 int c = va_arg(count, int);
199 if (c < 0 || c > 255) {
200 PyErr_SetString(PyExc_OverflowError,
201 "PyBytes_FromFormatV(): %c format "
202 "expects an integer in range [0; 255]");
203 return NULL;
204 }
205 n++;
206 break;
207 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 case '%':
209 n++;
210 break;
211 case 'd': case 'u': case 'i': case 'x':
212 (void) va_arg(count, int);
213 /* 20 bytes is enough to hold a 64-bit
214 integer. Decimal takes the most space.
215 This isn't enough for octal. */
216 n += 20;
217 break;
218 case 's':
219 s = va_arg(count, char*);
220 n += strlen(s);
221 break;
222 case 'p':
223 (void) va_arg(count, int);
224 /* maximum 64-bit pointer representation:
225 * 0xffffffffffffffff
226 * so 19 characters is enough.
227 * XXX I count 18 -- what's the extra for?
228 */
229 n += 19;
230 break;
231 default:
232 /* if we stumble upon an unknown
233 formatting code, copy the rest of
234 the format string to the output
235 string. (we cannot just skip the
236 code, since there's no way to know
237 what's in the argument list) */
238 n += strlen(p);
239 goto expand;
240 }
241 } else
242 n++;
243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000244 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 /* step 2: fill the buffer */
246 /* Since we've analyzed how much space we need for the worst case,
247 use sprintf directly instead of the slower PyOS_snprintf. */
248 string = PyBytes_FromStringAndSize(NULL, n);
249 if (!string)
250 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 for (f = format; *f; f++) {
255 if (*f == '%') {
256 const char* p = f++;
257 Py_ssize_t i;
258 int longflag = 0;
259 int size_tflag = 0;
260 /* parse the width.precision part (we're only
261 interested in the precision value, if any) */
262 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000263 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 n = (n*10) + *f++ - '0';
265 if (*f == '.') {
266 f++;
267 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000268 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 n = (n*10) + *f++ - '0';
270 }
David Malcolm96960882010-11-05 17:23:41 +0000271 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 f++;
273 /* handle the long flag, but only for %ld and %lu.
274 others can be added when necessary. */
275 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
276 longflag = 1;
277 ++f;
278 }
279 /* handle the size_t flag. */
280 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
281 size_tflag = 1;
282 ++f;
283 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 switch (*f) {
286 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100287 {
288 int c = va_arg(vargs, int);
289 /* c has been checked for overflow in the first step */
290 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100292 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 case 'd':
294 if (longflag)
295 sprintf(s, "%ld", va_arg(vargs, long));
296 else if (size_tflag)
297 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
298 va_arg(vargs, Py_ssize_t));
299 else
300 sprintf(s, "%d", va_arg(vargs, int));
301 s += strlen(s);
302 break;
303 case 'u':
304 if (longflag)
305 sprintf(s, "%lu",
306 va_arg(vargs, unsigned long));
307 else if (size_tflag)
308 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
309 va_arg(vargs, size_t));
310 else
311 sprintf(s, "%u",
312 va_arg(vargs, unsigned int));
313 s += strlen(s);
314 break;
315 case 'i':
316 sprintf(s, "%i", va_arg(vargs, int));
317 s += strlen(s);
318 break;
319 case 'x':
320 sprintf(s, "%x", va_arg(vargs, int));
321 s += strlen(s);
322 break;
323 case 's':
324 p = va_arg(vargs, char*);
325 i = strlen(p);
326 if (n > 0 && i > n)
327 i = n;
328 Py_MEMCPY(s, p, i);
329 s += i;
330 break;
331 case 'p':
332 sprintf(s, "%p", va_arg(vargs, void*));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (s[1] == 'X')
335 s[1] = 'x';
336 else if (s[1] != 'x') {
337 memmove(s+2, s, strlen(s)+1);
338 s[0] = '0';
339 s[1] = 'x';
340 }
341 s += strlen(s);
342 break;
343 case '%':
344 *s++ = '%';
345 break;
346 default:
347 strcpy(s, p);
348 s += strlen(s);
349 goto end;
350 }
351 } else
352 *s++ = *f;
353 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000354
355 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
357 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000358}
359
360PyObject *
361PyBytes_FromFormat(const char *format, ...)
362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyObject* ret;
364 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000365
366#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000370#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 ret = PyBytes_FromFormatV(format, vargs);
372 va_end(vargs);
373 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000374}
375
Ethan Furmanb95b5612015-01-23 20:05:18 -0800376/* Helpers for formatstring */
377
378Py_LOCAL_INLINE(PyObject *)
379getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
380{
381 Py_ssize_t argidx = *p_argidx;
382 if (argidx < arglen) {
383 (*p_argidx)++;
384 if (arglen < 0)
385 return args;
386 else
387 return PyTuple_GetItem(args, argidx);
388 }
389 PyErr_SetString(PyExc_TypeError,
390 "not enough arguments for format string");
391 return NULL;
392}
393
/* Flag bits collected while parsing a format specifier; each corresponds
 * to one flag character:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
406
407/* Returns a new reference to a PyBytes object, or NULL on failure. */
408
409static PyObject *
410formatfloat(PyObject *v, int flags, int prec, int type)
411{
412 char *p;
413 PyObject *result;
414 double x;
415
416 x = PyFloat_AsDouble(v);
417 if (x == -1.0 && PyErr_Occurred()) {
418 PyErr_Format(PyExc_TypeError, "float argument required, "
419 "not %.200s", Py_TYPE(v)->tp_name);
420 return NULL;
421 }
422
423 if (prec < 0)
424 prec = 6;
425
426 p = PyOS_double_to_string(x, type, prec,
427 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
428
429 if (p == NULL)
430 return NULL;
431 result = PyBytes_FromStringAndSize(p, strlen(p));
432 PyMem_Free(p);
433 return result;
434}
435
436/* format_long emulates the format codes d, u, o, x and X, and
437 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
438 * Python's regular ints.
439 * Return value: a new PyBytes*, or NULL if error.
440 * . *pbuf is set to point into it,
441 * *plen set to the # of chars following that.
442 * Caller must decref it when done using pbuf.
443 * The string starting at *pbuf is of the form
444 * "-"? ("0x" | "0X")? digit+
445 * "0x"/"0X" are present only for x and X conversions, with F_ALT
446 * set in flags. The case of hex digits will be correct,
447 * There will be at least prec digits, zero-filled on the left if
448 * necessary to get that many.
449 * val object to be converted
450 * flags bitmask of format flags; only F_ALT is looked at
451 * prec minimum number of digits; 0-fill on left if needed
452 * type a character in [duoxX]; u acts the same as d
453 *
454 * CAUTION: o, x and X conversions on regular ints can never
455 * produce a '-' sign, but can for Python's unbounded ints.
456 */
457
458static PyObject *
459format_long(PyObject *val, int flags, int prec, int type,
460 char **pbuf, int *plen)
461{
462 PyObject *s;
463 PyObject *result = NULL;
464
465 s = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);
466 if (!s)
467 return NULL;
468 result = _PyUnicode_AsASCIIString(s, "strict");
469 Py_DECREF(s);
470 if (!result)
471 return NULL;
472 *pbuf = PyBytes_AS_STRING(result);
473 *plen = PyBytes_GET_SIZE(result);
474 return result;
475}
476
477Py_LOCAL_INLINE(int)
478formatchar(char *buf, size_t buflen, PyObject *v)
479{
480 PyObject *w = NULL;
481 /* convert bytearray to bytes */
482 if (PyByteArray_Check(v)) {
483 w = PyBytes_FromObject(v);
484 if (w == NULL)
485 goto error;
486 v = w;
487 }
488 /* presume that the buffer is at least 2 characters long */
489 if (PyBytes_Check(v)) {
490 if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))
491 goto error;
492 }
493 else {
494 long ival = PyLong_AsLong(v);
495 if (ival == -1 && PyErr_Occurred()) {
496 PyErr_SetString(PyExc_TypeError,
497 "%c requires an integer in range(256) or a single byte");
498 goto error;
499 }
500 if (ival < 0 || ival > 255) {
501 PyErr_SetString(PyExc_TypeError,
502 "%c requires an integer in range(256) or a single byte");
503 goto error;
504 }
Victor Stinner5474d0b2015-01-26 16:43:36 +0100505 buf[0] = (char)ival;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800506 }
507 Py_XDECREF(w);
508 buf[1] = '\0';
509 return 1;
510
511 error:
512 Py_XDECREF(w);
513 return -1;
514}
515
516static PyObject *
517format_obj(PyObject *v)
518{
519 PyObject *result = NULL, *w = NULL;
520 PyObject *func;
521 _Py_IDENTIFIER(__bytes__);
522 /* convert bytearray to bytes */
523 if (PyByteArray_Check(v)) {
524 w = PyBytes_FromObject(v);
525 if (w == NULL)
526 return NULL;
527 v = w;
528 }
529 /* is it a bytes object? */
530 if (PyBytes_Check(v)) {
531 result = v;
532 Py_INCREF(v);
533 Py_XDECREF(w);
534 return result;
535 }
536 /* does it support __bytes__? */
537 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
538 if (func != NULL) {
539 result = PyObject_CallFunctionObjArgs(func, NULL);
540 Py_DECREF(func);
541 if (result == NULL)
542 return NULL;
543 if (!PyBytes_Check(result)) {
544 PyErr_Format(PyExc_TypeError,
545 "__bytes__ returned non-bytes (type %.200s)",
546 Py_TYPE(result)->tp_name);
547 Py_DECREF(result);
548 return NULL;
549 }
550 return result;
551 }
552 PyErr_Format(PyExc_TypeError,
553 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
554 Py_TYPE(v)->tp_name);
555 return NULL;
556}
557
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the scratch buffer in which single
   characters (%c) are formatted.  XXX This is a magic number.  The
   formatting routine must not write past it (formatchar() writes exactly
   two bytes), but a better solution may be to malloc a buffer of
   appropriate size for each format.  For now, the current solution is
   sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
567
568PyObject *
569_PyBytes_Format(PyObject *format, PyObject *args)
570{
571 char *fmt, *res;
572 Py_ssize_t arglen, argidx;
573 Py_ssize_t reslen, rescnt, fmtcnt;
574 int args_owned = 0;
575 PyObject *result;
576 PyObject *repr;
577 PyObject *dict = NULL;
578 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
579 PyErr_BadInternalCall();
580 return NULL;
581 }
582 fmt = PyBytes_AS_STRING(format);
583 fmtcnt = PyBytes_GET_SIZE(format);
584 reslen = rescnt = fmtcnt + 100;
585 result = PyBytes_FromStringAndSize((char *)NULL, reslen);
586 if (result == NULL)
587 return NULL;
588 res = PyBytes_AsString(result);
589 if (PyTuple_Check(args)) {
590 arglen = PyTuple_GET_SIZE(args);
591 argidx = 0;
592 }
593 else {
594 arglen = -1;
595 argidx = -2;
596 }
597 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
598 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
599 !PyByteArray_Check(args)) {
600 dict = args;
601 }
602 while (--fmtcnt >= 0) {
603 if (*fmt != '%') {
604 if (--rescnt < 0) {
605 rescnt = fmtcnt + 100;
606 reslen += rescnt;
607 if (_PyBytes_Resize(&result, reslen))
608 return NULL;
609 res = PyBytes_AS_STRING(result)
610 + reslen - rescnt;
611 --rescnt;
612 }
613 *res++ = *fmt++;
614 }
615 else {
616 /* Got a format specifier */
617 int flags = 0;
618 Py_ssize_t width = -1;
619 int prec = -1;
620 int c = '\0';
621 int fill;
622 int isnumok;
623 PyObject *v = NULL;
624 PyObject *temp = NULL;
625 Py_buffer buf;
626 char *pbuf;
627 int sign;
628 Py_ssize_t len;
629 char formatbuf[FORMATBUFLEN];
630 /* For format{int,char}() */
631
632 buf.obj = NULL;
633 fmt++;
634 if (*fmt == '(') {
635 char *keystart;
636 Py_ssize_t keylen;
637 PyObject *key;
638 int pcount = 1;
639
640 if (dict == NULL) {
641 PyErr_SetString(PyExc_TypeError,
642 "format requires a mapping");
643 goto error;
644 }
645 ++fmt;
646 --fmtcnt;
647 keystart = fmt;
648 /* Skip over balanced parentheses */
649 while (pcount > 0 && --fmtcnt >= 0) {
650 if (*fmt == ')')
651 --pcount;
652 else if (*fmt == '(')
653 ++pcount;
654 fmt++;
655 }
656 keylen = fmt - keystart - 1;
657 if (fmtcnt < 0 || pcount > 0) {
658 PyErr_SetString(PyExc_ValueError,
659 "incomplete format key");
660 goto error;
661 }
662 key = PyBytes_FromStringAndSize(keystart,
663 keylen);
664 if (key == NULL)
665 goto error;
666 if (args_owned) {
667 Py_DECREF(args);
668 args_owned = 0;
669 }
670 args = PyObject_GetItem(dict, key);
671 Py_DECREF(key);
672 if (args == NULL) {
673 goto error;
674 }
675 args_owned = 1;
676 arglen = -1;
677 argidx = -2;
678 }
679 while (--fmtcnt >= 0) {
680 switch (c = *fmt++) {
681 case '-': flags |= F_LJUST; continue;
682 case '+': flags |= F_SIGN; continue;
683 case ' ': flags |= F_BLANK; continue;
684 case '#': flags |= F_ALT; continue;
685 case '0': flags |= F_ZERO; continue;
686 }
687 break;
688 }
689 if (c == '*') {
690 v = getnextarg(args, arglen, &argidx);
691 if (v == NULL)
692 goto error;
693 if (!PyLong_Check(v)) {
694 PyErr_SetString(PyExc_TypeError,
695 "* wants int");
696 goto error;
697 }
698 width = PyLong_AsSsize_t(v);
699 if (width == -1 && PyErr_Occurred())
700 goto error;
701 if (width < 0) {
702 flags |= F_LJUST;
703 width = -width;
704 }
705 if (--fmtcnt >= 0)
706 c = *fmt++;
707 }
708 else if (c >= 0 && isdigit(c)) {
709 width = c - '0';
710 while (--fmtcnt >= 0) {
711 c = Py_CHARMASK(*fmt++);
712 if (!isdigit(c))
713 break;
714 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
715 PyErr_SetString(
716 PyExc_ValueError,
717 "width too big");
718 goto error;
719 }
720 width = width*10 + (c - '0');
721 }
722 }
723 if (c == '.') {
724 prec = 0;
725 if (--fmtcnt >= 0)
726 c = *fmt++;
727 if (c == '*') {
728 v = getnextarg(args, arglen, &argidx);
729 if (v == NULL)
730 goto error;
731 if (!PyLong_Check(v)) {
732 PyErr_SetString(
733 PyExc_TypeError,
734 "* wants int");
735 goto error;
736 }
737 prec = PyLong_AsSsize_t(v);
738 if (prec == -1 && PyErr_Occurred())
739 goto error;
740 if (prec < 0)
741 prec = 0;
742 if (--fmtcnt >= 0)
743 c = *fmt++;
744 }
745 else if (c >= 0 && isdigit(c)) {
746 prec = c - '0';
747 while (--fmtcnt >= 0) {
748 c = Py_CHARMASK(*fmt++);
749 if (!isdigit(c))
750 break;
751 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
752 PyErr_SetString(
753 PyExc_ValueError,
754 "prec too big");
755 goto error;
756 }
757 prec = prec*10 + (c - '0');
758 }
759 }
760 } /* prec */
761 if (fmtcnt >= 0) {
762 if (c == 'h' || c == 'l' || c == 'L') {
763 if (--fmtcnt >= 0)
764 c = *fmt++;
765 }
766 }
767 if (fmtcnt < 0) {
768 PyErr_SetString(PyExc_ValueError,
769 "incomplete format");
770 goto error;
771 }
772 if (c != '%') {
773 v = getnextarg(args, arglen, &argidx);
774 if (v == NULL)
775 goto error;
776 }
777 sign = 0;
778 fill = ' ';
779 switch (c) {
780 case '%':
781 pbuf = "%";
782 len = 1;
783 break;
784 case 'a':
785 temp = PyObject_Repr(v);
786 if (temp == NULL)
787 goto error;
788 repr = PyUnicode_AsEncodedObject(temp, "ascii", "backslashreplace");
789 if (repr == NULL) {
790 Py_DECREF(temp);
791 goto error;
792 }
793 if (_getbuffer(repr, &buf) < 0) {
794 temp = format_obj(repr);
795 if (temp == NULL) {
796 Py_DECREF(repr);
797 goto error;
798 }
799 Py_DECREF(repr);
800 repr = temp;
801 }
802 pbuf = PyBytes_AS_STRING(repr);
803 len = PyBytes_GET_SIZE(repr);
804 Py_DECREF(repr);
805 if (prec >= 0 && len > prec)
806 len = prec;
807 break;
808 case 's':
809 // %s is only for 2/3 code; 3 only code should use %b
810 case 'b':
811 temp = format_obj(v);
812 if (temp == NULL)
813 goto error;
814 pbuf = PyBytes_AS_STRING(temp);
815 len = PyBytes_GET_SIZE(temp);
816 if (prec >= 0 && len > prec)
817 len = prec;
818 break;
819 case 'i':
820 case 'd':
821 case 'u':
822 case 'o':
823 case 'x':
824 case 'X':
825 if (c == 'i')
826 c = 'd';
827 isnumok = 0;
828 if (PyNumber_Check(v)) {
829 PyObject *iobj=NULL;
830
831 if ((PyLong_Check(v))) {
832 iobj = v;
833 Py_INCREF(iobj);
834 }
835 else {
836 iobj = PyNumber_Long(v);
837 }
838 if (iobj!=NULL) {
839 if (PyLong_Check(iobj)) {
840 int ilen;
841
842 isnumok = 1;
843 temp = format_long(iobj, flags, prec, c,
844 &pbuf, &ilen);
845 Py_DECREF(iobj);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800846 if (!temp)
847 goto error;
Benjamin Petersona8efc962015-01-26 09:23:41 -0500848 len = ilen;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800849 sign = 1;
850 }
851 else {
852 Py_DECREF(iobj);
853 }
854 }
855 }
856 if (!isnumok) {
857 PyErr_Format(PyExc_TypeError,
858 "%%%c format: a number is required, "
859 "not %.200s", c, Py_TYPE(v)->tp_name);
860 goto error;
861 }
862 if (flags & F_ZERO)
863 fill = '0';
864 break;
865 case 'e':
866 case 'E':
867 case 'f':
868 case 'F':
869 case 'g':
870 case 'G':
871 temp = formatfloat(v, flags, prec, c);
872 if (temp == NULL)
873 goto error;
874 pbuf = PyBytes_AS_STRING(temp);
875 len = PyBytes_GET_SIZE(temp);
876 sign = 1;
877 if (flags & F_ZERO)
878 fill = '0';
879 break;
880 case 'c':
881 pbuf = formatbuf;
882 len = formatchar(pbuf, sizeof(formatbuf), v);
883 if (len < 0)
884 goto error;
885 break;
886 default:
887 PyErr_Format(PyExc_ValueError,
888 "unsupported format character '%c' (0x%x) "
889 "at index %zd",
890 c, c,
891 (Py_ssize_t)(fmt - 1 -
892 PyBytes_AsString(format)));
893 goto error;
894 }
895 if (sign) {
896 if (*pbuf == '-' || *pbuf == '+') {
897 sign = *pbuf++;
898 len--;
899 }
900 else if (flags & F_SIGN)
901 sign = '+';
902 else if (flags & F_BLANK)
903 sign = ' ';
904 else
905 sign = 0;
906 }
907 if (width < len)
908 width = len;
909 if (rescnt - (sign != 0) < width) {
910 reslen -= rescnt;
911 rescnt = width + fmtcnt + 100;
912 reslen += rescnt;
913 if (reslen < 0) {
914 Py_DECREF(result);
915 PyBuffer_Release(&buf);
916 Py_XDECREF(temp);
917 return PyErr_NoMemory();
918 }
919 if (_PyBytes_Resize(&result, reslen)) {
920 PyBuffer_Release(&buf);
921 Py_XDECREF(temp);
922 return NULL;
923 }
924 res = PyBytes_AS_STRING(result)
925 + reslen - rescnt;
926 }
927 if (sign) {
928 if (fill != ' ')
929 *res++ = sign;
930 rescnt--;
931 if (width > len)
932 width--;
933 }
934 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
935 assert(pbuf[0] == '0');
936 assert(pbuf[1] == c);
937 if (fill != ' ') {
938 *res++ = *pbuf++;
939 *res++ = *pbuf++;
940 }
941 rescnt -= 2;
942 width -= 2;
943 if (width < 0)
944 width = 0;
945 len -= 2;
946 }
947 if (width > len && !(flags & F_LJUST)) {
948 do {
949 --rescnt;
950 *res++ = fill;
951 } while (--width > len);
952 }
953 if (fill == ' ') {
954 if (sign)
955 *res++ = sign;
956 if ((flags & F_ALT) &&
957 (c == 'x' || c == 'X')) {
958 assert(pbuf[0] == '0');
959 assert(pbuf[1] == c);
960 *res++ = *pbuf++;
961 *res++ = *pbuf++;
962 }
963 }
964 Py_MEMCPY(res, pbuf, len);
965 res += len;
966 rescnt -= len;
967 while (--width >= len) {
968 --rescnt;
969 *res++ = ' ';
970 }
971 if (dict && (argidx < arglen) && c != '%') {
972 PyErr_SetString(PyExc_TypeError,
973 "not all arguments converted during bytes formatting");
974 PyBuffer_Release(&buf);
975 Py_XDECREF(temp);
976 goto error;
977 }
978 PyBuffer_Release(&buf);
979 Py_XDECREF(temp);
980 } /* '%' */
981 } /* until end */
982 if (argidx < arglen && !dict) {
983 PyErr_SetString(PyExc_TypeError,
984 "not all arguments converted during bytes formatting");
985 goto error;
986 }
987 if (args_owned) {
988 Py_DECREF(args);
989 }
990 if (_PyBytes_Resize(&result, reslen - rescnt))
991 return NULL;
992 return result;
993
994 error:
995 Py_DECREF(result);
996 if (args_owned) {
997 Py_DECREF(args);
998 }
999 return NULL;
1000}
1001
1002/* =-= */
1003
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001004static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001005bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001006{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001008}
1009
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001010/* Unescape a backslash-escaped string. If unicode is non-zero,
1011 the string is a u-literal. If recode_encoding is non-zero,
1012 the string is UTF-8 encoded and should be re-encoded in the
1013 specified encoding. */
1014
1015PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 Py_ssize_t len,
1017 const char *errors,
1018 Py_ssize_t unicode,
1019 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001020{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 int c;
1022 char *p, *buf;
1023 const char *end;
1024 PyObject *v;
1025 Py_ssize_t newlen = recode_encoding ? 4*len:len;
1026 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
1027 if (v == NULL)
1028 return NULL;
1029 p = buf = PyBytes_AsString(v);
1030 end = s + len;
1031 while (s < end) {
1032 if (*s != '\\') {
1033 non_esc:
1034 if (recode_encoding && (*s & 0x80)) {
1035 PyObject *u, *w;
1036 char *r;
1037 const char* t;
1038 Py_ssize_t rn;
1039 t = s;
1040 /* Decode non-ASCII bytes as UTF-8. */
1041 while (t < end && (*t & 0x80)) t++;
1042 u = PyUnicode_DecodeUTF8(s, t - s, errors);
1043 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 /* Recode them in target encoding. */
1046 w = PyUnicode_AsEncodedString(
1047 u, recode_encoding, errors);
1048 Py_DECREF(u);
1049 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 /* Append bytes to output buffer. */
1052 assert(PyBytes_Check(w));
1053 r = PyBytes_AS_STRING(w);
1054 rn = PyBytes_GET_SIZE(w);
1055 Py_MEMCPY(p, r, rn);
1056 p += rn;
1057 Py_DECREF(w);
1058 s = t;
1059 } else {
1060 *p++ = *s++;
1061 }
1062 continue;
1063 }
1064 s++;
1065 if (s==end) {
1066 PyErr_SetString(PyExc_ValueError,
1067 "Trailing \\ in string");
1068 goto failed;
1069 }
1070 switch (*s++) {
1071 /* XXX This assumes ASCII! */
1072 case '\n': break;
1073 case '\\': *p++ = '\\'; break;
1074 case '\'': *p++ = '\''; break;
1075 case '\"': *p++ = '\"'; break;
1076 case 'b': *p++ = '\b'; break;
1077 case 'f': *p++ = '\014'; break; /* FF */
1078 case 't': *p++ = '\t'; break;
1079 case 'n': *p++ = '\n'; break;
1080 case 'r': *p++ = '\r'; break;
1081 case 'v': *p++ = '\013'; break; /* VT */
1082 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1083 case '0': case '1': case '2': case '3':
1084 case '4': case '5': case '6': case '7':
1085 c = s[-1] - '0';
1086 if (s < end && '0' <= *s && *s <= '7') {
1087 c = (c<<3) + *s++ - '0';
1088 if (s < end && '0' <= *s && *s <= '7')
1089 c = (c<<3) + *s++ - '0';
1090 }
1091 *p++ = c;
1092 break;
1093 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001094 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 unsigned int x = 0;
1096 c = Py_CHARMASK(*s);
1097 s++;
David Malcolm96960882010-11-05 17:23:41 +00001098 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001100 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 x = 10 + c - 'a';
1102 else
1103 x = 10 + c - 'A';
1104 x = x << 4;
1105 c = Py_CHARMASK(*s);
1106 s++;
David Malcolm96960882010-11-05 17:23:41 +00001107 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001109 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 x += 10 + c - 'a';
1111 else
1112 x += 10 + c - 'A';
1113 *p++ = x;
1114 break;
1115 }
1116 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001117 PyErr_Format(PyExc_ValueError,
1118 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001119 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 goto failed;
1121 }
1122 if (strcmp(errors, "replace") == 0) {
1123 *p++ = '?';
1124 } else if (strcmp(errors, "ignore") == 0)
1125 /* do nothing */;
1126 else {
1127 PyErr_Format(PyExc_ValueError,
1128 "decoding error; unknown "
1129 "error handling code: %.400s",
1130 errors);
1131 goto failed;
1132 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001133 /* skip \x */
1134 if (s < end && Py_ISXDIGIT(s[0]))
1135 s++; /* and a hexdigit */
1136 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 default:
1138 *p++ = '\\';
1139 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001140 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 UTF-8 bytes may follow. */
1142 }
1143 }
1144 if (p-buf < newlen)
1145 _PyBytes_Resize(&v, p - buf);
1146 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001147 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 Py_DECREF(v);
1149 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001150}
1151
1152/* -------------------------------------------------------------------- */
1153/* object api */
1154
1155Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001156PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001157{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 if (!PyBytes_Check(op)) {
1159 PyErr_Format(PyExc_TypeError,
1160 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1161 return -1;
1162 }
1163 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001164}
1165
1166char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001167PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 if (!PyBytes_Check(op)) {
1170 PyErr_Format(PyExc_TypeError,
1171 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1172 return NULL;
1173 }
1174 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001175}
1176
1177int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001178PyBytes_AsStringAndSize(PyObject *obj,
1179 char **s,
1180 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001181{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 if (s == NULL) {
1183 PyErr_BadInternalCall();
1184 return -1;
1185 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 if (!PyBytes_Check(obj)) {
1188 PyErr_Format(PyExc_TypeError,
1189 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1190 return -1;
1191 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 *s = PyBytes_AS_STRING(obj);
1194 if (len != NULL)
1195 *len = PyBytes_GET_SIZE(obj);
1196 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001197 PyErr_SetString(PyExc_ValueError,
1198 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 return -1;
1200 }
1201 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202}
Neal Norwitz6968b052007-02-27 19:02:19 +00001203
1204/* -------------------------------------------------------------------- */
1205/* Methods */
1206
Eric Smith0923d1d2009-04-16 20:16:10 +00001207#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001208
1209#include "stringlib/fastsearch.h"
1210#include "stringlib/count.h"
1211#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001212#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001213#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001214#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001215#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001216
Eric Smith0f78bff2009-11-30 01:01:42 +00001217#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001218
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001219PyObject *
1220PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001221{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001222 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001223 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001224 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001226 unsigned char quote, *s, *p;
1227
1228 /* Compute size of output string */
1229 squotes = dquotes = 0;
1230 newsize = 3; /* b'' */
1231 s = (unsigned char*)op->ob_sval;
1232 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001233 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001234 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001235 case '\'': squotes++; break;
1236 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001237 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001238 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001239 default:
1240 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001241 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001242 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001243 if (newsize > PY_SSIZE_T_MAX - incr)
1244 goto overflow;
1245 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001246 }
1247 quote = '\'';
1248 if (smartquotes && squotes && !dquotes)
1249 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001250 if (squotes && quote == '\'') {
1251 if (newsize > PY_SSIZE_T_MAX - squotes)
1252 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001253 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001255
1256 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 if (v == NULL) {
1258 return NULL;
1259 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001260 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001261
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 *p++ = 'b', *p++ = quote;
1263 for (i = 0; i < length; i++) {
1264 unsigned char c = op->ob_sval[i];
1265 if (c == quote || c == '\\')
1266 *p++ = '\\', *p++ = c;
1267 else if (c == '\t')
1268 *p++ = '\\', *p++ = 't';
1269 else if (c == '\n')
1270 *p++ = '\\', *p++ = 'n';
1271 else if (c == '\r')
1272 *p++ = '\\', *p++ = 'r';
1273 else if (c < ' ' || c >= 0x7f) {
1274 *p++ = '\\';
1275 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001276 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1277 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 else
1280 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001283 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001284 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001285
1286 overflow:
1287 PyErr_SetString(PyExc_OverflowError,
1288 "bytes object is too large to make repr");
1289 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001290}
1291
Neal Norwitz6968b052007-02-27 19:02:19 +00001292static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001293bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001296}
1297
Neal Norwitz6968b052007-02-27 19:02:19 +00001298static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001299bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001300{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 if (Py_BytesWarningFlag) {
1302 if (PyErr_WarnEx(PyExc_BytesWarning,
1303 "str() on a bytes instance", 1))
1304 return NULL;
1305 }
1306 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001307}
1308
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001309static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001310bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313}
Neal Norwitz6968b052007-02-27 19:02:19 +00001314
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001315/* This is also used by PyBytes_Concat() */
1316static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001317bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 Py_ssize_t size;
1320 Py_buffer va, vb;
1321 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 va.len = -1;
1324 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001325 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1326 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1328 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1329 goto done;
1330 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 /* Optimize end cases */
1333 if (va.len == 0 && PyBytes_CheckExact(b)) {
1334 result = b;
1335 Py_INCREF(result);
1336 goto done;
1337 }
1338 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1339 result = a;
1340 Py_INCREF(result);
1341 goto done;
1342 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 size = va.len + vb.len;
1345 if (size < 0) {
1346 PyErr_NoMemory();
1347 goto done;
1348 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 result = PyBytes_FromStringAndSize(NULL, size);
1351 if (result != NULL) {
1352 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1353 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1354 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001355
1356 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 if (va.len != -1)
1358 PyBuffer_Release(&va);
1359 if (vb.len != -1)
1360 PyBuffer_Release(&vb);
1361 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001362}
Neal Norwitz6968b052007-02-27 19:02:19 +00001363
1364static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001365bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001366{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001367 Py_ssize_t i;
1368 Py_ssize_t j;
1369 Py_ssize_t size;
1370 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 size_t nbytes;
1372 if (n < 0)
1373 n = 0;
1374 /* watch out for overflows: the size can overflow int,
1375 * and the # of bytes needed can overflow size_t
1376 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001377 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 PyErr_SetString(PyExc_OverflowError,
1379 "repeated bytes are too long");
1380 return NULL;
1381 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001382 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1384 Py_INCREF(a);
1385 return (PyObject *)a;
1386 }
1387 nbytes = (size_t)size;
1388 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1389 PyErr_SetString(PyExc_OverflowError,
1390 "repeated bytes are too long");
1391 return NULL;
1392 }
1393 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1394 if (op == NULL)
1395 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001396 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 op->ob_shash = -1;
1398 op->ob_sval[size] = '\0';
1399 if (Py_SIZE(a) == 1 && n > 0) {
1400 memset(op->ob_sval, a->ob_sval[0] , n);
1401 return (PyObject *) op;
1402 }
1403 i = 0;
1404 if (i < size) {
1405 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1406 i = Py_SIZE(a);
1407 }
1408 while (i < size) {
1409 j = (i <= size-i) ? i : size-i;
1410 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1411 i += j;
1412 }
1413 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001414}
1415
Guido van Rossum98297ee2007-11-06 21:34:58 +00001416static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001417bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001418{
1419 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1420 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001421 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001422 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001423 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001424 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001425 return -1;
1426 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1427 varg.buf, varg.len, 0);
1428 PyBuffer_Release(&varg);
1429 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001430 }
1431 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001432 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1433 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001434 }
1435
Antoine Pitrou0010d372010-08-15 17:12:55 +00001436 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001437}
1438
Neal Norwitz6968b052007-02-27 19:02:19 +00001439static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001440bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001441{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 if (i < 0 || i >= Py_SIZE(a)) {
1443 PyErr_SetString(PyExc_IndexError, "index out of range");
1444 return NULL;
1445 }
1446 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001447}
1448
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001449Py_LOCAL(int)
1450bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1451{
1452 int cmp;
1453 Py_ssize_t len;
1454
1455 len = Py_SIZE(a);
1456 if (Py_SIZE(b) != len)
1457 return 0;
1458
1459 if (a->ob_sval[0] != b->ob_sval[0])
1460 return 0;
1461
1462 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1463 return (cmp == 0);
1464}
1465
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001466static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001467bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 int c;
1470 Py_ssize_t len_a, len_b;
1471 Py_ssize_t min_len;
1472 PyObject *result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 /* Make sure both arguments are strings. */
1475 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1476 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
1477 (PyObject_IsInstance((PyObject*)a,
1478 (PyObject*)&PyUnicode_Type) ||
1479 PyObject_IsInstance((PyObject*)b,
1480 (PyObject*)&PyUnicode_Type))) {
1481 if (PyErr_WarnEx(PyExc_BytesWarning,
1482 "Comparison between bytes and string", 1))
1483 return NULL;
1484 }
1485 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001487 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001489 case Py_EQ:
1490 case Py_LE:
1491 case Py_GE:
1492 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001494 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001495 case Py_NE:
1496 case Py_LT:
1497 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001499 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001500 default:
1501 PyErr_BadArgument();
1502 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 }
1504 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001505 else if (op == Py_EQ || op == Py_NE) {
1506 int eq = bytes_compare_eq(a, b);
1507 eq ^= (op == Py_NE);
1508 result = eq ? Py_True : Py_False;
1509 }
1510 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001511 len_a = Py_SIZE(a);
1512 len_b = Py_SIZE(b);
1513 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001514 if (min_len > 0) {
1515 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001516 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001517 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001519 else
1520 c = 0;
1521 if (c == 0)
1522 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1523 switch (op) {
1524 case Py_LT: c = c < 0; break;
1525 case Py_LE: c = c <= 0; break;
1526 case Py_GT: c = c > 0; break;
1527 case Py_GE: c = c >= 0; break;
1528 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001529 PyErr_BadArgument();
1530 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001531 }
1532 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 Py_INCREF(result);
1536 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001537}
1538
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001539static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001540bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001541{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001542 if (a->ob_shash == -1) {
1543 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001544 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001545 }
1546 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001547}
1548
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001549static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001550bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001551{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 if (PyIndex_Check(item)) {
1553 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1554 if (i == -1 && PyErr_Occurred())
1555 return NULL;
1556 if (i < 0)
1557 i += PyBytes_GET_SIZE(self);
1558 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1559 PyErr_SetString(PyExc_IndexError,
1560 "index out of range");
1561 return NULL;
1562 }
1563 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1564 }
1565 else if (PySlice_Check(item)) {
1566 Py_ssize_t start, stop, step, slicelength, cur, i;
1567 char* source_buf;
1568 char* result_buf;
1569 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001570
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001571 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 PyBytes_GET_SIZE(self),
1573 &start, &stop, &step, &slicelength) < 0) {
1574 return NULL;
1575 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 if (slicelength <= 0) {
1578 return PyBytes_FromStringAndSize("", 0);
1579 }
1580 else if (start == 0 && step == 1 &&
1581 slicelength == PyBytes_GET_SIZE(self) &&
1582 PyBytes_CheckExact(self)) {
1583 Py_INCREF(self);
1584 return (PyObject *)self;
1585 }
1586 else if (step == 1) {
1587 return PyBytes_FromStringAndSize(
1588 PyBytes_AS_STRING(self) + start,
1589 slicelength);
1590 }
1591 else {
1592 source_buf = PyBytes_AS_STRING(self);
1593 result = PyBytes_FromStringAndSize(NULL, slicelength);
1594 if (result == NULL)
1595 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 result_buf = PyBytes_AS_STRING(result);
1598 for (cur = start, i = 0; i < slicelength;
1599 cur += step, i++) {
1600 result_buf[i] = source_buf[cur];
1601 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 return result;
1604 }
1605 }
1606 else {
1607 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001608 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 Py_TYPE(item)->tp_name);
1610 return NULL;
1611 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001612}
1613
1614static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001615bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001616{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1618 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001619}
1620
/* Sequence protocol slot table for bytes.  Slots set to 0 are not
   provided: there is no slice slot and no item/slice assignment. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1631
/* Mapping protocol slot table for bytes: length and subscripting
   (index or slice); the third slot, subscript assignment, is left 0. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,          /* mp_length */
    (binaryfunc)bytes_subscript,    /* mp_subscript */
    0,                              /* mp_ass_subscript */
};
1637
/* Buffer protocol slot table for bytes: only a getter is supplied; the
   release slot is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,  /* bf_getbuffer */
    NULL,                                   /* bf_releasebuffer */
};
1642
1643
/* Mode selectors.  NOTE(review): presumably consumed by the strip helpers
   defined later in this file to choose which end(s) to strip -- confirm
   against their call sites. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1647
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001648/*[clinic input]
1649bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001651 sep: object = None
1652 The delimiter according which to split the bytes.
1653 None (the default value) means split on ASCII whitespace characters
1654 (space, tab, return, newline, formfeed, vertical tab).
1655 maxsplit: Py_ssize_t = -1
1656 Maximum number of splits to do.
1657 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001658
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001659Return a list of the sections in the bytes, using sep as the delimiter.
1660[clinic start generated code]*/
1661
1662PyDoc_STRVAR(bytes_split__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001663"split($self, /, sep=None, maxsplit=-1)\n"
1664"--\n"
1665"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001666"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1667"\n"
1668" sep\n"
1669" The delimiter according which to split the bytes.\n"
1670" None (the default value) means split on ASCII whitespace characters\n"
1671" (space, tab, return, newline, formfeed, vertical tab).\n"
1672" maxsplit\n"
1673" Maximum number of splits to do.\n"
1674" -1 (the default value) means no limit.");
1675
1676#define BYTES_SPLIT_METHODDEF \
1677 {"split", (PyCFunction)bytes_split, METH_VARARGS|METH_KEYWORDS, bytes_split__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001678
1679static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001680bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001681
1682static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001683bytes_split(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001684{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001685 PyObject *return_value = NULL;
1686 static char *_keywords[] = {"sep", "maxsplit", NULL};
1687 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001689
1690 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1691 "|On:split", _keywords,
1692 &sep, &maxsplit))
1693 goto exit;
1694 return_value = bytes_split_impl(self, sep, maxsplit);
1695
1696exit:
1697 return return_value;
1698}
1699
1700static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001701bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1702/*[clinic end generated code: output=c80a47afdd505975 input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001703{
1704 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 const char *s = PyBytes_AS_STRING(self), *sub;
1706 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001707 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 if (maxsplit < 0)
1710 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001713 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 return NULL;
1715 sub = vsub.buf;
1716 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1719 PyBuffer_Release(&vsub);
1720 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001721}
1722
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723/*[clinic input]
1724bytes.partition
1725
1726 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001727 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001728 /
1729
1730Partition the bytes into three parts using the given separator.
1731
1732This will search for the separator sep in the bytes. If the separator is found,
1733returns a 3-tuple containing the part before the separator, the separator
1734itself, and the part after it.
1735
1736If the separator is not found, returns a 3-tuple containing the original bytes
1737object and two empty bytes objects.
1738[clinic start generated code]*/
1739
1740PyDoc_STRVAR(bytes_partition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001741"partition($self, sep, /)\n"
1742"--\n"
1743"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001744"Partition the bytes into three parts using the given separator.\n"
1745"\n"
1746"This will search for the separator sep in the bytes. If the separator is found,\n"
1747"returns a 3-tuple containing the part before the separator, the separator\n"
1748"itself, and the part after it.\n"
1749"\n"
1750"If the separator is not found, returns a 3-tuple containing the original bytes\n"
1751"object and two empty bytes objects.");
1752
1753#define BYTES_PARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001754 {"partition", (PyCFunction)bytes_partition, METH_VARARGS, bytes_partition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001755
1756static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001757bytes_partition_impl(PyBytesObject *self, Py_buffer *sep);
1758
1759static PyObject *
1760bytes_partition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001761{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001762 PyObject *return_value = NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001763 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001764
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001765 if (!PyArg_ParseTuple(args,
1766 "y*:partition",
1767 &sep))
1768 goto exit;
1769 return_value = bytes_partition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001770
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001771exit:
1772 /* Cleanup for sep */
1773 if (sep.obj)
1774 PyBuffer_Release(&sep);
1775
1776 return return_value;
Neal Norwitz6968b052007-02-27 19:02:19 +00001777}
1778
Neal Norwitz6968b052007-02-27 19:02:19 +00001779static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1781/*[clinic end generated code: output=3006727cfbf83aa4 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001782{
Neal Norwitz6968b052007-02-27 19:02:19 +00001783 return stringlib_partition(
1784 (PyObject*) self,
1785 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001786 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001787 );
1788}
1789
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790/*[clinic input]
1791bytes.rpartition
1792
1793 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001795 /
1796
1797Partition the bytes into three parts using the given separator.
1798
This will search for the separator sep in the bytes, starting at the end. If
1800the separator is found, returns a 3-tuple containing the part before the
1801separator, the separator itself, and the part after it.
1802
1803If the separator is not found, returns a 3-tuple containing two empty bytes
1804objects and the original bytes object.
1805[clinic start generated code]*/
1806
1807PyDoc_STRVAR(bytes_rpartition__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001808"rpartition($self, sep, /)\n"
1809"--\n"
1810"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001811"Partition the bytes into three parts using the given separator.\n"
1812"\n"
1813"This will search for the separator sep in the bytes, starting and the end. If\n"
1814"the separator is found, returns a 3-tuple containing the part before the\n"
1815"separator, the separator itself, and the part after it.\n"
1816"\n"
1817"If the separator is not found, returns a 3-tuple containing two empty bytes\n"
1818"objects and the original bytes object.");
1819
1820#define BYTES_RPARTITION_METHODDEF \
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001821 {"rpartition", (PyCFunction)bytes_rpartition, METH_VARARGS, bytes_rpartition__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00001822
1823static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001824bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep);
1825
1826static PyObject *
1827bytes_rpartition(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001828{
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001829 PyObject *return_value = NULL;
1830 Py_buffer sep = {NULL, NULL};
Neal Norwitz6968b052007-02-27 19:02:19 +00001831
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001832 if (!PyArg_ParseTuple(args,
1833 "y*:rpartition",
1834 &sep))
1835 goto exit;
1836 return_value = bytes_rpartition_impl(self, &sep);
Neal Norwitz6968b052007-02-27 19:02:19 +00001837
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001838exit:
1839 /* Cleanup for sep */
1840 if (sep.obj)
1841 PyBuffer_Release(&sep);
1842
1843 return return_value;
1844}
1845
1846static PyObject *
1847bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1848/*[clinic end generated code: output=57b169dc47fa90e8 input=6588fff262a9170e]*/
1849{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 return stringlib_rpartition(
1851 (PyObject*) self,
1852 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001853 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001855}
1856
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001857/*[clinic input]
1858bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001859
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001860Return a list of the sections in the bytes, using sep as the delimiter.
1861
1862Splitting is done starting at the end of the bytes and working to the front.
1863[clinic start generated code]*/
1864
1865PyDoc_STRVAR(bytes_rsplit__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001866"rsplit($self, /, sep=None, maxsplit=-1)\n"
1867"--\n"
1868"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001869"Return a list of the sections in the bytes, using sep as the delimiter.\n"
1870"\n"
1871" sep\n"
1872" The delimiter according which to split the bytes.\n"
1873" None (the default value) means split on ASCII whitespace characters\n"
1874" (space, tab, return, newline, formfeed, vertical tab).\n"
1875" maxsplit\n"
1876" Maximum number of splits to do.\n"
1877" -1 (the default value) means no limit.\n"
1878"\n"
1879"Splitting is done starting at the end of the bytes and working to the front.");
1880
1881#define BYTES_RSPLIT_METHODDEF \
1882 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS|METH_KEYWORDS, bytes_rsplit__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Neal Norwitz6968b052007-02-27 19:02:19 +00001884static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001885bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001886
1887static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001888bytes_rsplit(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Neal Norwitz6968b052007-02-27 19:02:19 +00001889{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001890 PyObject *return_value = NULL;
1891 static char *_keywords[] = {"sep", "maxsplit", NULL};
1892 PyObject *sep = Py_None;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 Py_ssize_t maxsplit = -1;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001894
1895 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1896 "|On:rsplit", _keywords,
1897 &sep, &maxsplit))
1898 goto exit;
1899 return_value = bytes_rsplit_impl(self, sep, maxsplit);
1900
1901exit:
1902 return return_value;
1903}
1904
1905static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001906bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
1907/*[clinic end generated code: output=f86feddedbd7b26d input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001908{
1909 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001910 const char *s = PyBytes_AS_STRING(self), *sub;
1911 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001912 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 if (maxsplit < 0)
1915 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001916 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001918 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001919 return NULL;
1920 sub = vsub.buf;
1921 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001923 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1924 PyBuffer_Release(&vsub);
1925 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001926}
1927
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001928
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001929/*[clinic input]
1930bytes.join
1931
1932 iterable_of_bytes: object
1933 /
1934
1935Concatenate any number of bytes objects.
1936
1937The bytes whose method is called is inserted in between each pair.
1938
1939The result is returned as a new bytes object.
1940
1941Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1942[clinic start generated code]*/
1943
1944PyDoc_STRVAR(bytes_join__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02001945"join($self, iterable_of_bytes, /)\n"
1946"--\n"
1947"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001948"Concatenate any number of bytes objects.\n"
1949"\n"
1950"The bytes whose method is called is inserted in between each pair.\n"
1951"\n"
1952"The result is returned as a new bytes object.\n"
1953"\n"
1954"Example: b\'.\'.join([b\'ab\', b\'pq\', b\'rs\']) -> b\'ab.pq.rs\'.");
1955
1956#define BYTES_JOIN_METHODDEF \
1957 {"join", (PyCFunction)bytes_join, METH_O, bytes_join__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958
Neal Norwitz6968b052007-02-27 19:02:19 +00001959static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001960bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
1961/*[clinic end generated code: output=e541a14a8da97908 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001962{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001963 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001964}
1965
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001966PyObject *
1967_PyBytes_Join(PyObject *sep, PyObject *x)
1968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 assert(sep != NULL && PyBytes_Check(sep));
1970 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001971 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001972}
1973
/* Helper macro to fix up start/end slice values.
 *
 * Rewrites `start` and `end` in place against a sequence length `len`:
 *   - an `end` greater than `len` becomes `len`;
 *   - a negative `end` has `len` added once and is then floored at 0;
 *   - a negative `start` has `len` added once and is then floored at 0.
 * A `start` larger than `len` (or larger than `end`) is left as it is.
 *
 * The expansion is a pair of bare `if` statements: `start` and `end` must
 * be assignable, every argument is expanded more than once and without
 * parentheses, and the macro is only usable as a statement of its own.
 * NOTE(review): before changing the expansion, check whether a header
 * included by this file defines the same macro -- TODO confirm.
 */
#define ADJUST_INDICES(start, end, len) \
    if (end > len) \
        end = len; \
    else if (end < 0) { \
        end += len; \
        if (end < 0) \
            end = 0; \
    } \
    if (start < 0) { \
        start += len; \
        if (start < 0) \
            start = 0; \
    }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988
1989Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001990bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001991{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001993 char byte;
1994 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001995 const char *sub;
1996 Py_ssize_t sub_len;
1997 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001998 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999
Antoine Pitrouac65d962011-10-20 23:54:17 +02002000 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
2001 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002002 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Antoine Pitrouac65d962011-10-20 23:54:17 +02002004 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002005 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002006 return -2;
2007
2008 sub = subbuf.buf;
2009 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002011 else {
2012 sub = &byte;
2013 sub_len = 1;
2014 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002016 if (dir > 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002017 res = stringlib_find_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2019 sub, sub_len, start, end);
2020 else
Antoine Pitrouac65d962011-10-20 23:54:17 +02002021 res = stringlib_rfind_slice(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2023 sub, sub_len, start, end);
Antoine Pitrouac65d962011-10-20 23:54:17 +02002024
2025 if (subobj)
2026 PyBuffer_Release(&subbuf);
2027
2028 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029}
2030
2031
2032PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002033"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002034\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002035Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002036such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002037arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002038\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002039Return -1 on failure.");
2040
Neal Norwitz6968b052007-02-27 19:02:19 +00002041static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002042bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002043{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002044 Py_ssize_t result = bytes_find_internal(self, args, +1);
2045 if (result == -2)
2046 return NULL;
2047 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002048}
2049
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002050
2051PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002052"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002053\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002054Like B.find() but raise ValueError when the substring is not found.");
2055
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002056static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002057bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002058{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 Py_ssize_t result = bytes_find_internal(self, args, +1);
2060 if (result == -2)
2061 return NULL;
2062 if (result == -1) {
2063 PyErr_SetString(PyExc_ValueError,
2064 "substring not found");
2065 return NULL;
2066 }
2067 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002068}
2069
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
2071PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002072"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002073\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002075such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002077\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078Return -1 on failure.");
2079
Neal Norwitz6968b052007-02-27 19:02:19 +00002080static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002081bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002082{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 Py_ssize_t result = bytes_find_internal(self, args, -1);
2084 if (result == -2)
2085 return NULL;
2086 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002087}
2088
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002089
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002090PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002091"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002092\n\
2093Like B.rfind() but raise ValueError when the substring is not found.");
2094
2095static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002096bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002097{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 Py_ssize_t result = bytes_find_internal(self, args, -1);
2099 if (result == -2)
2100 return NULL;
2101 if (result == -1) {
2102 PyErr_SetString(PyExc_ValueError,
2103 "substring not found");
2104 return NULL;
2105 }
2106 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002107}
2108
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
2110Py_LOCAL_INLINE(PyObject *)
2111do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002112{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002113 Py_buffer vsep;
2114 char *s = PyBytes_AS_STRING(self);
2115 Py_ssize_t len = PyBytes_GET_SIZE(self);
2116 char *sep;
2117 Py_ssize_t seplen;
2118 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002119
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002120 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002121 return NULL;
2122 sep = vsep.buf;
2123 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 i = 0;
2126 if (striptype != RIGHTSTRIP) {
2127 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2128 i++;
2129 }
2130 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 j = len;
2133 if (striptype != LEFTSTRIP) {
2134 do {
2135 j--;
2136 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2137 j++;
2138 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2143 Py_INCREF(self);
2144 return (PyObject*)self;
2145 }
2146 else
2147 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002148}
2149
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
2151Py_LOCAL_INLINE(PyObject *)
2152do_strip(PyBytesObject *self, int striptype)
2153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 char *s = PyBytes_AS_STRING(self);
2155 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002157 i = 0;
2158 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002159 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 i++;
2161 }
2162 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002164 j = len;
2165 if (striptype != LEFTSTRIP) {
2166 do {
2167 j--;
David Malcolm96960882010-11-05 17:23:41 +00002168 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 j++;
2170 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2173 Py_INCREF(self);
2174 return (PyObject*)self;
2175 }
2176 else
2177 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178}
2179
2180
2181Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002184 if (bytes != NULL && bytes != Py_None) {
2185 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002186 }
2187 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188}
2189
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002190/*[clinic input]
2191bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002192
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002193 self: self(type="PyBytesObject *")
2194 bytes: object = None
2195 /
2196
2197Strip leading and trailing bytes contained in the argument.
2198
2199If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2200[clinic start generated code]*/
2201
2202PyDoc_STRVAR(bytes_strip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002203"strip($self, bytes=None, /)\n"
2204"--\n"
2205"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002206"Strip leading and trailing bytes contained in the argument.\n"
2207"\n"
2208"If the argument is omitted or None, strip leading and trailing ASCII whitespace.");
2209
2210#define BYTES_STRIP_METHODDEF \
2211 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, bytes_strip__doc__},
2212
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002213static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002214bytes_strip_impl(PyBytesObject *self, PyObject *bytes);
2215
2216static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002217bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002218{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219 PyObject *return_value = NULL;
2220 PyObject *bytes = Py_None;
2221
2222 if (!PyArg_UnpackTuple(args, "strip",
2223 0, 1,
2224 &bytes))
2225 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002226 return_value = bytes_strip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002227
2228exit:
2229 return return_value;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002230}
2231
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002232static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002233bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002234/*[clinic end generated code: output=c8234a599ba5ec35 input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002235{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002236 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002237}
2238
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002239/*[clinic input]
2240bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002241
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002242 self: self(type="PyBytesObject *")
2243 bytes: object = None
2244 /
2245
2246Strip leading bytes contained in the argument.
2247
2248If the argument is omitted or None, strip leading ASCII whitespace.
2249[clinic start generated code]*/
2250
2251PyDoc_STRVAR(bytes_lstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002252"lstrip($self, bytes=None, /)\n"
2253"--\n"
2254"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002255"Strip leading bytes contained in the argument.\n"
2256"\n"
2257"If the argument is omitted or None, strip leading ASCII whitespace.");
2258
2259#define BYTES_LSTRIP_METHODDEF \
2260 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, bytes_lstrip__doc__},
2261
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002262static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002263bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes);
2264
2265static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002266bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002267{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002268 PyObject *return_value = NULL;
2269 PyObject *bytes = Py_None;
2270
2271 if (!PyArg_UnpackTuple(args, "lstrip",
2272 0, 1,
2273 &bytes))
2274 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002275 return_value = bytes_lstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002276
2277exit:
2278 return return_value;
2279}
2280
2281static PyObject *
2282bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002283/*[clinic end generated code: output=529e8511ab6f1115 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002284{
2285 return do_argstrip(self, LEFTSTRIP, bytes);
2286}
2287
2288/*[clinic input]
2289bytes.rstrip
2290
2291 self: self(type="PyBytesObject *")
2292 bytes: object = None
2293 /
2294
2295Strip trailing bytes contained in the argument.
2296
2297If the argument is omitted or None, strip trailing ASCII whitespace.
2298[clinic start generated code]*/
2299
2300PyDoc_STRVAR(bytes_rstrip__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002301"rstrip($self, bytes=None, /)\n"
2302"--\n"
2303"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304"Strip trailing bytes contained in the argument.\n"
2305"\n"
2306"If the argument is omitted or None, strip trailing ASCII whitespace.");
2307
2308#define BYTES_RSTRIP_METHODDEF \
2309 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, bytes_rstrip__doc__},
2310
2311static PyObject *
2312bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes);
2313
2314static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002315bytes_rstrip(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002316{
2317 PyObject *return_value = NULL;
2318 PyObject *bytes = Py_None;
2319
2320 if (!PyArg_UnpackTuple(args, "rstrip",
2321 0, 1,
2322 &bytes))
2323 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02002324 return_value = bytes_rstrip_impl(self, bytes);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002325
2326exit:
2327 return return_value;
2328}
2329
2330static PyObject *
2331bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Martin v. Löwis0efea322014-07-27 17:29:17 +02002332/*[clinic end generated code: output=e98730bd133e6593 input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002333{
2334 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002335}
Neal Norwitz6968b052007-02-27 19:02:19 +00002336
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002337
2338PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002339"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002340\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002341Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002342string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343as in slice notation.");
2344
2345static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002346bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 PyObject *sub_obj;
2349 const char *str = PyBytes_AS_STRING(self), *sub;
2350 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002351 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002352 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002353
Antoine Pitrouac65d962011-10-20 23:54:17 +02002354 Py_buffer vsub;
2355 PyObject *count_obj;
2356
2357 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2358 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360
Antoine Pitrouac65d962011-10-20 23:54:17 +02002361 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002362 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002363 return NULL;
2364
2365 sub = vsub.buf;
2366 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002367 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002368 else {
2369 sub = &byte;
2370 sub_len = 1;
2371 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002373 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002374
Antoine Pitrouac65d962011-10-20 23:54:17 +02002375 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002376 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2377 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002378
2379 if (sub_obj)
2380 PyBuffer_Release(&vsub);
2381
2382 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002383}
2384
2385
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002386/*[clinic input]
2387bytes.translate
2388
2389 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002390 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002391 Translation table, which must be a bytes object of length 256.
2392 [
2393 deletechars: object
2394 ]
2395 /
2396
2397Return a copy with each character mapped by the given translation table.
2398
2399All characters occurring in the optional argument deletechars are removed.
2400The remaining characters are mapped through the given translation table.
2401[clinic start generated code]*/
2402
2403PyDoc_STRVAR(bytes_translate__doc__,
2404"translate(table, [deletechars])\n"
2405"Return a copy with each character mapped by the given translation table.\n"
2406"\n"
2407" table\n"
2408" Translation table, which must be a bytes object of length 256.\n"
2409"\n"
2410"All characters occurring in the optional argument deletechars are removed.\n"
2411"The remaining characters are mapped through the given translation table.");
2412
2413#define BYTES_TRANSLATE_METHODDEF \
2414 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, bytes_translate__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002415
2416static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002417bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars);
2418
2419static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02002420bytes_translate(PyBytesObject *self, PyObject *args)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002421{
2422 PyObject *return_value = NULL;
2423 PyObject *table;
2424 int group_right_1 = 0;
2425 PyObject *deletechars = NULL;
2426
2427 switch (PyTuple_GET_SIZE(args)) {
2428 case 1:
2429 if (!PyArg_ParseTuple(args, "O:translate", &table))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002430 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002431 break;
2432 case 2:
2433 if (!PyArg_ParseTuple(args, "OO:translate", &table, &deletechars))
Martin v. Löwis0efea322014-07-27 17:29:17 +02002434 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002435 group_right_1 = 1;
2436 break;
2437 default:
2438 PyErr_SetString(PyExc_TypeError, "bytes.translate requires 1 to 2 arguments");
Martin v. Löwis0efea322014-07-27 17:29:17 +02002439 goto exit;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002440 }
Martin v. Löwis0efea322014-07-27 17:29:17 +02002441 return_value = bytes_translate_impl(self, table, group_right_1, deletechars);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002442
Martin v. Löwis0efea322014-07-27 17:29:17 +02002443exit:
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002444 return return_value;
2445}
2446
2447static PyObject *
2448bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1, PyObject *deletechars)
Larry Hastingsdfbeb162014-10-13 10:39:41 +01002449/*[clinic end generated code: output=f0f29a57f41df5d8 input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002451 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002452 Py_buffer table_view = {NULL, NULL};
2453 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002454 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002455 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002457 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002458 Py_ssize_t inlen, tablen, dellen = 0;
2459 PyObject *result;
2460 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002461
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002462 if (PyBytes_Check(table)) {
2463 table_chars = PyBytes_AS_STRING(table);
2464 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002466 else if (table == Py_None) {
2467 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002468 tablen = 256;
2469 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002470 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002471 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002472 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002473 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002474 tablen = table_view.len;
2475 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 if (tablen != 256) {
2478 PyErr_SetString(PyExc_ValueError,
2479 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002480 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 return NULL;
2482 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002484 if (deletechars != NULL) {
2485 if (PyBytes_Check(deletechars)) {
2486 del_table_chars = PyBytes_AS_STRING(deletechars);
2487 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002489 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002490 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002491 PyBuffer_Release(&table_view);
2492 return NULL;
2493 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002494 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002495 dellen = del_table_view.len;
2496 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002497 }
2498 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002499 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002500 dellen = 0;
2501 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002502
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002503 inlen = PyBytes_GET_SIZE(input_obj);
2504 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002505 if (result == NULL) {
2506 PyBuffer_Release(&del_table_view);
2507 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002509 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 output_start = output = PyBytes_AsString(result);
2511 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002513 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 /* If no deletions are required, use faster code */
2515 for (i = inlen; --i >= 0; ) {
2516 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002517 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 changed = 1;
2519 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002520 if (!changed && PyBytes_CheckExact(input_obj)) {
2521 Py_INCREF(input_obj);
2522 Py_DECREF(result);
2523 result = input_obj;
2524 }
2525 PyBuffer_Release(&del_table_view);
2526 PyBuffer_Release(&table_view);
2527 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002530 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002531 for (i = 0; i < 256; i++)
2532 trans_table[i] = Py_CHARMASK(i);
2533 } else {
2534 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002535 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002536 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002537 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002539 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002540 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002541 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 for (i = inlen; --i >= 0; ) {
2544 c = Py_CHARMASK(*input++);
2545 if (trans_table[c] != -1)
2546 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2547 continue;
2548 changed = 1;
2549 }
2550 if (!changed && PyBytes_CheckExact(input_obj)) {
2551 Py_DECREF(result);
2552 Py_INCREF(input_obj);
2553 return input_obj;
2554 }
2555 /* Fix the size of the resulting string */
2556 if (inlen > 0)
2557 _PyBytes_Resize(&result, output - output_start);
2558 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002559}
2560
2561
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002562/*[clinic input]
2563
2564@staticmethod
2565bytes.maketrans
2566
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002567 frm: Py_buffer
2568 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002569 /
2570
2571Return a translation table useable for the bytes or bytearray translate method.
2572
2573The returned table will be one where each byte in frm is mapped to the byte at
2574the same position in to.
2575
2576The bytes objects frm and to must be of the same length.
2577[clinic start generated code]*/
2578
2579PyDoc_STRVAR(bytes_maketrans__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02002580"maketrans(frm, to, /)\n"
2581"--\n"
2582"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002583"Return a translation table useable for the bytes or bytearray translate method.\n"
2584"\n"
2585"The returned table will be one where each byte in frm is mapped to the byte at\n"
2586"the same position in to.\n"
2587"\n"
2588"The bytes objects frm and to must be of the same length.");
2589
2590#define BYTES_MAKETRANS_METHODDEF \
2591 {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC, bytes_maketrans__doc__},
2592
Georg Brandlabc38772009-04-12 15:51:51 +00002593static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002594bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002595
2596static PyObject *
2597bytes_maketrans(void *null, PyObject *args)
Georg Brandlabc38772009-04-12 15:51:51 +00002598{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002599 PyObject *return_value = NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002600 Py_buffer frm = {NULL, NULL};
2601 Py_buffer to = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002602
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002603 if (!PyArg_ParseTuple(args,
2604 "y*y*:maketrans",
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002605 &frm, &to))
2606 goto exit;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002607 return_value = bytes_maketrans_impl(&frm, &to);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002608
2609exit:
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002610 /* Cleanup for frm */
2611 if (frm.obj)
2612 PyBuffer_Release(&frm);
2613 /* Cleanup for to */
2614 if (to.obj)
2615 PyBuffer_Release(&to);
2616
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002617 return return_value;
2618}
2619
2620static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002621bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2622/*[clinic end generated code: output=7df47390c476ac60 input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002623{
2624 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002625}
2626
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002627/* find and count characters and substrings */
2628
/* Pointer to the first byte equal to `c` within the first `target_len`
   bytes of `target`, or NULL when there is none (thin memchr wrapper). */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2631
2632/* String ops must return a string. */
2633/* If the object is subclass of string, create a copy */
2634Py_LOCAL(PyBytesObject *)
2635return_self(PyBytesObject *self)
2636{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002637 if (PyBytes_CheckExact(self)) {
2638 Py_INCREF(self);
2639 return self;
2640 }
2641 return (PyBytesObject *)PyBytes_FromStringAndSize(
2642 PyBytes_AS_STRING(self),
2643 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644}
2645
2646Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002647countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002648{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002649 Py_ssize_t count=0;
2650 const char *start=target;
2651 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002653 while ( (start=findchar(start, end-start, c)) != NULL ) {
2654 count++;
2655 if (count >= maxcount)
2656 break;
2657 start += 1;
2658 }
2659 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002660}
2661
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662
2663/* Algorithms for different cases of string replacement */
2664
2665/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2666Py_LOCAL(PyBytesObject *)
2667replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002668 const char *to_s, Py_ssize_t to_len,
2669 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 char *self_s, *result_s;
2672 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002673 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002676 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002677
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002678 /* 1 at the end plus 1 after every character;
2679 count = min(maxcount, self_len + 1) */
2680 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002682 else
2683 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2684 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 /* Check for overflow */
2687 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002688 assert(count > 0);
2689 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 PyErr_SetString(PyExc_OverflowError,
2691 "replacement bytes are too long");
2692 return NULL;
2693 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002694 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 if (! (result = (PyBytesObject *)
2697 PyBytes_FromStringAndSize(NULL, result_len)) )
2698 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 self_s = PyBytes_AS_STRING(self);
2701 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002704
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002705 /* Lay the first one down (guaranteed this will occur) */
2706 Py_MEMCPY(result_s, to_s, to_len);
2707 result_s += to_len;
2708 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 for (i=0; i<count; i++) {
2711 *result_s++ = *self_s++;
2712 Py_MEMCPY(result_s, to_s, to_len);
2713 result_s += to_len;
2714 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002716 /* Copy the rest of the original string */
2717 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002719 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720}
2721
2722/* Special case for deleting a single character */
2723/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2724Py_LOCAL(PyBytesObject *)
2725replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002728 char *self_s, *result_s;
2729 char *start, *next, *end;
2730 Py_ssize_t self_len, result_len;
2731 Py_ssize_t count;
2732 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002734 self_len = PyBytes_GET_SIZE(self);
2735 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002737 count = countchar(self_s, self_len, from_c, maxcount);
2738 if (count == 0) {
2739 return return_self(self);
2740 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 result_len = self_len - count; /* from_len == 1 */
2743 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002745 if ( (result = (PyBytesObject *)
2746 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2747 return NULL;
2748 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002750 start = self_s;
2751 end = self_s + self_len;
2752 while (count-- > 0) {
2753 next = findchar(start, end-start, from_c);
2754 if (next == NULL)
2755 break;
2756 Py_MEMCPY(result_s, start, next-start);
2757 result_s += (next-start);
2758 start = next+1;
2759 }
2760 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002763}
2764
2765/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2766
2767Py_LOCAL(PyBytesObject *)
2768replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 const char *from_s, Py_ssize_t from_len,
2770 Py_ssize_t maxcount) {
2771 char *self_s, *result_s;
2772 char *start, *next, *end;
2773 Py_ssize_t self_len, result_len;
2774 Py_ssize_t count, offset;
2775 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002777 self_len = PyBytes_GET_SIZE(self);
2778 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002780 count = stringlib_count(self_s, self_len,
2781 from_s, from_len,
2782 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 if (count == 0) {
2785 /* no matches */
2786 return return_self(self);
2787 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002789 result_len = self_len - (count * from_len);
2790 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002792 if ( (result = (PyBytesObject *)
2793 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2794 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002796 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 start = self_s;
2799 end = self_s + self_len;
2800 while (count-- > 0) {
2801 offset = stringlib_find(start, end-start,
2802 from_s, from_len,
2803 0);
2804 if (offset == -1)
2805 break;
2806 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002808 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002810 result_s += (next-start);
2811 start = next+from_len;
2812 }
2813 Py_MEMCPY(result_s, start, end-start);
2814 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815}
2816
2817/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2818Py_LOCAL(PyBytesObject *)
2819replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 char from_c, char to_c,
2821 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002822{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002823 char *self_s, *result_s, *start, *end, *next;
2824 Py_ssize_t self_len;
2825 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002827 /* The result string will be the same size */
2828 self_s = PyBytes_AS_STRING(self);
2829 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002833 if (next == NULL) {
2834 /* No matches; return the original string */
2835 return return_self(self);
2836 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 /* Need to make a new string */
2839 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2840 if (result == NULL)
2841 return NULL;
2842 result_s = PyBytes_AS_STRING(result);
2843 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 /* change everything in-place, starting with this one */
2846 start = result_s + (next-self_s);
2847 *start = to_c;
2848 start++;
2849 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002851 while (--maxcount > 0) {
2852 next = findchar(start, end-start, from_c);
2853 if (next == NULL)
2854 break;
2855 *next = to_c;
2856 start = next+1;
2857 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002860}
2861
2862/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2863Py_LOCAL(PyBytesObject *)
2864replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002865 const char *from_s, Py_ssize_t from_len,
2866 const char *to_s, Py_ssize_t to_len,
2867 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 char *result_s, *start, *end;
2870 char *self_s;
2871 Py_ssize_t self_len, offset;
2872 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002874 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002876 self_s = PyBytes_AS_STRING(self);
2877 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002878
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002879 offset = stringlib_find(self_s, self_len,
2880 from_s, from_len,
2881 0);
2882 if (offset == -1) {
2883 /* No matches; return the original string */
2884 return return_self(self);
2885 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002886
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 /* Need to make a new string */
2888 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2889 if (result == NULL)
2890 return NULL;
2891 result_s = PyBytes_AS_STRING(result);
2892 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002894 /* change everything in-place, starting with this one */
2895 start = result_s + offset;
2896 Py_MEMCPY(start, to_s, from_len);
2897 start += from_len;
2898 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002899
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002900 while ( --maxcount > 0) {
2901 offset = stringlib_find(start, end-start,
2902 from_s, from_len,
2903 0);
2904 if (offset==-1)
2905 break;
2906 Py_MEMCPY(start+offset, to_s, from_len);
2907 start += offset+from_len;
2908 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002910 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002911}
2912
2913/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2914Py_LOCAL(PyBytesObject *)
2915replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002916 char from_c,
2917 const char *to_s, Py_ssize_t to_len,
2918 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002920 char *self_s, *result_s;
2921 char *start, *next, *end;
2922 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002923 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 self_s = PyBytes_AS_STRING(self);
2927 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002929 count = countchar(self_s, self_len, from_c, maxcount);
2930 if (count == 0) {
2931 /* no matches, return unchanged */
2932 return return_self(self);
2933 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002935 /* use the difference between current and new, hence the "-1" */
2936 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002937 assert(count > 0);
2938 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002939 PyErr_SetString(PyExc_OverflowError,
2940 "replacement bytes are too long");
2941 return NULL;
2942 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002943 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002944
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002945 if ( (result = (PyBytesObject *)
2946 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2947 return NULL;
2948 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 start = self_s;
2951 end = self_s + self_len;
2952 while (count-- > 0) {
2953 next = findchar(start, end-start, from_c);
2954 if (next == NULL)
2955 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002957 if (next == start) {
2958 /* replace with the 'to' */
2959 Py_MEMCPY(result_s, to_s, to_len);
2960 result_s += to_len;
2961 start += 1;
2962 } else {
2963 /* copy the unchanged old then the 'to' */
2964 Py_MEMCPY(result_s, start, next-start);
2965 result_s += (next-start);
2966 Py_MEMCPY(result_s, to_s, to_len);
2967 result_s += to_len;
2968 start = next+1;
2969 }
2970 }
2971 /* Copy the remainder of the remaining string */
2972 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002974 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002975}
2976
2977/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2978Py_LOCAL(PyBytesObject *)
2979replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002980 const char *from_s, Py_ssize_t from_len,
2981 const char *to_s, Py_ssize_t to_len,
2982 Py_ssize_t maxcount) {
2983 char *self_s, *result_s;
2984 char *start, *next, *end;
2985 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002986 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002989 self_s = PyBytes_AS_STRING(self);
2990 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 count = stringlib_count(self_s, self_len,
2993 from_s, from_len,
2994 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002996 if (count == 0) {
2997 /* no matches, return unchanged */
2998 return return_self(self);
2999 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003001 /* Check for overflow */
3002 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00003003 assert(count > 0);
3004 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 PyErr_SetString(PyExc_OverflowError,
3006 "replacement bytes are too long");
3007 return NULL;
3008 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00003009 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003011 if ( (result = (PyBytesObject *)
3012 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
3013 return NULL;
3014 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003016 start = self_s;
3017 end = self_s + self_len;
3018 while (count-- > 0) {
3019 offset = stringlib_find(start, end-start,
3020 from_s, from_len,
3021 0);
3022 if (offset == -1)
3023 break;
3024 next = start+offset;
3025 if (next == start) {
3026 /* replace with the 'to' */
3027 Py_MEMCPY(result_s, to_s, to_len);
3028 result_s += to_len;
3029 start += from_len;
3030 } else {
3031 /* copy the unchanged old then the 'to' */
3032 Py_MEMCPY(result_s, start, next-start);
3033 result_s += (next-start);
3034 Py_MEMCPY(result_s, to_s, to_len);
3035 result_s += to_len;
3036 start = next+from_len;
3037 }
3038 }
3039 /* Copy the remainder of the remaining string */
3040 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003042 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003043}
3044
3045
3046Py_LOCAL(PyBytesObject *)
3047replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 const char *from_s, Py_ssize_t from_len,
3049 const char *to_s, Py_ssize_t to_len,
3050 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003052 if (maxcount < 0) {
3053 maxcount = PY_SSIZE_T_MAX;
3054 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
3055 /* nothing to do; return the original string */
3056 return return_self(self);
3057 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003059 if (maxcount == 0 ||
3060 (from_len == 0 && to_len == 0)) {
3061 /* nothing to do; return the original string */
3062 return return_self(self);
3063 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003065 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003067 if (from_len == 0) {
3068 /* insert the 'to' string everywhere. */
3069 /* >>> "Python".replace("", ".") */
3070 /* '.P.y.t.h.o.n.' */
3071 return replace_interleave(self, to_s, to_len, maxcount);
3072 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003074 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3075 /* point for an empty self string to generate a non-empty string */
3076 /* Special case so the remaining code always gets a non-empty string */
3077 if (PyBytes_GET_SIZE(self) == 0) {
3078 return return_self(self);
3079 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 if (to_len == 0) {
3082 /* delete all occurrences of 'from' string */
3083 if (from_len == 1) {
3084 return replace_delete_single_character(
3085 self, from_s[0], maxcount);
3086 } else {
3087 return replace_delete_substring(self, from_s,
3088 from_len, maxcount);
3089 }
3090 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 if (from_len == to_len) {
3095 if (from_len == 1) {
3096 return replace_single_character_in_place(
3097 self,
3098 from_s[0],
3099 to_s[0],
3100 maxcount);
3101 } else {
3102 return replace_substring_in_place(
3103 self, from_s, from_len, to_s, to_len,
3104 maxcount);
3105 }
3106 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003108 /* Otherwise use the more generic algorithms */
3109 if (from_len == 1) {
3110 return replace_single_character(self, from_s[0],
3111 to_s, to_len, maxcount);
3112 } else {
3113 /* len('from')>=2, len('to')>=1 */
3114 return replace_substring(self, from_s, from_len, to_s, to_len,
3115 maxcount);
3116 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003117}
3118
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003119
3120/*[clinic input]
3121bytes.replace
3122
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003123 old: Py_buffer
3124 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003125 count: Py_ssize_t = -1
3126 Maximum number of occurrences to replace.
3127 -1 (the default value) means replace all occurrences.
3128 /
3129
3130Return a copy with all occurrences of substring old replaced by new.
3131
3132If the optional argument count is given, only the first count occurrences are
3133replaced.
3134[clinic start generated code]*/
3135
3136PyDoc_STRVAR(bytes_replace__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003137"replace($self, old, new, count=-1, /)\n"
3138"--\n"
3139"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003140"Return a copy with all occurrences of substring old replaced by new.\n"
3141"\n"
3142" count\n"
3143" Maximum number of occurrences to replace.\n"
3144" -1 (the default value) means replace all occurrences.\n"
3145"\n"
3146"If the optional argument count is given, only the first count occurrences are\n"
3147"replaced.");
3148
3149#define BYTES_REPLACE_METHODDEF \
3150 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, bytes_replace__doc__},
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003151
3152static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003153bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003154
3155static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003156bytes_replace(PyBytesObject*self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003157{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003158 PyObject *return_value = NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003159 Py_buffer old = {NULL, NULL};
3160 Py_buffer new = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003161 Py_ssize_t count = -1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003162
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003163 if (!PyArg_ParseTuple(args,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003164 "y*y*|n:replace",
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003165 &old, &new, &count))
3166 goto exit;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003167 return_value = bytes_replace_impl(self, &old, &new, count);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003168
3169exit:
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003170 /* Cleanup for old */
3171 if (old.obj)
3172 PyBuffer_Release(&old);
3173 /* Cleanup for new */
3174 if (new.obj)
3175 PyBuffer_Release(&new);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003176
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003177 return return_value;
3178}
3179
3180static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003181bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new, Py_ssize_t count)
3182/*[clinic end generated code: output=f07bd9ecf29ee8d8 input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003184 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003185 (const char *)old->buf, old->len,
3186 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003187}
3188
3189/** End DALKE **/
3190
3191/* Matches the end (direction >= 0) or start (direction < 0) of self
3192 * against substr, using the start and end arguments. Returns
3193 * -1 on error, 0 if not found and 1 if found.
3194 */
3195Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003196_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003197 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003199 Py_ssize_t len = PyBytes_GET_SIZE(self);
3200 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003201 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003202 const char* sub;
3203 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003205 if (PyBytes_Check(substr)) {
3206 sub = PyBytes_AS_STRING(substr);
3207 slen = PyBytes_GET_SIZE(substr);
3208 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003209 else {
3210 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
3211 return -1;
3212 sub = sub_view.buf;
3213 slen = sub_view.len;
3214 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003215 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003217 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003219 if (direction < 0) {
3220 /* startswith */
3221 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003222 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003223 } else {
3224 /* endswith */
3225 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003226 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003228 if (end-slen > start)
3229 start = end - slen;
3230 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02003231 if (end-start < slen)
3232 goto notfound;
3233 if (memcmp(str+start, sub, slen) != 0)
3234 goto notfound;
3235
3236 PyBuffer_Release(&sub_view);
3237 return 1;
3238
3239notfound:
3240 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003241 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003242}
3243
3244
3245PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003246"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003247\n\
3248Return True if B starts with the specified prefix, False otherwise.\n\
3249With optional start, test B beginning at that position.\n\
3250With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003251prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003252
3253static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003254bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003255{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003256 Py_ssize_t start = 0;
3257 Py_ssize_t end = PY_SSIZE_T_MAX;
3258 PyObject *subobj;
3259 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003260
Jesus Ceaac451502011-04-20 17:09:23 +02003261 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003262 return NULL;
3263 if (PyTuple_Check(subobj)) {
3264 Py_ssize_t i;
3265 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3266 result = _bytes_tailmatch(self,
3267 PyTuple_GET_ITEM(subobj, i),
3268 start, end, -1);
3269 if (result == -1)
3270 return NULL;
3271 else if (result) {
3272 Py_RETURN_TRUE;
3273 }
3274 }
3275 Py_RETURN_FALSE;
3276 }
3277 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003278 if (result == -1) {
3279 if (PyErr_ExceptionMatches(PyExc_TypeError))
3280 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3281 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003282 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003283 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003284 else
3285 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003286}
3287
3288
3289PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003290"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003291\n\
3292Return True if B ends with the specified suffix, False otherwise.\n\
3293With optional start, test B beginning at that position.\n\
3294With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003295suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003296
3297static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003298bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003300 Py_ssize_t start = 0;
3301 Py_ssize_t end = PY_SSIZE_T_MAX;
3302 PyObject *subobj;
3303 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003304
Jesus Ceaac451502011-04-20 17:09:23 +02003305 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003306 return NULL;
3307 if (PyTuple_Check(subobj)) {
3308 Py_ssize_t i;
3309 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3310 result = _bytes_tailmatch(self,
3311 PyTuple_GET_ITEM(subobj, i),
3312 start, end, +1);
3313 if (result == -1)
3314 return NULL;
3315 else if (result) {
3316 Py_RETURN_TRUE;
3317 }
3318 }
3319 Py_RETURN_FALSE;
3320 }
3321 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003322 if (result == -1) {
3323 if (PyErr_ExceptionMatches(PyExc_TypeError))
3324 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3325 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003326 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003327 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003328 else
3329 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003330}
3331
3332
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003333/*[clinic input]
3334bytes.decode
3335
3336 encoding: str(c_default="NULL") = 'utf-8'
3337 The encoding with which to decode the bytes.
3338 errors: str(c_default="NULL") = 'strict'
3339 The error handling scheme to use for the handling of decoding errors.
3340 The default is 'strict' meaning that decoding errors raise a
3341 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3342 as well as any other name registered with codecs.register_error that
3343 can handle UnicodeDecodeErrors.
3344
3345Decode the bytes using the codec registered for encoding.
3346[clinic start generated code]*/
3347
3348PyDoc_STRVAR(bytes_decode__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003349"decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
3350"--\n"
3351"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003352"Decode the bytes using the codec registered for encoding.\n"
3353"\n"
3354" encoding\n"
3355" The encoding with which to decode the bytes.\n"
3356" errors\n"
3357" The error handling scheme to use for the handling of decoding errors.\n"
3358" The default is \'strict\' meaning that decoding errors raise a\n"
3359" UnicodeDecodeError. Other possible values are \'ignore\' and \'replace\'\n"
3360" as well as any other name registered with codecs.register_error that\n"
3361" can handle UnicodeDecodeErrors.");
3362
3363#define BYTES_DECODE_METHODDEF \
3364 {"decode", (PyCFunction)bytes_decode, METH_VARARGS|METH_KEYWORDS, bytes_decode__doc__},
3365
3366static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003367bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003368
3369static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003370bytes_decode(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00003371{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003372 PyObject *return_value = NULL;
3373 static char *_keywords[] = {"encoding", "errors", NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003374 const char *encoding = NULL;
3375 const char *errors = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +00003376
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003377 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3378 "|ss:decode", _keywords,
3379 &encoding, &errors))
3380 goto exit;
3381 return_value = bytes_decode_impl(self, encoding, errors);
3382
3383exit:
3384 return return_value;
3385}
3386
3387static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003388bytes_decode_impl(PyBytesObject*self, const char *encoding, const char *errors)
3389/*[clinic end generated code: output=61a80290bbfce696 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003390{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003391 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003392}
3393
Guido van Rossum20188312006-05-05 15:15:40 +00003394
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003395/*[clinic input]
3396bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003397
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003398 keepends: int(py_default="False") = 0
3399
3400Return a list of the lines in the bytes, breaking at line boundaries.
3401
3402Line breaks are not included in the resulting list unless keepends is given and
3403true.
3404[clinic start generated code]*/
3405
3406PyDoc_STRVAR(bytes_splitlines__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003407"splitlines($self, /, keepends=False)\n"
3408"--\n"
3409"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003410"Return a list of the lines in the bytes, breaking at line boundaries.\n"
3411"\n"
3412"Line breaks are not included in the resulting list unless keepends is given and\n"
3413"true.");
3414
3415#define BYTES_SPLITLINES_METHODDEF \
3416 {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS|METH_KEYWORDS, bytes_splitlines__doc__},
3417
3418static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003419bytes_splitlines_impl(PyBytesObject*self, int keepends);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003420
3421static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003422bytes_splitlines(PyBytesObject*self, PyObject *args, PyObject *kwargs)
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003423{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003424 PyObject *return_value = NULL;
3425 static char *_keywords[] = {"keepends", NULL};
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003426 int keepends = 0;
3427
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003428 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3429 "|i:splitlines", _keywords,
3430 &keepends))
3431 goto exit;
3432 return_value = bytes_splitlines_impl(self, keepends);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003433
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003434exit:
3435 return return_value;
3436}
3437
3438static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003439bytes_splitlines_impl(PyBytesObject*self, int keepends)
3440/*[clinic end generated code: output=79da057d05d126de input=ddb93e3351080c8c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003441{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003442 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003443 (PyObject*) self, PyBytes_AS_STRING(self),
3444 PyBytes_GET_SIZE(self), keepends
3445 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003446}
3447
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003448static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003449hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003451 if (c >= 128)
3452 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003453 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003454 return c - '0';
3455 else {
David Malcolm96960882010-11-05 17:23:41 +00003456 if (Py_ISUPPER(c))
3457 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003458 if (c >= 'a' && c <= 'f')
3459 return c - 'a' + 10;
3460 }
3461 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003462}
3463
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003464/*[clinic input]
3465@classmethod
3466bytes.fromhex
3467
3468 string: unicode
3469 /
3470
3471Create a bytes object from a string of hexadecimal numbers.
3472
3473Spaces between two numbers are accepted.
3474Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3475[clinic start generated code]*/
3476
3477PyDoc_STRVAR(bytes_fromhex__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003478"fromhex($type, string, /)\n"
3479"--\n"
3480"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003481"Create a bytes object from a string of hexadecimal numbers.\n"
3482"\n"
3483"Spaces between two numbers are accepted.\n"
Martin v. Löwis0efea322014-07-27 17:29:17 +02003484"Example: bytes.fromhex(\'B9 01EF\') -> b\'\\\\xb9\\\\x01\\\\xef\'.");
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003485
3486#define BYTES_FROMHEX_METHODDEF \
3487 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, bytes_fromhex__doc__},
3488
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003489static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003490bytes_fromhex_impl(PyTypeObject *type, PyObject *string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003491
3492static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003493bytes_fromhex(PyTypeObject *type, PyObject *args)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003494{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003495 PyObject *return_value = NULL;
3496 PyObject *string;
3497
3498 if (!PyArg_ParseTuple(args,
3499 "U:fromhex",
3500 &string))
3501 goto exit;
Martin v. Löwis0efea322014-07-27 17:29:17 +02003502 return_value = bytes_fromhex_impl(type, string);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003503
3504exit:
3505 return return_value;
3506}
3507
3508static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003509bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
3510/*[clinic end generated code: output=09e6cbef56cbbb65 input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003511{
3512 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003513 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003514 Py_ssize_t hexlen, byteslen, i, j;
3515 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003516 void *data;
3517 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003518
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003519 assert(PyUnicode_Check(string));
3520 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003521 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003522 kind = PyUnicode_KIND(string);
3523 data = PyUnicode_DATA(string);
3524 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003526 byteslen = hexlen/2; /* This overestimates if there are spaces */
3527 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3528 if (!newstring)
3529 return NULL;
3530 buf = PyBytes_AS_STRING(newstring);
3531 for (i = j = 0; i < hexlen; i += 2) {
3532 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003533 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003534 i++;
3535 if (i >= hexlen)
3536 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003537 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3538 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003539 if (top == -1 || bot == -1) {
3540 PyErr_Format(PyExc_ValueError,
3541 "non-hexadecimal number found in "
3542 "fromhex() arg at position %zd", i);
3543 goto error;
3544 }
3545 buf[j++] = (top << 4) + bot;
3546 }
3547 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3548 goto error;
3549 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003550
3551 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003552 Py_XDECREF(newstring);
3553 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003554}
3555
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003556/*[clinic input]
3557bytes.__sizeof__ as bytes_sizeof
3558
3559 self: self(type="PyBytesObject *")
3560
3561Returns the size of the bytes object in memory, in bytes.
3562[clinic start generated code]*/
3563
3564PyDoc_STRVAR(bytes_sizeof__doc__,
Martin v. Löwis0efea322014-07-27 17:29:17 +02003565"__sizeof__($self, /)\n"
3566"--\n"
3567"\n"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003568"Returns the size of the bytes object in memory, in bytes.");
3569
3570#define BYTES_SIZEOF_METHODDEF \
3571 {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS, bytes_sizeof__doc__},
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003572
3573static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003574bytes_sizeof_impl(PyBytesObject *self);
3575
3576static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003577bytes_sizeof(PyBytesObject *self, PyObject *Py_UNUSED(ignored))
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003578{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003579 return bytes_sizeof_impl(self);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003580}
3581
3582static PyObject *
3583bytes_sizeof_impl(PyBytesObject *self)
Martin v. Löwis0efea322014-07-27 17:29:17 +02003584/*[clinic end generated code: output=44933279343f24ae input=bee4c64bb42078ed]*/
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003585{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003586 Py_ssize_t res;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003587 res = PyBytesObject_SIZE + Py_SIZE(self) * Py_TYPE(self)->tp_itemsize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003588 return PyLong_FromSsize_t(res);
Martin v. Löwis00709aa2008-06-04 14:18:43 +00003589}
3590
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003591
3592static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003593bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003594{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003595 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003596}
3597
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003598
3599static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003600bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003601 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3602 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3603 _Py_capitalize__doc__},
3604 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3605 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003606 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003607 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3608 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003609 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003610 expandtabs__doc__},
3611 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003612 BYTES_FROMHEX_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003613 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3614 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3615 _Py_isalnum__doc__},
3616 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3617 _Py_isalpha__doc__},
3618 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3619 _Py_isdigit__doc__},
3620 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3621 _Py_islower__doc__},
3622 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3623 _Py_isspace__doc__},
3624 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3625 _Py_istitle__doc__},
3626 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3627 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003628 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003629 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3630 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003631 BYTES_LSTRIP_METHODDEF
3632 BYTES_MAKETRANS_METHODDEF
3633 BYTES_PARTITION_METHODDEF
3634 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003635 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3636 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3637 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003638 BYTES_RPARTITION_METHODDEF
3639 BYTES_RSPLIT_METHODDEF
3640 BYTES_RSTRIP_METHODDEF
3641 BYTES_SPLIT_METHODDEF
3642 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003643 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3644 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003645 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003646 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3647 _Py_swapcase__doc__},
3648 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003649 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003650 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3651 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003652 BYTES_SIZEOF_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003653 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003654};
3655
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003656static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003657bytes_mod(PyObject *v, PyObject *w)
3658{
3659 if (!PyBytes_Check(v))
3660 Py_RETURN_NOTIMPLEMENTED;
3661 return _PyBytes_Format(v, w);
3662}
3663
3664static PyNumberMethods bytes_as_number = {
3665 0, /*nb_add*/
3666 0, /*nb_subtract*/
3667 0, /*nb_multiply*/
3668 bytes_mod, /*nb_remainder*/
3669};
3670
3671static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003672str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3673
3674static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003675bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003676{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003677 PyObject *x = NULL;
3678 const char *encoding = NULL;
3679 const char *errors = NULL;
3680 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003681 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003682 Py_ssize_t size;
3683 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003684 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003686 if (type != &PyBytes_Type)
3687 return str_subtype_new(type, args, kwds);
3688 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3689 &encoding, &errors))
3690 return NULL;
3691 if (x == NULL) {
3692 if (encoding != NULL || errors != NULL) {
3693 PyErr_SetString(PyExc_TypeError,
3694 "encoding or errors without sequence "
3695 "argument");
3696 return NULL;
3697 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003698 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003699 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003701 if (PyUnicode_Check(x)) {
3702 /* Encode via the codec registry */
3703 if (encoding == NULL) {
3704 PyErr_SetString(PyExc_TypeError,
3705 "string argument without an encoding");
3706 return NULL;
3707 }
3708 new = PyUnicode_AsEncodedString(x, encoding, errors);
3709 if (new == NULL)
3710 return NULL;
3711 assert(PyBytes_Check(new));
3712 return new;
3713 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003714
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003715 /* If it's not unicode, there can't be encoding or errors */
3716 if (encoding != NULL || errors != NULL) {
3717 PyErr_SetString(PyExc_TypeError,
3718 "encoding or errors without a string argument");
3719 return NULL;
3720 }
3721
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003722 /* We'd like to call PyObject_Bytes here, but we need to check for an
3723 integer argument before deferring to PyBytes_FromObject, something
3724 PyObject_Bytes doesn't do. */
3725 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3726 if (func != NULL) {
3727 new = PyObject_CallFunctionObjArgs(func, NULL);
3728 Py_DECREF(func);
3729 if (new == NULL)
3730 return NULL;
3731 if (!PyBytes_Check(new)) {
3732 PyErr_Format(PyExc_TypeError,
3733 "__bytes__ returned non-bytes (type %.200s)",
3734 Py_TYPE(new)->tp_name);
3735 Py_DECREF(new);
3736 return NULL;
3737 }
3738 return new;
3739 }
3740 else if (PyErr_Occurred())
3741 return NULL;
3742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003743 /* Is it an integer? */
3744 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3745 if (size == -1 && PyErr_Occurred()) {
3746 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3747 return NULL;
3748 PyErr_Clear();
3749 }
3750 else if (size < 0) {
3751 PyErr_SetString(PyExc_ValueError, "negative count");
3752 return NULL;
3753 }
3754 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003755 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003756 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003757 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003758 return new;
3759 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003760
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003761 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003762}
3763
3764PyObject *
3765PyBytes_FromObject(PyObject *x)
3766{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003767 PyObject *new, *it;
3768 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003770 if (x == NULL) {
3771 PyErr_BadInternalCall();
3772 return NULL;
3773 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003774
3775 if (PyBytes_CheckExact(x)) {
3776 Py_INCREF(x);
3777 return x;
3778 }
3779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003780 /* Use the modern buffer interface */
3781 if (PyObject_CheckBuffer(x)) {
3782 Py_buffer view;
3783 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3784 return NULL;
3785 new = PyBytes_FromStringAndSize(NULL, view.len);
3786 if (!new)
3787 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003788 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3789 &view, view.len, 'C') < 0)
3790 goto fail;
3791 PyBuffer_Release(&view);
3792 return new;
3793 fail:
3794 Py_XDECREF(new);
3795 PyBuffer_Release(&view);
3796 return NULL;
3797 }
3798 if (PyUnicode_Check(x)) {
3799 PyErr_SetString(PyExc_TypeError,
3800 "cannot convert unicode object to bytes");
3801 return NULL;
3802 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003804 if (PyList_CheckExact(x)) {
3805 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3806 if (new == NULL)
3807 return NULL;
3808 for (i = 0; i < Py_SIZE(x); i++) {
3809 Py_ssize_t value = PyNumber_AsSsize_t(
3810 PyList_GET_ITEM(x, i), PyExc_ValueError);
3811 if (value == -1 && PyErr_Occurred()) {
3812 Py_DECREF(new);
3813 return NULL;
3814 }
3815 if (value < 0 || value >= 256) {
3816 PyErr_SetString(PyExc_ValueError,
3817 "bytes must be in range(0, 256)");
3818 Py_DECREF(new);
3819 return NULL;
3820 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003821 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003822 }
3823 return new;
3824 }
3825 if (PyTuple_CheckExact(x)) {
3826 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3827 if (new == NULL)
3828 return NULL;
3829 for (i = 0; i < Py_SIZE(x); i++) {
3830 Py_ssize_t value = PyNumber_AsSsize_t(
3831 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3832 if (value == -1 && PyErr_Occurred()) {
3833 Py_DECREF(new);
3834 return NULL;
3835 }
3836 if (value < 0 || value >= 256) {
3837 PyErr_SetString(PyExc_ValueError,
3838 "bytes must be in range(0, 256)");
3839 Py_DECREF(new);
3840 return NULL;
3841 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003842 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003843 }
3844 return new;
3845 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003847 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003848 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003849 if (size == -1 && PyErr_Occurred())
3850 return NULL;
3851 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3852 returning a shared empty bytes string. This required because we
3853 want to call _PyBytes_Resize() the returned object, which we can
3854 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003855 if (size == 0)
3856 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003857 new = PyBytes_FromStringAndSize(NULL, size);
3858 if (new == NULL)
3859 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003860 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003862 /* Get the iterator */
3863 it = PyObject_GetIter(x);
3864 if (it == NULL)
3865 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003867 /* Run the iterator to exhaustion */
3868 for (i = 0; ; i++) {
3869 PyObject *item;
3870 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003871
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003872 /* Get the next item */
3873 item = PyIter_Next(it);
3874 if (item == NULL) {
3875 if (PyErr_Occurred())
3876 goto error;
3877 break;
3878 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003880 /* Interpret it as an int (__index__) */
3881 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3882 Py_DECREF(item);
3883 if (value == -1 && PyErr_Occurred())
3884 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003886 /* Range check */
3887 if (value < 0 || value >= 256) {
3888 PyErr_SetString(PyExc_ValueError,
3889 "bytes must be in range(0, 256)");
3890 goto error;
3891 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003893 /* Append the byte */
3894 if (i >= size) {
3895 size = 2 * size + 1;
3896 if (_PyBytes_Resize(&new, size) < 0)
3897 goto error;
3898 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003899 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003900 }
3901 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003903 /* Clean up and return success */
3904 Py_DECREF(it);
3905 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003906
3907 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003908 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003909 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003910 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003911}
3912
3913static PyObject *
3914str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3915{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003916 PyObject *tmp, *pnew;
3917 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003918
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003919 assert(PyType_IsSubtype(type, &PyBytes_Type));
3920 tmp = bytes_new(&PyBytes_Type, args, kwds);
3921 if (tmp == NULL)
3922 return NULL;
3923 assert(PyBytes_CheckExact(tmp));
3924 n = PyBytes_GET_SIZE(tmp);
3925 pnew = type->tp_alloc(type, n);
3926 if (pnew != NULL) {
3927 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3928 PyBytes_AS_STRING(tmp), n+1);
3929 ((PyBytesObject *)pnew)->ob_shash =
3930 ((PyBytesObject *)tmp)->ob_shash;
3931 }
3932 Py_DECREF(tmp);
3933 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003934}
3935
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003936PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003937"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003938bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003939bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003940bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3941bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003942\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003943Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003944 - an iterable yielding integers in range(256)\n\
3945 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003946 - any object implementing the buffer API.\n\
3947 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003948
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003949static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003950
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003951PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003952 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3953 "bytes",
3954 PyBytesObject_SIZE,
3955 sizeof(char),
3956 bytes_dealloc, /* tp_dealloc */
3957 0, /* tp_print */
3958 0, /* tp_getattr */
3959 0, /* tp_setattr */
3960 0, /* tp_reserved */
3961 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003962 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003963 &bytes_as_sequence, /* tp_as_sequence */
3964 &bytes_as_mapping, /* tp_as_mapping */
3965 (hashfunc)bytes_hash, /* tp_hash */
3966 0, /* tp_call */
3967 bytes_str, /* tp_str */
3968 PyObject_GenericGetAttr, /* tp_getattro */
3969 0, /* tp_setattro */
3970 &bytes_as_buffer, /* tp_as_buffer */
3971 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3972 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3973 bytes_doc, /* tp_doc */
3974 0, /* tp_traverse */
3975 0, /* tp_clear */
3976 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3977 0, /* tp_weaklistoffset */
3978 bytes_iter, /* tp_iter */
3979 0, /* tp_iternext */
3980 bytes_methods, /* tp_methods */
3981 0, /* tp_members */
3982 0, /* tp_getset */
3983 &PyBaseObject_Type, /* tp_base */
3984 0, /* tp_dict */
3985 0, /* tp_descr_get */
3986 0, /* tp_descr_set */
3987 0, /* tp_dictoffset */
3988 0, /* tp_init */
3989 0, /* tp_alloc */
3990 bytes_new, /* tp_new */
3991 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003992};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003993
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003994void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003995PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003997 assert(pv != NULL);
3998 if (*pv == NULL)
3999 return;
4000 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02004001 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004002 return;
4003 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02004004
4005 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
4006 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05004007 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02004008 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02004009
Antoine Pitrou161d6952014-05-01 14:36:20 +02004010 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02004011 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02004012 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
4013 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
4014 Py_CLEAR(*pv);
4015 return;
4016 }
4017
4018 oldsize = PyBytes_GET_SIZE(*pv);
4019 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
4020 PyErr_NoMemory();
4021 goto error;
4022 }
4023 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
4024 goto error;
4025
4026 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
4027 PyBuffer_Release(&wb);
4028 return;
4029
4030 error:
4031 PyBuffer_Release(&wb);
4032 Py_CLEAR(*pv);
4033 return;
4034 }
4035
4036 else {
4037 /* Multiple references, need to create new object */
4038 PyObject *v;
4039 v = bytes_concat(*pv, w);
4040 Py_DECREF(*pv);
4041 *pv = v;
4042 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004043}
4044
4045void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02004046PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004048 PyBytes_Concat(pv, w);
4049 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004050}
4051
4052
Ethan Furmanb95b5612015-01-23 20:05:18 -08004053/* The following function breaks the notion that bytes are immutable:
4054 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004055 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08004056 as creating a new bytes object and destroying the old one, only
4057 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004058 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08004059 Note that if there's not enough memory to resize the bytes object, the
4060 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004061 memory" exception is set, and -1 is returned. Else (on success) 0 is
4062 returned, and the value in *pv may or may not be the same as on input.
4063 As always, an extra byte is allocated for a trailing \0 byte (newsize
4064 does *not* include that), and a trailing \0 byte is stored.
4065*/
4066
4067int
4068_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
4069{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02004070 PyObject *v;
4071 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004072 v = *pv;
4073 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
4074 *pv = 0;
4075 Py_DECREF(v);
4076 PyErr_BadInternalCall();
4077 return -1;
4078 }
4079 /* XXX UNREF/NEWREF interface should be more symmetrical */
4080 _Py_DEC_REFTOTAL;
4081 _Py_ForgetReference(v);
4082 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03004083 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004084 if (*pv == NULL) {
4085 PyObject_Del(v);
4086 PyErr_NoMemory();
4087 return -1;
4088 }
4089 _Py_NewReference(*pv);
4090 sv = (PyBytesObject *) *pv;
4091 Py_SIZE(sv) = newsize;
4092 sv->ob_sval[newsize] = '\0';
4093 sv->ob_shash = -1; /* invalidate cached hash value */
4094 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004095}
4096
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004097void
4098PyBytes_Fini(void)
4099{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004100 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02004101 for (i = 0; i < UCHAR_MAX + 1; i++)
4102 Py_CLEAR(characters[i]);
4103 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004104}
4105
Benjamin Peterson4116f362008-05-27 00:36:20 +00004106/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004107
4108typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004109 PyObject_HEAD
4110 Py_ssize_t it_index;
4111 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004112} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004113
4114static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004115striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004117 _PyObject_GC_UNTRACK(it);
4118 Py_XDECREF(it->it_seq);
4119 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004120}
4121
4122static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004123striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004125 Py_VISIT(it->it_seq);
4126 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004127}
4128
4129static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004130striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004132 PyBytesObject *seq;
4133 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004135 assert(it != NULL);
4136 seq = it->it_seq;
4137 if (seq == NULL)
4138 return NULL;
4139 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004141 if (it->it_index < PyBytes_GET_SIZE(seq)) {
4142 item = PyLong_FromLong(
4143 (unsigned char)seq->ob_sval[it->it_index]);
4144 if (item != NULL)
4145 ++it->it_index;
4146 return item;
4147 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004149 Py_DECREF(seq);
4150 it->it_seq = NULL;
4151 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004152}
4153
4154static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004155striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004156{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004157 Py_ssize_t len = 0;
4158 if (it->it_seq)
4159 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
4160 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004161}
4162
4163PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004164 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004165
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004166static PyObject *
4167striter_reduce(striterobject *it)
4168{
4169 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02004170 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004171 it->it_seq, it->it_index);
4172 } else {
4173 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
4174 if (u == NULL)
4175 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02004176 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004177 }
4178}
4179
4180PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
4181
4182static PyObject *
4183striter_setstate(striterobject *it, PyObject *state)
4184{
4185 Py_ssize_t index = PyLong_AsSsize_t(state);
4186 if (index == -1 && PyErr_Occurred())
4187 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00004188 if (it->it_seq != NULL) {
4189 if (index < 0)
4190 index = 0;
4191 else if (index > PyBytes_GET_SIZE(it->it_seq))
4192 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
4193 it->it_index = index;
4194 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004195 Py_RETURN_NONE;
4196}
4197
4198PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
4199
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004200static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004201 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
4202 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00004203 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
4204 reduce_doc},
4205 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
4206 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004207 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004208};
4209
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004210PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004211 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4212 "bytes_iterator", /* tp_name */
4213 sizeof(striterobject), /* tp_basicsize */
4214 0, /* tp_itemsize */
4215 /* methods */
4216 (destructor)striter_dealloc, /* tp_dealloc */
4217 0, /* tp_print */
4218 0, /* tp_getattr */
4219 0, /* tp_setattr */
4220 0, /* tp_reserved */
4221 0, /* tp_repr */
4222 0, /* tp_as_number */
4223 0, /* tp_as_sequence */
4224 0, /* tp_as_mapping */
4225 0, /* tp_hash */
4226 0, /* tp_call */
4227 0, /* tp_str */
4228 PyObject_GenericGetAttr, /* tp_getattro */
4229 0, /* tp_setattro */
4230 0, /* tp_as_buffer */
4231 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4232 0, /* tp_doc */
4233 (traverseproc)striter_traverse, /* tp_traverse */
4234 0, /* tp_clear */
4235 0, /* tp_richcompare */
4236 0, /* tp_weaklistoffset */
4237 PyObject_SelfIter, /* tp_iter */
4238 (iternextfunc)striter_next, /* tp_iternext */
4239 striter_methods, /* tp_methods */
4240 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004241};
4242
4243static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00004244bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004245{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004246 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00004248 if (!PyBytes_Check(seq)) {
4249 PyErr_BadInternalCall();
4250 return NULL;
4251 }
4252 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
4253 if (it == NULL)
4254 return NULL;
4255 it->it_index = 0;
4256 Py_INCREF(seq);
4257 it->it_seq = (PyBytesObject *)seq;
4258 _PyObject_GC_TRACK(it);
4259 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00004260}