blob: 075edf8c67c114aa0ac231917a790427679fa4be [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
#ifdef COUNT_ALLOCS
/* Statistics: incremented each time the cached empty bytes object
   (null_strings) or a cached one-byte bytes object (one_strings) is
   handed out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte value.  Entries start
   out NULL and are filled in the first time the corresponding one-byte
   object is created. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* The shared empty bytes object, or NULL until one has been created. */
static PyBytesObject *nullstring;
24
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.

   The "+ 1" reserves the byte used for the terminating null character that
   is stored right after the n data bytes in ob_sval.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Christian Heimes2c9c7a52008-05-26 13:42:13 +000033/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000034 For PyBytes_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
36
   For PyBytes_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000045 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046 alter the data yourself, since the strings may be shared.
47
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020050 allocated for string data, not counting the null terminating character.
51 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 PyBytes_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyBytes_FromString()).
54*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
/* Format codes
 *
 * Bit flags, OR-ed together into a single `flags` value, recording which
 * flag characters were seen in a format specifier:
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200414 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800415{
416 char *p;
417 PyObject *result;
418 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800420
421 x = PyFloat_AsDouble(v);
422 if (x == -1.0 && PyErr_Occurred()) {
423 PyErr_Format(PyExc_TypeError, "float argument required, "
424 "not %.200s", Py_TYPE(v)->tp_name);
425 return NULL;
426 }
427
428 if (prec < 0)
429 prec = 6;
430
431 p = PyOS_double_to_string(x, type, prec,
432 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433
434 if (p == NULL)
435 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200436
437 len = strlen(p);
438 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200439 str = _PyBytesWriter_Prepare(writer, str, len);
440 if (str == NULL)
441 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 Py_MEMCPY(str, p, len);
443 str += len;
444 return str;
445 }
446
447 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800448 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200449 *p_result = result;
450 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800451}
452
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300453static PyObject *
454formatlong(PyObject *v, int flags, int prec, int type)
455{
456 PyObject *result, *iobj;
457 if (type == 'i')
458 type = 'd';
459 if (PyLong_Check(v))
460 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
461 if (PyNumber_Check(v)) {
462 /* make sure number is a type of integer for o, x, and X */
463 if (type == 'o' || type == 'x' || type == 'X')
464 iobj = PyNumber_Index(v);
465 else
466 iobj = PyNumber_Long(v);
467 if (iobj == NULL) {
468 if (!PyErr_ExceptionMatches(PyExc_TypeError))
469 return NULL;
470 }
471 else if (!PyLong_Check(iobj))
472 Py_CLEAR(iobj);
473 if (iobj != NULL) {
474 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
475 Py_DECREF(iobj);
476 return result;
477 }
478 }
479 PyErr_Format(PyExc_TypeError,
480 "%%%c format: %s is required, not %.200s", type,
481 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
482 : "a number",
483 Py_TYPE(v)->tp_name);
484 return NULL;
485}
486
487static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800489{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
491 *p = PyBytes_AS_STRING(arg)[0];
492 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800493 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
495 *p = PyByteArray_AS_STRING(arg)[0];
496 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497 }
498 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300499 PyObject *iobj;
500 long ival;
501 int overflow;
502 /* make sure number is a type of integer */
503 if (PyLong_Check(arg)) {
504 ival = PyLong_AsLongAndOverflow(arg, &overflow);
505 }
506 else {
507 iobj = PyNumber_Index(arg);
508 if (iobj == NULL) {
509 if (!PyErr_ExceptionMatches(PyExc_TypeError))
510 return 0;
511 goto onError;
512 }
513 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
514 Py_DECREF(iobj);
515 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300516 if (!overflow && ival == -1 && PyErr_Occurred())
517 goto onError;
518 if (overflow || !(0 <= ival && ival <= 255)) {
519 PyErr_SetString(PyExc_OverflowError,
520 "%c arg not in range(256)");
521 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300523 *p = (char)ival;
524 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800525 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300526 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200527 PyErr_SetString(PyExc_TypeError,
528 "%c requires an integer in range(256) or a single byte");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530}
531
532static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537 /* is it a bytes object? */
538 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200539 *pbuf = PyBytes_AS_STRING(v);
540 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800541 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 return v;
543 }
544 if (PyByteArray_Check(v)) {
545 *pbuf = PyByteArray_AS_STRING(v);
546 *plen = PyByteArray_GET_SIZE(v);
547 Py_INCREF(v);
548 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800549 }
550 /* does it support __bytes__? */
551 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
552 if (func != NULL) {
553 result = PyObject_CallFunctionObjArgs(func, NULL);
554 Py_DECREF(func);
555 if (result == NULL)
556 return NULL;
557 if (!PyBytes_Check(result)) {
558 PyErr_Format(PyExc_TypeError,
559 "__bytes__ returned non-bytes (type %.200s)",
560 Py_TYPE(result)->tp_name);
561 Py_DECREF(result);
562 return NULL;
563 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200564 *pbuf = PyBytes_AS_STRING(result);
565 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800566 return result;
567 }
568 PyErr_Format(PyExc_TypeError,
569 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
570 Py_TYPE(v)->tp_name);
571 return NULL;
572}
573
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200574/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800575
576PyObject *
577_PyBytes_Format(PyObject *format, PyObject *args)
578{
579 char *fmt, *res;
580 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 _PyBytesWriter writer;
585
Ethan Furmanb95b5612015-01-23 20:05:18 -0800586 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
587 PyErr_BadInternalCall();
588 return NULL;
589 }
590 fmt = PyBytes_AS_STRING(format);
591 fmtcnt = PyBytes_GET_SIZE(format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592
593 _PyBytesWriter_Init(&writer);
594
595 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
596 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800597 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200598 writer.overallocate = 1;
599
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 if (PyTuple_Check(args)) {
601 arglen = PyTuple_GET_SIZE(args);
602 argidx = 0;
603 }
604 else {
605 arglen = -1;
606 argidx = -2;
607 }
608 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
609 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
610 !PyByteArray_Check(args)) {
611 dict = args;
612 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200613
Ethan Furmanb95b5612015-01-23 20:05:18 -0800614 while (--fmtcnt >= 0) {
615 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616 Py_ssize_t len;
617 char *pos;
618
619 pos = strchr(fmt + 1, '%');
620 if (pos != NULL)
621 len = pos - fmt;
622 else {
623 len = PyBytes_GET_SIZE(format);
624 len -= (fmt - PyBytes_AS_STRING(format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 assert(len != 0);
627
628 Py_MEMCPY(res, fmt, len);
629 res += len;
630 fmt += len;
631 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 }
633 else {
634 /* Got a format specifier */
635 int flags = 0;
636 Py_ssize_t width = -1;
637 int prec = -1;
638 int c = '\0';
639 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 PyObject *v = NULL;
641 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200642 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200644 Py_ssize_t len = 0;
645 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 Py_ssize_t alloc;
647#ifdef Py_DEBUG
648 char *before;
649#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 fmt++;
652 if (*fmt == '(') {
653 char *keystart;
654 Py_ssize_t keylen;
655 PyObject *key;
656 int pcount = 1;
657
658 if (dict == NULL) {
659 PyErr_SetString(PyExc_TypeError,
660 "format requires a mapping");
661 goto error;
662 }
663 ++fmt;
664 --fmtcnt;
665 keystart = fmt;
666 /* Skip over balanced parentheses */
667 while (pcount > 0 && --fmtcnt >= 0) {
668 if (*fmt == ')')
669 --pcount;
670 else if (*fmt == '(')
671 ++pcount;
672 fmt++;
673 }
674 keylen = fmt - keystart - 1;
675 if (fmtcnt < 0 || pcount > 0) {
676 PyErr_SetString(PyExc_ValueError,
677 "incomplete format key");
678 goto error;
679 }
680 key = PyBytes_FromStringAndSize(keystart,
681 keylen);
682 if (key == NULL)
683 goto error;
684 if (args_owned) {
685 Py_DECREF(args);
686 args_owned = 0;
687 }
688 args = PyObject_GetItem(dict, key);
689 Py_DECREF(key);
690 if (args == NULL) {
691 goto error;
692 }
693 args_owned = 1;
694 arglen = -1;
695 argidx = -2;
696 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200697
698 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800699 while (--fmtcnt >= 0) {
700 switch (c = *fmt++) {
701 case '-': flags |= F_LJUST; continue;
702 case '+': flags |= F_SIGN; continue;
703 case ' ': flags |= F_BLANK; continue;
704 case '#': flags |= F_ALT; continue;
705 case '0': flags |= F_ZERO; continue;
706 }
707 break;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 if (c == '*') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 if (!PyLong_Check(v)) {
716 PyErr_SetString(PyExc_TypeError,
717 "* wants int");
718 goto error;
719 }
720 width = PyLong_AsSsize_t(v);
721 if (width == -1 && PyErr_Occurred())
722 goto error;
723 if (width < 0) {
724 flags |= F_LJUST;
725 width = -width;
726 }
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 else if (c >= 0 && isdigit(c)) {
731 width = c - '0';
732 while (--fmtcnt >= 0) {
733 c = Py_CHARMASK(*fmt++);
734 if (!isdigit(c))
735 break;
736 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
737 PyErr_SetString(
738 PyExc_ValueError,
739 "width too big");
740 goto error;
741 }
742 width = width*10 + (c - '0');
743 }
744 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200745
746 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800747 if (c == '.') {
748 prec = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!PyLong_Check(v)) {
756 PyErr_SetString(
757 PyExc_TypeError,
758 "* wants int");
759 goto error;
760 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200761 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (prec == -1 && PyErr_Occurred())
763 goto error;
764 if (prec < 0)
765 prec = 0;
766 if (--fmtcnt >= 0)
767 c = *fmt++;
768 }
769 else if (c >= 0 && isdigit(c)) {
770 prec = c - '0';
771 while (--fmtcnt >= 0) {
772 c = Py_CHARMASK(*fmt++);
773 if (!isdigit(c))
774 break;
775 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
776 PyErr_SetString(
777 PyExc_ValueError,
778 "prec too big");
779 goto error;
780 }
781 prec = prec*10 + (c - '0');
782 }
783 }
784 } /* prec */
785 if (fmtcnt >= 0) {
786 if (c == 'h' || c == 'l' || c == 'L') {
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 }
790 }
791 if (fmtcnt < 0) {
792 PyErr_SetString(PyExc_ValueError,
793 "incomplete format");
794 goto error;
795 }
796 if (c != '%') {
797 v = getnextarg(args, arglen, &argidx);
798 if (v == NULL)
799 goto error;
800 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200801
802 if (fmtcnt < 0) {
803 /* last writer: disable writer overallocation */
804 writer.overallocate = 0;
805 }
806
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 sign = 0;
808 fill = ' ';
809 switch (c) {
810 case '%':
811 pbuf = "%";
812 len = 1;
813 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814
Ethan Furman62e977f2015-03-11 08:17:00 -0700815 case 'r':
816 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200818 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800819 if (temp == NULL)
820 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200821 assert(PyUnicode_IS_ASCII(temp));
822 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (prec >= 0 && len > prec)
825 len = prec;
826 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200827
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 case 's':
829 // %s is only for 2/3 code; 3 only code should use %b
830 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200831 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (temp == NULL)
833 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (prec >= 0 && len > prec)
835 len = prec;
836 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 case 'i':
839 case 'd':
840 case 'u':
841 case 'o':
842 case 'x':
843 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300844 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200845 if (!temp)
846 goto error;
847 assert(PyUnicode_IS_ASCII(temp));
848 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
849 len = PyUnicode_GET_LENGTH(temp);
850 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800851 if (flags & F_ZERO)
852 fill = '0';
853 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200854
Ethan Furmanb95b5612015-01-23 20:05:18 -0800855 case 'e':
856 case 'E':
857 case 'f':
858 case 'F':
859 case 'g':
860 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200861 if (width == -1 && prec == -1
862 && !(flags & (F_SIGN | F_BLANK)))
863 {
864 /* Fast path */
Victor Stinnerad771582015-10-09 12:38:53 +0200865 writer.min_size -= 2; /* size preallocated by "%f" */
866 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200867 if (res == NULL)
868 goto error;
869 continue;
870 }
871
Victor Stinnerad771582015-10-09 12:38:53 +0200872 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800873 goto error;
874 pbuf = PyBytes_AS_STRING(temp);
875 len = PyBytes_GET_SIZE(temp);
876 sign = 1;
877 if (flags & F_ZERO)
878 fill = '0';
879 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200880
Ethan Furmanb95b5612015-01-23 20:05:18 -0800881 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200882 pbuf = &onechar;
883 len = byte_converter(v, &onechar);
884 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800885 goto error;
886 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200887
Ethan Furmanb95b5612015-01-23 20:05:18 -0800888 default:
889 PyErr_Format(PyExc_ValueError,
890 "unsupported format character '%c' (0x%x) "
891 "at index %zd",
892 c, c,
893 (Py_ssize_t)(fmt - 1 -
894 PyBytes_AsString(format)));
895 goto error;
896 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200897
Ethan Furmanb95b5612015-01-23 20:05:18 -0800898 if (sign) {
899 if (*pbuf == '-' || *pbuf == '+') {
900 sign = *pbuf++;
901 len--;
902 }
903 else if (flags & F_SIGN)
904 sign = '+';
905 else if (flags & F_BLANK)
906 sign = ' ';
907 else
908 sign = 0;
909 }
910 if (width < len)
911 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200912
913 alloc = width;
914 if (sign != 0 && len == width)
915 alloc++;
916 if (alloc > 1) {
917 res = _PyBytesWriter_Prepare(&writer, res, alloc - 1);
918 if (res == NULL)
919 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800920 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200921#ifdef Py_DEBUG
922 before = res;
923#endif
924
925 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 if (sign) {
927 if (fill != ' ')
928 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800929 if (width > len)
930 width--;
931 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200932
933 /* Write the numeric prefix for "x", "X" and "o" formats
934 if the alternate form is used.
935 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800936 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
937 assert(pbuf[0] == '0');
938 assert(pbuf[1] == c);
939 if (fill != ' ') {
940 *res++ = *pbuf++;
941 *res++ = *pbuf++;
942 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800943 width -= 2;
944 if (width < 0)
945 width = 0;
946 len -= 2;
947 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200948
949 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800950 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200951 memset(res, fill, width - len);
952 res += (width - len);
953 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800954 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200955
956 /* If padding with spaces: write sign if needed and/or numeric
957 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800958 if (fill == ' ') {
959 if (sign)
960 *res++ = sign;
961 if ((flags & F_ALT) &&
962 (c == 'x' || c == 'X')) {
963 assert(pbuf[0] == '0');
964 assert(pbuf[1] == c);
965 *res++ = *pbuf++;
966 *res++ = *pbuf++;
967 }
968 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200969
970 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800971 Py_MEMCPY(res, pbuf, len);
972 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200973
974 /* Pad right with the fill character if needed */
975 if (width > len) {
976 memset(res, ' ', width - len);
977 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800978 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200979
Ethan Furmanb95b5612015-01-23 20:05:18 -0800980 if (dict && (argidx < arglen) && c != '%') {
981 PyErr_SetString(PyExc_TypeError,
982 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 Py_XDECREF(temp);
984 goto error;
985 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800986 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200987
988#ifdef Py_DEBUG
989 /* check that we computed the exact size for this write */
990 assert((res - before) == alloc);
991#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800992 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200993
994 /* If overallocation was disabled, ensure that it was the last
995 write. Otherwise, we missed an optimization */
996 assert(writer.overallocate || fmtcnt < 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800997 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200998
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 if (argidx < arglen && !dict) {
1000 PyErr_SetString(PyExc_TypeError,
1001 "not all arguments converted during bytes formatting");
1002 goto error;
1003 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001004
Ethan Furmanb95b5612015-01-23 20:05:18 -08001005 if (args_owned) {
1006 Py_DECREF(args);
1007 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001008 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001009
1010 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001011 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001012 if (args_owned) {
1013 Py_DECREF(args);
1014 }
1015 return NULL;
1016}
1017
1018/* =-= */
1019
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001020static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001021bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001024}
1025
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001026/* Unescape a backslash-escaped string. If unicode is non-zero,
1027 the string is a u-literal. If recode_encoding is non-zero,
1028 the string is UTF-8 encoded and should be re-encoded in the
1029 specified encoding. */
1030
1031PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 Py_ssize_t len,
1033 const char *errors,
1034 Py_ssize_t unicode,
1035 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001036{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 int c;
1038 char *p, *buf;
1039 const char *end;
1040 PyObject *v;
1041 Py_ssize_t newlen = recode_encoding ? 4*len:len;
1042 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
1043 if (v == NULL)
1044 return NULL;
1045 p = buf = PyBytes_AsString(v);
1046 end = s + len;
1047 while (s < end) {
1048 if (*s != '\\') {
1049 non_esc:
1050 if (recode_encoding && (*s & 0x80)) {
1051 PyObject *u, *w;
1052 char *r;
1053 const char* t;
1054 Py_ssize_t rn;
1055 t = s;
1056 /* Decode non-ASCII bytes as UTF-8. */
1057 while (t < end && (*t & 0x80)) t++;
1058 u = PyUnicode_DecodeUTF8(s, t - s, errors);
1059 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 /* Recode them in target encoding. */
1062 w = PyUnicode_AsEncodedString(
1063 u, recode_encoding, errors);
1064 Py_DECREF(u);
1065 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 /* Append bytes to output buffer. */
1068 assert(PyBytes_Check(w));
1069 r = PyBytes_AS_STRING(w);
1070 rn = PyBytes_GET_SIZE(w);
1071 Py_MEMCPY(p, r, rn);
1072 p += rn;
1073 Py_DECREF(w);
1074 s = t;
1075 } else {
1076 *p++ = *s++;
1077 }
1078 continue;
1079 }
1080 s++;
1081 if (s==end) {
1082 PyErr_SetString(PyExc_ValueError,
1083 "Trailing \\ in string");
1084 goto failed;
1085 }
1086 switch (*s++) {
1087 /* XXX This assumes ASCII! */
1088 case '\n': break;
1089 case '\\': *p++ = '\\'; break;
1090 case '\'': *p++ = '\''; break;
1091 case '\"': *p++ = '\"'; break;
1092 case 'b': *p++ = '\b'; break;
1093 case 'f': *p++ = '\014'; break; /* FF */
1094 case 't': *p++ = '\t'; break;
1095 case 'n': *p++ = '\n'; break;
1096 case 'r': *p++ = '\r'; break;
1097 case 'v': *p++ = '\013'; break; /* VT */
1098 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1099 case '0': case '1': case '2': case '3':
1100 case '4': case '5': case '6': case '7':
1101 c = s[-1] - '0';
1102 if (s < end && '0' <= *s && *s <= '7') {
1103 c = (c<<3) + *s++ - '0';
1104 if (s < end && '0' <= *s && *s <= '7')
1105 c = (c<<3) + *s++ - '0';
1106 }
1107 *p++ = c;
1108 break;
1109 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001110 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 unsigned int x = 0;
1112 c = Py_CHARMASK(*s);
1113 s++;
David Malcolm96960882010-11-05 17:23:41 +00001114 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001116 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 x = 10 + c - 'a';
1118 else
1119 x = 10 + c - 'A';
1120 x = x << 4;
1121 c = Py_CHARMASK(*s);
1122 s++;
David Malcolm96960882010-11-05 17:23:41 +00001123 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001124 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001125 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 x += 10 + c - 'a';
1127 else
1128 x += 10 + c - 'A';
1129 *p++ = x;
1130 break;
1131 }
1132 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001133 PyErr_Format(PyExc_ValueError,
1134 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001135 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 goto failed;
1137 }
1138 if (strcmp(errors, "replace") == 0) {
1139 *p++ = '?';
1140 } else if (strcmp(errors, "ignore") == 0)
1141 /* do nothing */;
1142 else {
1143 PyErr_Format(PyExc_ValueError,
1144 "decoding error; unknown "
1145 "error handling code: %.400s",
1146 errors);
1147 goto failed;
1148 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001149 /* skip \x */
1150 if (s < end && Py_ISXDIGIT(s[0]))
1151 s++; /* and a hexdigit */
1152 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 default:
1154 *p++ = '\\';
1155 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001156 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 UTF-8 bytes may follow. */
1158 }
1159 }
1160 if (p-buf < newlen)
1161 _PyBytes_Resize(&v, p - buf);
1162 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001163 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 Py_DECREF(v);
1165 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001166}
1167
1168/* -------------------------------------------------------------------- */
1169/* object api */
1170
1171Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001172PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001173{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 if (!PyBytes_Check(op)) {
1175 PyErr_Format(PyExc_TypeError,
1176 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1177 return -1;
1178 }
1179 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001180}
1181
1182char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001183PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 if (!PyBytes_Check(op)) {
1186 PyErr_Format(PyExc_TypeError,
1187 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1188 return NULL;
1189 }
1190 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001191}
1192
1193int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001194PyBytes_AsStringAndSize(PyObject *obj,
1195 char **s,
1196 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001197{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 if (s == NULL) {
1199 PyErr_BadInternalCall();
1200 return -1;
1201 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 if (!PyBytes_Check(obj)) {
1204 PyErr_Format(PyExc_TypeError,
1205 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1206 return -1;
1207 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 *s = PyBytes_AS_STRING(obj);
1210 if (len != NULL)
1211 *len = PyBytes_GET_SIZE(obj);
1212 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001213 PyErr_SetString(PyExc_ValueError,
1214 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 return -1;
1216 }
1217 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218}
Neal Norwitz6968b052007-02-27 19:02:19 +00001219
1220/* -------------------------------------------------------------------- */
1221/* Methods */
1222
Eric Smith0923d1d2009-04-16 20:16:10 +00001223#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001224
1225#include "stringlib/fastsearch.h"
1226#include "stringlib/count.h"
1227#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001228#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001229#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001230#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001231#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001232
Eric Smith0f78bff2009-11-30 01:01:42 +00001233#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001234
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235PyObject *
1236PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001237{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001238 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001239 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001240 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001242 unsigned char quote, *s, *p;
1243
1244 /* Compute size of output string */
1245 squotes = dquotes = 0;
1246 newsize = 3; /* b'' */
1247 s = (unsigned char*)op->ob_sval;
1248 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001249 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001250 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001251 case '\'': squotes++; break;
1252 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001253 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001254 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001255 default:
1256 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001257 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001258 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001259 if (newsize > PY_SSIZE_T_MAX - incr)
1260 goto overflow;
1261 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 }
1263 quote = '\'';
1264 if (smartquotes && squotes && !dquotes)
1265 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001266 if (squotes && quote == '\'') {
1267 if (newsize > PY_SSIZE_T_MAX - squotes)
1268 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001269 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001271
1272 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 if (v == NULL) {
1274 return NULL;
1275 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001276 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001277
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 *p++ = 'b', *p++ = quote;
1279 for (i = 0; i < length; i++) {
1280 unsigned char c = op->ob_sval[i];
1281 if (c == quote || c == '\\')
1282 *p++ = '\\', *p++ = c;
1283 else if (c == '\t')
1284 *p++ = '\\', *p++ = 't';
1285 else if (c == '\n')
1286 *p++ = '\\', *p++ = 'n';
1287 else if (c == '\r')
1288 *p++ = '\\', *p++ = 'r';
1289 else if (c < ' ' || c >= 0x7f) {
1290 *p++ = '\\';
1291 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001292 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1293 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001295 else
1296 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001299 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001300 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001301
1302 overflow:
1303 PyErr_SetString(PyExc_OverflowError,
1304 "bytes object is too large to make repr");
1305 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001306}
1307
Neal Norwitz6968b052007-02-27 19:02:19 +00001308static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001309bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001310{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001312}
1313
Neal Norwitz6968b052007-02-27 19:02:19 +00001314static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001315bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 if (Py_BytesWarningFlag) {
1318 if (PyErr_WarnEx(PyExc_BytesWarning,
1319 "str() on a bytes instance", 1))
1320 return NULL;
1321 }
1322 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001323}
1324
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001325static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001326bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001329}
Neal Norwitz6968b052007-02-27 19:02:19 +00001330
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331/* This is also used by PyBytes_Concat() */
1332static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001333bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001334{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 Py_ssize_t size;
1336 Py_buffer va, vb;
1337 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 va.len = -1;
1340 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001341 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1342 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1344 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1345 goto done;
1346 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 /* Optimize end cases */
1349 if (va.len == 0 && PyBytes_CheckExact(b)) {
1350 result = b;
1351 Py_INCREF(result);
1352 goto done;
1353 }
1354 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1355 result = a;
1356 Py_INCREF(result);
1357 goto done;
1358 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 size = va.len + vb.len;
1361 if (size < 0) {
1362 PyErr_NoMemory();
1363 goto done;
1364 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 result = PyBytes_FromStringAndSize(NULL, size);
1367 if (result != NULL) {
1368 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1369 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1370 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001371
1372 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 if (va.len != -1)
1374 PyBuffer_Release(&va);
1375 if (vb.len != -1)
1376 PyBuffer_Release(&vb);
1377 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001378}
Neal Norwitz6968b052007-02-27 19:02:19 +00001379
1380static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001381bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001382{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001383 Py_ssize_t i;
1384 Py_ssize_t j;
1385 Py_ssize_t size;
1386 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 size_t nbytes;
1388 if (n < 0)
1389 n = 0;
1390 /* watch out for overflows: the size can overflow int,
1391 * and the # of bytes needed can overflow size_t
1392 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001393 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 PyErr_SetString(PyExc_OverflowError,
1395 "repeated bytes are too long");
1396 return NULL;
1397 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001398 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1400 Py_INCREF(a);
1401 return (PyObject *)a;
1402 }
1403 nbytes = (size_t)size;
1404 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1405 PyErr_SetString(PyExc_OverflowError,
1406 "repeated bytes are too long");
1407 return NULL;
1408 }
1409 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1410 if (op == NULL)
1411 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001412 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 op->ob_shash = -1;
1414 op->ob_sval[size] = '\0';
1415 if (Py_SIZE(a) == 1 && n > 0) {
1416 memset(op->ob_sval, a->ob_sval[0] , n);
1417 return (PyObject *) op;
1418 }
1419 i = 0;
1420 if (i < size) {
1421 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1422 i = Py_SIZE(a);
1423 }
1424 while (i < size) {
1425 j = (i <= size-i) ? i : size-i;
1426 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1427 i += j;
1428 }
1429 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001430}
1431
Guido van Rossum98297ee2007-11-06 21:34:58 +00001432static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001433bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001434{
1435 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1436 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001437 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001438 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001439 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001440 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001441 return -1;
1442 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1443 varg.buf, varg.len, 0);
1444 PyBuffer_Release(&varg);
1445 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001446 }
1447 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001448 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1449 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001450 }
1451
Antoine Pitrou0010d372010-08-15 17:12:55 +00001452 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001453}
1454
Neal Norwitz6968b052007-02-27 19:02:19 +00001455static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001456bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001457{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 if (i < 0 || i >= Py_SIZE(a)) {
1459 PyErr_SetString(PyExc_IndexError, "index out of range");
1460 return NULL;
1461 }
1462 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001463}
1464
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001465Py_LOCAL(int)
1466bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1467{
1468 int cmp;
1469 Py_ssize_t len;
1470
1471 len = Py_SIZE(a);
1472 if (Py_SIZE(b) != len)
1473 return 0;
1474
1475 if (a->ob_sval[0] != b->ob_sval[0])
1476 return 0;
1477
1478 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1479 return (cmp == 0);
1480}
1481
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001482static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001483bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001484{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 int c;
1486 Py_ssize_t len_a, len_b;
1487 Py_ssize_t min_len;
1488 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001489 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 /* Make sure both arguments are strings. */
1492 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001493 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001494 rc = PyObject_IsInstance((PyObject*)a,
1495 (PyObject*)&PyUnicode_Type);
1496 if (!rc)
1497 rc = PyObject_IsInstance((PyObject*)b,
1498 (PyObject*)&PyUnicode_Type);
1499 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001501 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001502 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001503 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001504 return NULL;
1505 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001506 else {
1507 rc = PyObject_IsInstance((PyObject*)a,
1508 (PyObject*)&PyLong_Type);
1509 if (!rc)
1510 rc = PyObject_IsInstance((PyObject*)b,
1511 (PyObject*)&PyLong_Type);
1512 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001513 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001514 if (rc) {
1515 if (PyErr_WarnEx(PyExc_BytesWarning,
1516 "Comparison between bytes and int", 1))
1517 return NULL;
1518 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001519 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 }
1521 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001523 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001525 case Py_EQ:
1526 case Py_LE:
1527 case Py_GE:
1528 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001530 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001531 case Py_NE:
1532 case Py_LT:
1533 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001535 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001536 default:
1537 PyErr_BadArgument();
1538 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 }
1540 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001541 else if (op == Py_EQ || op == Py_NE) {
1542 int eq = bytes_compare_eq(a, b);
1543 eq ^= (op == Py_NE);
1544 result = eq ? Py_True : Py_False;
1545 }
1546 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001547 len_a = Py_SIZE(a);
1548 len_b = Py_SIZE(b);
1549 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001550 if (min_len > 0) {
1551 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001552 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001553 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001555 else
1556 c = 0;
1557 if (c == 0)
1558 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1559 switch (op) {
1560 case Py_LT: c = c < 0; break;
1561 case Py_LE: c = c <= 0; break;
1562 case Py_GT: c = c > 0; break;
1563 case Py_GE: c = c >= 0; break;
1564 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001565 PyErr_BadArgument();
1566 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001567 }
1568 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 Py_INCREF(result);
1572 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001573}
1574
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001575static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001576bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001577{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001578 if (a->ob_shash == -1) {
1579 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001580 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001581 }
1582 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001583}
1584
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001585static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001586bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001587{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 if (PyIndex_Check(item)) {
1589 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1590 if (i == -1 && PyErr_Occurred())
1591 return NULL;
1592 if (i < 0)
1593 i += PyBytes_GET_SIZE(self);
1594 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1595 PyErr_SetString(PyExc_IndexError,
1596 "index out of range");
1597 return NULL;
1598 }
1599 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1600 }
1601 else if (PySlice_Check(item)) {
1602 Py_ssize_t start, stop, step, slicelength, cur, i;
1603 char* source_buf;
1604 char* result_buf;
1605 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001606
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001607 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 PyBytes_GET_SIZE(self),
1609 &start, &stop, &step, &slicelength) < 0) {
1610 return NULL;
1611 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001612
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 if (slicelength <= 0) {
1614 return PyBytes_FromStringAndSize("", 0);
1615 }
1616 else if (start == 0 && step == 1 &&
1617 slicelength == PyBytes_GET_SIZE(self) &&
1618 PyBytes_CheckExact(self)) {
1619 Py_INCREF(self);
1620 return (PyObject *)self;
1621 }
1622 else if (step == 1) {
1623 return PyBytes_FromStringAndSize(
1624 PyBytes_AS_STRING(self) + start,
1625 slicelength);
1626 }
1627 else {
1628 source_buf = PyBytes_AS_STRING(self);
1629 result = PyBytes_FromStringAndSize(NULL, slicelength);
1630 if (result == NULL)
1631 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 result_buf = PyBytes_AS_STRING(result);
1634 for (cur = start, i = 0; i < slicelength;
1635 cur += step, i++) {
1636 result_buf[i] = source_buf[cur];
1637 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 return result;
1640 }
1641 }
1642 else {
1643 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001644 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 Py_TYPE(item)->tp_name);
1646 return NULL;
1647 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001648}
1649
1650static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001651bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001652{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1654 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001655}
1656
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001657static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 (lenfunc)bytes_length, /*sq_length*/
1659 (binaryfunc)bytes_concat, /*sq_concat*/
1660 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1661 (ssizeargfunc)bytes_item, /*sq_item*/
1662 0, /*sq_slice*/
1663 0, /*sq_ass_item*/
1664 0, /*sq_ass_slice*/
1665 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001666};
1667
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001668static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 (lenfunc)bytes_length,
1670 (binaryfunc)bytes_subscript,
1671 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001672};
1673
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001674static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 (getbufferproc)bytes_buffer_getbuffer,
1676 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001677};
1678
1679
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
   helpers defined later in this file -- confirm against their callers. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
1683
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001684/*[clinic input]
1685bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001686
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001687 sep: object = None
1688 The delimiter according to which to split the bytes.
1689 None (the default value) means split on ASCII whitespace characters
1690 (space, tab, return, newline, formfeed, vertical tab).
1691 maxsplit: Py_ssize_t = -1
1692 Maximum number of splits to do.
1693 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001694
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001695Return a list of the sections in the bytes, using sep as the delimiter.
1696[clinic start generated code]*/
1697
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001698static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001699bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001700/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001701{
1702 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 const char *s = PyBytes_AS_STRING(self), *sub;
1704 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001705 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001707 if (maxsplit < 0)
1708 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001709 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001711 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 return NULL;
1713 sub = vsub.buf;
1714 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1717 PyBuffer_Release(&vsub);
1718 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001719}
1720
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001721/*[clinic input]
1722bytes.partition
1723
1724 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001725 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001726 /
1727
1728Partition the bytes into three parts using the given separator.
1729
1730This will search for the separator sep in the bytes. If the separator is found,
1731returns a 3-tuple containing the part before the separator, the separator
1732itself, and the part after it.
1733
1734If the separator is not found, returns a 3-tuple containing the original bytes
1735object and two empty bytes objects.
1736[clinic start generated code]*/
1737
Neal Norwitz6968b052007-02-27 19:02:19 +00001738static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001739bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001740/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001741{
Neal Norwitz6968b052007-02-27 19:02:19 +00001742 return stringlib_partition(
1743 (PyObject*) self,
1744 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001745 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001746 );
1747}
1748
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001749/*[clinic input]
1750bytes.rpartition
1751
1752 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001753 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001754 /
1755
1756Partition the bytes into three parts using the given separator.
1757
1758This will search for the separator sep in the bytes, starting at the end. If
1759the separator is found, returns a 3-tuple containing the part before the
1760separator, the separator itself, and the part after it.
1761
1762If the separator is not found, returns a 3-tuple containing two empty bytes
1763objects and the original bytes object.
1764[clinic start generated code]*/
1765
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766static PyObject *
1767bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001768/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001769{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 return stringlib_rpartition(
1771 (PyObject*) self,
1772 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001773 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001774 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001775}
1776
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001777/*[clinic input]
1778bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001779
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001780Return a list of the sections in the bytes, using sep as the delimiter.
1781
1782Splitting is done starting at the end of the bytes and working to the front.
1783[clinic start generated code]*/
1784
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001786bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001787/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001788{
1789 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 const char *s = PyBytes_AS_STRING(self), *sub;
1791 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001792 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001793
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001794 if (maxsplit < 0)
1795 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001796 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001798 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 return NULL;
1800 sub = vsub.buf;
1801 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1804 PyBuffer_Release(&vsub);
1805 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001806}
1807
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001808
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001809/*[clinic input]
1810bytes.join
1811
1812 iterable_of_bytes: object
1813 /
1814
1815Concatenate any number of bytes objects.
1816
1817The bytes whose method is called is inserted in between each pair.
1818
1819The result is returned as a new bytes object.
1820
1821Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1822[clinic start generated code]*/
1823
Neal Norwitz6968b052007-02-27 19:02:19 +00001824static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001825bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001826/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001827{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001828 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001829}
1830
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001831PyObject *
1832_PyBytes_Join(PyObject *sep, PyObject *x)
1833{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 assert(sep != NULL && PyBytes_Check(sep));
1835 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001836 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001837}
1838
/* Helper macro to fix up start/end slice values in place.

   - end greater than len is clamped to len; a negative end has len added
     and is then clamped to 0 if still negative.
   - a negative start has len added and is then clamped to 0 if still
     negative (a start larger than len is left untouched).

   start and end must be modifiable lvalues.  The expansion is a single
   statement wrapped in do { ... } while (0), so the macro can safely be
   used as the body of an if/else arm; invoke it with a trailing
   semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001853
1854Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001855bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001858 char byte;
1859 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001861 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001863 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001864
Antoine Pitrouac65d962011-10-20 23:54:17 +02001865 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1866 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001868
Antoine Pitrouac65d962011-10-20 23:54:17 +02001869 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001870 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001871 return -2;
1872
1873 sub = subbuf.buf;
1874 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001876 else {
1877 sub = &byte;
1878 sub_len = 1;
1879 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001880 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001881
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001882 ADJUST_INDICES(start, end, len);
1883 if (end - start < sub_len)
1884 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001885 else if (sub_len == 1
1886#ifndef HAVE_MEMRCHR
1887 && dir > 0
1888#endif
1889 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001890 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001891 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001892 res = stringlib_fastsearch_memchr_1char(
1893 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001894 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001895 if (res >= 0)
1896 res += start;
1897 }
1898 else {
1899 if (dir > 0)
1900 res = stringlib_find_slice(
1901 PyBytes_AS_STRING(self), len,
1902 sub, sub_len, start, end);
1903 else
1904 res = stringlib_rfind_slice(
1905 PyBytes_AS_STRING(self), len,
1906 sub, sub_len, start, end);
1907 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001908
1909 if (subobj)
1910 PyBuffer_Release(&subbuf);
1911
1912 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001913}
1914
1915
1916PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001917"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001918\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001919Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001920such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001921arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001922\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923Return -1 on failure.");
1924
Neal Norwitz6968b052007-02-27 19:02:19 +00001925static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001926bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001927{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001928 Py_ssize_t result = bytes_find_internal(self, args, +1);
1929 if (result == -2)
1930 return NULL;
1931 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001932}
1933
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001934
1935PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001936"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001937\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001938Like B.find() but raise ValueError when the substring is not found.");
1939
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001940static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001941bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001942{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 Py_ssize_t result = bytes_find_internal(self, args, +1);
1944 if (result == -2)
1945 return NULL;
1946 if (result == -1) {
1947 PyErr_SetString(PyExc_ValueError,
1948 "substring not found");
1949 return NULL;
1950 }
1951 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001952}
1953
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954
1955PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001956"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001957\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001958Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001959such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001960arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001961\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962Return -1 on failure.");
1963
Neal Norwitz6968b052007-02-27 19:02:19 +00001964static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001965bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001966{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 Py_ssize_t result = bytes_find_internal(self, args, -1);
1968 if (result == -2)
1969 return NULL;
1970 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001971}
1972
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001973
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001975"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976\n\
1977Like B.rfind() but raise ValueError when the substring is not found.");
1978
1979static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001980bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001981{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 Py_ssize_t result = bytes_find_internal(self, args, -1);
1983 if (result == -2)
1984 return NULL;
1985 if (result == -1) {
1986 PyErr_SetString(PyExc_ValueError,
1987 "substring not found");
1988 return NULL;
1989 }
1990 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001991}
1992
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993
1994Py_LOCAL_INLINE(PyObject *)
1995do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 Py_buffer vsep;
1998 char *s = PyBytes_AS_STRING(self);
1999 Py_ssize_t len = PyBytes_GET_SIZE(self);
2000 char *sep;
2001 Py_ssize_t seplen;
2002 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002004 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002005 return NULL;
2006 sep = vsep.buf;
2007 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002009 i = 0;
2010 if (striptype != RIGHTSTRIP) {
2011 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2012 i++;
2013 }
2014 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002016 j = len;
2017 if (striptype != LEFTSTRIP) {
2018 do {
2019 j--;
2020 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2021 j++;
2022 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002024 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2027 Py_INCREF(self);
2028 return (PyObject*)self;
2029 }
2030 else
2031 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002032}
2033
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034
2035Py_LOCAL_INLINE(PyObject *)
2036do_strip(PyBytesObject *self, int striptype)
2037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 char *s = PyBytes_AS_STRING(self);
2039 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 i = 0;
2042 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002043 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002044 i++;
2045 }
2046 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002048 j = len;
2049 if (striptype != LEFTSTRIP) {
2050 do {
2051 j--;
David Malcolm96960882010-11-05 17:23:41 +00002052 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002053 j++;
2054 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002056 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2057 Py_INCREF(self);
2058 return (PyObject*)self;
2059 }
2060 else
2061 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062}
2063
2064
2065Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002066do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002067{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002068 if (bytes != NULL && bytes != Py_None) {
2069 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 }
2071 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002072}
2073
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002074/*[clinic input]
2075bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002077 self: self(type="PyBytesObject *")
2078 bytes: object = None
2079 /
2080
2081Strip leading and trailing bytes contained in the argument.
2082
2083If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2084[clinic start generated code]*/
2085
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002086static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002087bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002088/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002089{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002090 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002091}
2092
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002093/*[clinic input]
2094bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002096 self: self(type="PyBytesObject *")
2097 bytes: object = None
2098 /
2099
2100Strip leading bytes contained in the argument.
2101
2102If the argument is omitted or None, strip leading ASCII whitespace.
2103[clinic start generated code]*/
2104
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002105static PyObject *
2106bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002107/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002108{
2109 return do_argstrip(self, LEFTSTRIP, bytes);
2110}
2111
2112/*[clinic input]
2113bytes.rstrip
2114
2115 self: self(type="PyBytesObject *")
2116 bytes: object = None
2117 /
2118
2119Strip trailing bytes contained in the argument.
2120
2121If the argument is omitted or None, strip trailing ASCII whitespace.
2122[clinic start generated code]*/
2123
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002124static PyObject *
2125bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002126/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002127{
2128 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002129}
Neal Norwitz6968b052007-02-27 19:02:19 +00002130
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
2132PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002133"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002134\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002135Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002136string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137as in slice notation.");
2138
2139static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002140bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 PyObject *sub_obj;
2143 const char *str = PyBytes_AS_STRING(self), *sub;
2144 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002145 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002147
Antoine Pitrouac65d962011-10-20 23:54:17 +02002148 Py_buffer vsub;
2149 PyObject *count_obj;
2150
2151 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2152 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002153 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002154
Antoine Pitrouac65d962011-10-20 23:54:17 +02002155 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002156 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002157 return NULL;
2158
2159 sub = vsub.buf;
2160 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002161 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002162 else {
2163 sub = &byte;
2164 sub_len = 1;
2165 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002167 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002168
Antoine Pitrouac65d962011-10-20 23:54:17 +02002169 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002170 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2171 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002172
2173 if (sub_obj)
2174 PyBuffer_Release(&vsub);
2175
2176 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177}
2178
2179
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002180/*[clinic input]
2181bytes.translate
2182
2183 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002184 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002185 Translation table, which must be a bytes object of length 256.
2186 [
2187 deletechars: object
2188 ]
2189 /
2190
2191Return a copy with each character mapped by the given translation table.
2192
2193All characters occurring in the optional argument deletechars are removed.
2194The remaining characters are mapped through the given translation table.
2195[clinic start generated code]*/
2196
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002197static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002198bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2199 PyObject *deletechars)
2200/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002202 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002203 Py_buffer table_view = {NULL, NULL};
2204 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002205 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002206 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002207 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002208 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 Py_ssize_t inlen, tablen, dellen = 0;
2210 PyObject *result;
2211 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002212
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002213 if (PyBytes_Check(table)) {
2214 table_chars = PyBytes_AS_STRING(table);
2215 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002216 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002217 else if (table == Py_None) {
2218 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002219 tablen = 256;
2220 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002221 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002222 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002223 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002224 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002225 tablen = table_view.len;
2226 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002228 if (tablen != 256) {
2229 PyErr_SetString(PyExc_ValueError,
2230 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002231 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002232 return NULL;
2233 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002234
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002235 if (deletechars != NULL) {
2236 if (PyBytes_Check(deletechars)) {
2237 del_table_chars = PyBytes_AS_STRING(deletechars);
2238 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002239 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002240 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002241 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002242 PyBuffer_Release(&table_view);
2243 return NULL;
2244 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002245 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002246 dellen = del_table_view.len;
2247 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002248 }
2249 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002250 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002251 dellen = 0;
2252 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002254 inlen = PyBytes_GET_SIZE(input_obj);
2255 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002256 if (result == NULL) {
2257 PyBuffer_Release(&del_table_view);
2258 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002259 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002260 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 output_start = output = PyBytes_AsString(result);
2262 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002263
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002264 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002265 /* If no deletions are required, use faster code */
2266 for (i = inlen; --i >= 0; ) {
2267 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002268 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002269 changed = 1;
2270 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002271 if (!changed && PyBytes_CheckExact(input_obj)) {
2272 Py_INCREF(input_obj);
2273 Py_DECREF(result);
2274 result = input_obj;
2275 }
2276 PyBuffer_Release(&del_table_view);
2277 PyBuffer_Release(&table_view);
2278 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002279 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002280
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002281 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(i);
2284 } else {
2285 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002286 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002287 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002288 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002291 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002292 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002294 for (i = inlen; --i >= 0; ) {
2295 c = Py_CHARMASK(*input++);
2296 if (trans_table[c] != -1)
2297 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2298 continue;
2299 changed = 1;
2300 }
2301 if (!changed && PyBytes_CheckExact(input_obj)) {
2302 Py_DECREF(result);
2303 Py_INCREF(input_obj);
2304 return input_obj;
2305 }
2306 /* Fix the size of the resulting string */
2307 if (inlen > 0)
2308 _PyBytes_Resize(&result, output - output_start);
2309 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002310}
2311
2312
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002313/*[clinic input]
2314
2315@staticmethod
2316bytes.maketrans
2317
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002318 frm: Py_buffer
2319 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002320 /
2321
2322Return a translation table useable for the bytes or bytearray translate method.
2323
2324The returned table will be one where each byte in frm is mapped to the byte at
2325the same position in to.
2326
2327The bytes objects frm and to must be of the same length.
2328[clinic start generated code]*/
2329
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002330static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002331bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002332/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002333{
2334 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002335}
2336
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002337/* find and count characters and substrings */
2338
/* memchr() wrapper: pointer to the first occurrence of byte c within the
   first target_len bytes of target, as a char *, or NULL if absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), c, target_len))
2341
2342/* String ops must return a string. */
2343/* If the object is subclass of string, create a copy */
2344Py_LOCAL(PyBytesObject *)
2345return_self(PyBytesObject *self)
2346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002347 if (PyBytes_CheckExact(self)) {
2348 Py_INCREF(self);
2349 return self;
2350 }
2351 return (PyBytesObject *)PyBytes_FromStringAndSize(
2352 PyBytes_AS_STRING(self),
2353 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002354}
2355
2356Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002357countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002359 Py_ssize_t count=0;
2360 const char *start=target;
2361 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002363 while ( (start=findchar(start, end-start, c)) != NULL ) {
2364 count++;
2365 if (count >= maxcount)
2366 break;
2367 start += 1;
2368 }
2369 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002370}
2371
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372
2373/* Algorithms for different cases of string replacement */
2374
2375/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2376Py_LOCAL(PyBytesObject *)
2377replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002378 const char *to_s, Py_ssize_t to_len,
2379 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002381 char *self_s, *result_s;
2382 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002383 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002384 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002387
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002388 /* 1 at the end plus 1 after every character;
2389 count = min(maxcount, self_len + 1) */
2390 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002391 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002392 else
2393 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2394 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002396 /* Check for overflow */
2397 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002398 assert(count > 0);
2399 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002400 PyErr_SetString(PyExc_OverflowError,
2401 "replacement bytes are too long");
2402 return NULL;
2403 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002404 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002406 if (! (result = (PyBytesObject *)
2407 PyBytes_FromStringAndSize(NULL, result_len)) )
2408 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002410 self_s = PyBytes_AS_STRING(self);
2411 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 /* Lay the first one down (guaranteed this will occur) */
2416 Py_MEMCPY(result_s, to_s, to_len);
2417 result_s += to_len;
2418 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 for (i=0; i<count; i++) {
2421 *result_s++ = *self_s++;
2422 Py_MEMCPY(result_s, to_s, to_len);
2423 result_s += to_len;
2424 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002426 /* Copy the rest of the original string */
2427 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002429 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002430}
2431
2432/* Special case for deleting a single character */
2433/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2434Py_LOCAL(PyBytesObject *)
2435replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002437{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 char *self_s, *result_s;
2439 char *start, *next, *end;
2440 Py_ssize_t self_len, result_len;
2441 Py_ssize_t count;
2442 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002444 self_len = PyBytes_GET_SIZE(self);
2445 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002447 count = countchar(self_s, self_len, from_c, maxcount);
2448 if (count == 0) {
2449 return return_self(self);
2450 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002452 result_len = self_len - count; /* from_len == 1 */
2453 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 if ( (result = (PyBytesObject *)
2456 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2457 return NULL;
2458 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 start = self_s;
2461 end = self_s + self_len;
2462 while (count-- > 0) {
2463 next = findchar(start, end-start, from_c);
2464 if (next == NULL)
2465 break;
2466 Py_MEMCPY(result_s, start, next-start);
2467 result_s += (next-start);
2468 start = next+1;
2469 }
2470 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473}
2474
2475/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2476
2477Py_LOCAL(PyBytesObject *)
2478replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 const char *from_s, Py_ssize_t from_len,
2480 Py_ssize_t maxcount) {
2481 char *self_s, *result_s;
2482 char *start, *next, *end;
2483 Py_ssize_t self_len, result_len;
2484 Py_ssize_t count, offset;
2485 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002487 self_len = PyBytes_GET_SIZE(self);
2488 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 count = stringlib_count(self_s, self_len,
2491 from_s, from_len,
2492 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002494 if (count == 0) {
2495 /* no matches */
2496 return return_self(self);
2497 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002499 result_len = self_len - (count * from_len);
2500 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 if ( (result = (PyBytesObject *)
2503 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2504 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 start = self_s;
2509 end = self_s + self_len;
2510 while (count-- > 0) {
2511 offset = stringlib_find(start, end-start,
2512 from_s, from_len,
2513 0);
2514 if (offset == -1)
2515 break;
2516 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002518 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 result_s += (next-start);
2521 start = next+from_len;
2522 }
2523 Py_MEMCPY(result_s, start, end-start);
2524 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525}
2526
2527/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2528Py_LOCAL(PyBytesObject *)
2529replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002530 char from_c, char to_c,
2531 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 char *self_s, *result_s, *start, *end, *next;
2534 Py_ssize_t self_len;
2535 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002537 /* The result string will be the same size */
2538 self_s = PyBytes_AS_STRING(self);
2539 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 if (next == NULL) {
2544 /* No matches; return the original string */
2545 return return_self(self);
2546 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 /* Need to make a new string */
2549 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2550 if (result == NULL)
2551 return NULL;
2552 result_s = PyBytes_AS_STRING(result);
2553 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002555 /* change everything in-place, starting with this one */
2556 start = result_s + (next-self_s);
2557 *start = to_c;
2558 start++;
2559 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 while (--maxcount > 0) {
2562 next = findchar(start, end-start, from_c);
2563 if (next == NULL)
2564 break;
2565 *next = to_c;
2566 start = next+1;
2567 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002570}
2571
2572/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2573Py_LOCAL(PyBytesObject *)
2574replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 const char *from_s, Py_ssize_t from_len,
2576 const char *to_s, Py_ssize_t to_len,
2577 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002578{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002579 char *result_s, *start, *end;
2580 char *self_s;
2581 Py_ssize_t self_len, offset;
2582 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 self_s = PyBytes_AS_STRING(self);
2587 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002589 offset = stringlib_find(self_s, self_len,
2590 from_s, from_len,
2591 0);
2592 if (offset == -1) {
2593 /* No matches; return the original string */
2594 return return_self(self);
2595 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 /* Need to make a new string */
2598 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2599 if (result == NULL)
2600 return NULL;
2601 result_s = PyBytes_AS_STRING(result);
2602 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002604 /* change everything in-place, starting with this one */
2605 start = result_s + offset;
2606 Py_MEMCPY(start, to_s, from_len);
2607 start += from_len;
2608 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 while ( --maxcount > 0) {
2611 offset = stringlib_find(start, end-start,
2612 from_s, from_len,
2613 0);
2614 if (offset==-1)
2615 break;
2616 Py_MEMCPY(start+offset, to_s, from_len);
2617 start += offset+from_len;
2618 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621}
2622
2623/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2624Py_LOCAL(PyBytesObject *)
2625replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002626 char from_c,
2627 const char *to_s, Py_ssize_t to_len,
2628 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002629{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002630 char *self_s, *result_s;
2631 char *start, *next, *end;
2632 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002633 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002634 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 self_s = PyBytes_AS_STRING(self);
2637 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 count = countchar(self_s, self_len, from_c, maxcount);
2640 if (count == 0) {
2641 /* no matches, return unchanged */
2642 return return_self(self);
2643 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002645 /* use the difference between current and new, hence the "-1" */
2646 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002647 assert(count > 0);
2648 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002649 PyErr_SetString(PyExc_OverflowError,
2650 "replacement bytes are too long");
2651 return NULL;
2652 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002653 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002655 if ( (result = (PyBytesObject *)
2656 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2657 return NULL;
2658 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002660 start = self_s;
2661 end = self_s + self_len;
2662 while (count-- > 0) {
2663 next = findchar(start, end-start, from_c);
2664 if (next == NULL)
2665 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 if (next == start) {
2668 /* replace with the 'to' */
2669 Py_MEMCPY(result_s, to_s, to_len);
2670 result_s += to_len;
2671 start += 1;
2672 } else {
2673 /* copy the unchanged old then the 'to' */
2674 Py_MEMCPY(result_s, start, next-start);
2675 result_s += (next-start);
2676 Py_MEMCPY(result_s, to_s, to_len);
2677 result_s += to_len;
2678 start = next+1;
2679 }
2680 }
2681 /* Copy the remainder of the remaining string */
2682 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685}
2686
2687/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2688Py_LOCAL(PyBytesObject *)
2689replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 const char *from_s, Py_ssize_t from_len,
2691 const char *to_s, Py_ssize_t to_len,
2692 Py_ssize_t maxcount) {
2693 char *self_s, *result_s;
2694 char *start, *next, *end;
2695 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002696 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002697 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 self_s = PyBytes_AS_STRING(self);
2700 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002701
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002702 count = stringlib_count(self_s, self_len,
2703 from_s, from_len,
2704 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002706 if (count == 0) {
2707 /* no matches, return unchanged */
2708 return return_self(self);
2709 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002710
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002711 /* Check for overflow */
2712 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002713 assert(count > 0);
2714 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 PyErr_SetString(PyExc_OverflowError,
2716 "replacement bytes are too long");
2717 return NULL;
2718 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002719 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 if ( (result = (PyBytesObject *)
2722 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2723 return NULL;
2724 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 start = self_s;
2727 end = self_s + self_len;
2728 while (count-- > 0) {
2729 offset = stringlib_find(start, end-start,
2730 from_s, from_len,
2731 0);
2732 if (offset == -1)
2733 break;
2734 next = start+offset;
2735 if (next == start) {
2736 /* replace with the 'to' */
2737 Py_MEMCPY(result_s, to_s, to_len);
2738 result_s += to_len;
2739 start += from_len;
2740 } else {
2741 /* copy the unchanged old then the 'to' */
2742 Py_MEMCPY(result_s, start, next-start);
2743 result_s += (next-start);
2744 Py_MEMCPY(result_s, to_s, to_len);
2745 result_s += to_len;
2746 start = next+from_len;
2747 }
2748 }
2749 /* Copy the remainder of the remaining string */
2750 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002753}
2754
2755
2756Py_LOCAL(PyBytesObject *)
2757replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002758 const char *from_s, Py_ssize_t from_len,
2759 const char *to_s, Py_ssize_t to_len,
2760 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 if (maxcount < 0) {
2763 maxcount = PY_SSIZE_T_MAX;
2764 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2765 /* nothing to do; return the original string */
2766 return return_self(self);
2767 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 if (maxcount == 0 ||
2770 (from_len == 0 && to_len == 0)) {
2771 /* nothing to do; return the original string */
2772 return return_self(self);
2773 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002777 if (from_len == 0) {
2778 /* insert the 'to' string everywhere. */
2779 /* >>> "Python".replace("", ".") */
2780 /* '.P.y.t.h.o.n.' */
2781 return replace_interleave(self, to_s, to_len, maxcount);
2782 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002784 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2785 /* point for an empty self string to generate a non-empty string */
2786 /* Special case so the remaining code always gets a non-empty string */
2787 if (PyBytes_GET_SIZE(self) == 0) {
2788 return return_self(self);
2789 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002791 if (to_len == 0) {
2792 /* delete all occurrences of 'from' string */
2793 if (from_len == 1) {
2794 return replace_delete_single_character(
2795 self, from_s[0], maxcount);
2796 } else {
2797 return replace_delete_substring(self, from_s,
2798 from_len, maxcount);
2799 }
2800 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002802 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 if (from_len == to_len) {
2805 if (from_len == 1) {
2806 return replace_single_character_in_place(
2807 self,
2808 from_s[0],
2809 to_s[0],
2810 maxcount);
2811 } else {
2812 return replace_substring_in_place(
2813 self, from_s, from_len, to_s, to_len,
2814 maxcount);
2815 }
2816 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 /* Otherwise use the more generic algorithms */
2819 if (from_len == 1) {
2820 return replace_single_character(self, from_s[0],
2821 to_s, to_len, maxcount);
2822 } else {
2823 /* len('from')>=2, len('to')>=1 */
2824 return replace_substring(self, from_s, from_len, to_s, to_len,
2825 maxcount);
2826 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002827}
2828
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002829
2830/*[clinic input]
2831bytes.replace
2832
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002833 old: Py_buffer
2834 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002835 count: Py_ssize_t = -1
2836 Maximum number of occurrences to replace.
2837 -1 (the default value) means replace all occurrences.
2838 /
2839
2840Return a copy with all occurrences of substring old replaced by new.
2841
2842If the optional argument count is given, only the first count occurrences are
2843replaced.
2844[clinic start generated code]*/
2845
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002846static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002847bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2848 Py_ssize_t count)
2849/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002850{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002851 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002852 (const char *)old->buf, old->len,
2853 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002854}
2855
2856/** End DALKE **/
2857
2858/* Matches the end (direction >= 0) or start (direction < 0) of self
2859 * against substr, using the start and end arguments. Returns
2860 * -1 on error, 0 if not found and 1 if found.
2861 */
2862Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002863_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002864 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002865{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 Py_ssize_t len = PyBytes_GET_SIZE(self);
2867 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002868 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002869 const char* sub;
2870 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 if (PyBytes_Check(substr)) {
2873 sub = PyBytes_AS_STRING(substr);
2874 slen = PyBytes_GET_SIZE(substr);
2875 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002876 else {
2877 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2878 return -1;
2879 sub = sub_view.buf;
2880 slen = sub_view.len;
2881 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002882 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002886 if (direction < 0) {
2887 /* startswith */
2888 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002889 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002890 } else {
2891 /* endswith */
2892 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002893 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002895 if (end-slen > start)
2896 start = end - slen;
2897 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002898 if (end-start < slen)
2899 goto notfound;
2900 if (memcmp(str+start, sub, slen) != 0)
2901 goto notfound;
2902
2903 PyBuffer_Release(&sub_view);
2904 return 1;
2905
2906notfound:
2907 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002908 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909}
2910
2911
2912PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002913"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002914\n\
2915Return True if B starts with the specified prefix, False otherwise.\n\
2916With optional start, test B beginning at that position.\n\
2917With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002918prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919
2920static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002921bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002922{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 Py_ssize_t start = 0;
2924 Py_ssize_t end = PY_SSIZE_T_MAX;
2925 PyObject *subobj;
2926 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002927
Jesus Ceaac451502011-04-20 17:09:23 +02002928 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002929 return NULL;
2930 if (PyTuple_Check(subobj)) {
2931 Py_ssize_t i;
2932 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2933 result = _bytes_tailmatch(self,
2934 PyTuple_GET_ITEM(subobj, i),
2935 start, end, -1);
2936 if (result == -1)
2937 return NULL;
2938 else if (result) {
2939 Py_RETURN_TRUE;
2940 }
2941 }
2942 Py_RETURN_FALSE;
2943 }
2944 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002945 if (result == -1) {
2946 if (PyErr_ExceptionMatches(PyExc_TypeError))
2947 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2948 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002950 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002951 else
2952 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002953}
2954
2955
2956PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002957"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002958\n\
2959Return True if B ends with the specified suffix, False otherwise.\n\
2960With optional start, test B beginning at that position.\n\
2961With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002962suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963
2964static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002965bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002966{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002967 Py_ssize_t start = 0;
2968 Py_ssize_t end = PY_SSIZE_T_MAX;
2969 PyObject *subobj;
2970 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002971
Jesus Ceaac451502011-04-20 17:09:23 +02002972 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002973 return NULL;
2974 if (PyTuple_Check(subobj)) {
2975 Py_ssize_t i;
2976 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2977 result = _bytes_tailmatch(self,
2978 PyTuple_GET_ITEM(subobj, i),
2979 start, end, +1);
2980 if (result == -1)
2981 return NULL;
2982 else if (result) {
2983 Py_RETURN_TRUE;
2984 }
2985 }
2986 Py_RETURN_FALSE;
2987 }
2988 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002989 if (result == -1) {
2990 if (PyErr_ExceptionMatches(PyExc_TypeError))
2991 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2992 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002993 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002994 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002995 else
2996 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002997}
2998
2999
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003000/*[clinic input]
3001bytes.decode
3002
3003 encoding: str(c_default="NULL") = 'utf-8'
3004 The encoding with which to decode the bytes.
3005 errors: str(c_default="NULL") = 'strict'
3006 The error handling scheme to use for the handling of decoding errors.
3007 The default is 'strict' meaning that decoding errors raise a
3008 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3009 as well as any other name registered with codecs.register_error that
3010 can handle UnicodeDecodeErrors.
3011
3012Decode the bytes using the codec registered for encoding.
3013[clinic start generated code]*/
3014
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003015static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003016bytes_decode_impl(PyBytesObject*self, const char *encoding,
3017 const char *errors)
3018/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003019{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003020 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003021}
3022
Guido van Rossum20188312006-05-05 15:15:40 +00003023
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003024/*[clinic input]
3025bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003026
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003027 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003028
3029Return a list of the lines in the bytes, breaking at line boundaries.
3030
3031Line breaks are not included in the resulting list unless keepends is given and
3032true.
3033[clinic start generated code]*/
3034
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003035static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003036bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003037/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003038{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003039 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003040 (PyObject*) self, PyBytes_AS_STRING(self),
3041 PyBytes_GET_SIZE(self), keepends
3042 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003043}
3044
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003045static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003046hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003047{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 if (c >= 128)
3049 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003050 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003051 return c - '0';
3052 else {
David Malcolm96960882010-11-05 17:23:41 +00003053 if (Py_ISUPPER(c))
3054 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003055 if (c >= 'a' && c <= 'f')
3056 return c - 'a' + 10;
3057 }
3058 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003059}
3060
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003061/*[clinic input]
3062@classmethod
3063bytes.fromhex
3064
3065 string: unicode
3066 /
3067
3068Create a bytes object from a string of hexadecimal numbers.
3069
3070Spaces between two numbers are accepted.
3071Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3072[clinic start generated code]*/
3073
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003074static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003075bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003076/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003077{
3078 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003079 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003080 Py_ssize_t hexlen, byteslen, i, j;
3081 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003082 void *data;
3083 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003084
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003085 assert(PyUnicode_Check(string));
3086 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003088 kind = PyUnicode_KIND(string);
3089 data = PyUnicode_DATA(string);
3090 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 byteslen = hexlen/2; /* This overestimates if there are spaces */
3093 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3094 if (!newstring)
3095 return NULL;
3096 buf = PyBytes_AS_STRING(newstring);
3097 for (i = j = 0; i < hexlen; i += 2) {
3098 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003099 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003100 i++;
3101 if (i >= hexlen)
3102 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003103 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3104 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003105 if (top == -1 || bot == -1) {
3106 PyErr_Format(PyExc_ValueError,
3107 "non-hexadecimal number found in "
3108 "fromhex() arg at position %zd", i);
3109 goto error;
3110 }
3111 buf[j++] = (top << 4) + bot;
3112 }
3113 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3114 goto error;
3115 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003116
3117 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003118 Py_XDECREF(newstring);
3119 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003120}
3121
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003122PyDoc_STRVAR(hex__doc__,
3123"B.hex() -> string\n\
3124\n\
3125Create a string of hexadecimal numbers from a bytes object.\n\
3126Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3127
3128static PyObject *
3129bytes_hex(PyBytesObject *self)
3130{
3131 char* argbuf = PyBytes_AS_STRING(self);
3132 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3133 return _Py_strhex(argbuf, arglen);
3134}
3135
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003136static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003137bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003139 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003140}
3141
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003142
3143static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003144bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003145 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3146 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3147 _Py_capitalize__doc__},
3148 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3149 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003150 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003151 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3152 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003153 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003154 expandtabs__doc__},
3155 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003156 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003157 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003158 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3159 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3160 _Py_isalnum__doc__},
3161 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3162 _Py_isalpha__doc__},
3163 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3164 _Py_isdigit__doc__},
3165 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3166 _Py_islower__doc__},
3167 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3168 _Py_isspace__doc__},
3169 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3170 _Py_istitle__doc__},
3171 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3172 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003173 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003174 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3175 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003176 BYTES_LSTRIP_METHODDEF
3177 BYTES_MAKETRANS_METHODDEF
3178 BYTES_PARTITION_METHODDEF
3179 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003180 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3181 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3182 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003183 BYTES_RPARTITION_METHODDEF
3184 BYTES_RSPLIT_METHODDEF
3185 BYTES_RSTRIP_METHODDEF
3186 BYTES_SPLIT_METHODDEF
3187 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003188 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3189 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003190 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003191 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3192 _Py_swapcase__doc__},
3193 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003194 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003195 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3196 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003197 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003198};
3199
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003200static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003201bytes_mod(PyObject *v, PyObject *w)
3202{
3203 if (!PyBytes_Check(v))
3204 Py_RETURN_NOTIMPLEMENTED;
3205 return _PyBytes_Format(v, w);
3206}
3207
3208static PyNumberMethods bytes_as_number = {
3209 0, /*nb_add*/
3210 0, /*nb_subtract*/
3211 0, /*nb_multiply*/
3212 bytes_mod, /*nb_remainder*/
3213};
3214
3215static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003216str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3217
3218static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003219bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003221 PyObject *x = NULL;
3222 const char *encoding = NULL;
3223 const char *errors = NULL;
3224 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003225 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003226 Py_ssize_t size;
3227 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003228 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003230 if (type != &PyBytes_Type)
3231 return str_subtype_new(type, args, kwds);
3232 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3233 &encoding, &errors))
3234 return NULL;
3235 if (x == NULL) {
3236 if (encoding != NULL || errors != NULL) {
3237 PyErr_SetString(PyExc_TypeError,
3238 "encoding or errors without sequence "
3239 "argument");
3240 return NULL;
3241 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003242 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003245 if (PyUnicode_Check(x)) {
3246 /* Encode via the codec registry */
3247 if (encoding == NULL) {
3248 PyErr_SetString(PyExc_TypeError,
3249 "string argument without an encoding");
3250 return NULL;
3251 }
3252 new = PyUnicode_AsEncodedString(x, encoding, errors);
3253 if (new == NULL)
3254 return NULL;
3255 assert(PyBytes_Check(new));
3256 return new;
3257 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003258
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003259 /* If it's not unicode, there can't be encoding or errors */
3260 if (encoding != NULL || errors != NULL) {
3261 PyErr_SetString(PyExc_TypeError,
3262 "encoding or errors without a string argument");
3263 return NULL;
3264 }
3265
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003266 /* We'd like to call PyObject_Bytes here, but we need to check for an
3267 integer argument before deferring to PyBytes_FromObject, something
3268 PyObject_Bytes doesn't do. */
3269 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3270 if (func != NULL) {
3271 new = PyObject_CallFunctionObjArgs(func, NULL);
3272 Py_DECREF(func);
3273 if (new == NULL)
3274 return NULL;
3275 if (!PyBytes_Check(new)) {
3276 PyErr_Format(PyExc_TypeError,
3277 "__bytes__ returned non-bytes (type %.200s)",
3278 Py_TYPE(new)->tp_name);
3279 Py_DECREF(new);
3280 return NULL;
3281 }
3282 return new;
3283 }
3284 else if (PyErr_Occurred())
3285 return NULL;
3286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003287 /* Is it an integer? */
3288 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3289 if (size == -1 && PyErr_Occurred()) {
3290 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3291 return NULL;
3292 PyErr_Clear();
3293 }
3294 else if (size < 0) {
3295 PyErr_SetString(PyExc_ValueError, "negative count");
3296 return NULL;
3297 }
3298 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003299 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003300 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003301 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003302 return new;
3303 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003304
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003305 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003306}
3307
3308PyObject *
3309PyBytes_FromObject(PyObject *x)
3310{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003311 PyObject *new, *it;
3312 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003314 if (x == NULL) {
3315 PyErr_BadInternalCall();
3316 return NULL;
3317 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003318
3319 if (PyBytes_CheckExact(x)) {
3320 Py_INCREF(x);
3321 return x;
3322 }
3323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 /* Use the modern buffer interface */
3325 if (PyObject_CheckBuffer(x)) {
3326 Py_buffer view;
3327 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3328 return NULL;
3329 new = PyBytes_FromStringAndSize(NULL, view.len);
3330 if (!new)
3331 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003332 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3333 &view, view.len, 'C') < 0)
3334 goto fail;
3335 PyBuffer_Release(&view);
3336 return new;
3337 fail:
3338 Py_XDECREF(new);
3339 PyBuffer_Release(&view);
3340 return NULL;
3341 }
3342 if (PyUnicode_Check(x)) {
3343 PyErr_SetString(PyExc_TypeError,
3344 "cannot convert unicode object to bytes");
3345 return NULL;
3346 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003348 if (PyList_CheckExact(x)) {
3349 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3350 if (new == NULL)
3351 return NULL;
3352 for (i = 0; i < Py_SIZE(x); i++) {
3353 Py_ssize_t value = PyNumber_AsSsize_t(
3354 PyList_GET_ITEM(x, i), PyExc_ValueError);
3355 if (value == -1 && PyErr_Occurred()) {
3356 Py_DECREF(new);
3357 return NULL;
3358 }
3359 if (value < 0 || value >= 256) {
3360 PyErr_SetString(PyExc_ValueError,
3361 "bytes must be in range(0, 256)");
3362 Py_DECREF(new);
3363 return NULL;
3364 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003365 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003366 }
3367 return new;
3368 }
3369 if (PyTuple_CheckExact(x)) {
3370 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3371 if (new == NULL)
3372 return NULL;
3373 for (i = 0; i < Py_SIZE(x); i++) {
3374 Py_ssize_t value = PyNumber_AsSsize_t(
3375 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3376 if (value == -1 && PyErr_Occurred()) {
3377 Py_DECREF(new);
3378 return NULL;
3379 }
3380 if (value < 0 || value >= 256) {
3381 PyErr_SetString(PyExc_ValueError,
3382 "bytes must be in range(0, 256)");
3383 Py_DECREF(new);
3384 return NULL;
3385 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003386 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003387 }
3388 return new;
3389 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003391 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003392 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003393 if (size == -1 && PyErr_Occurred())
3394 return NULL;
3395 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3396 returning a shared empty bytes string. This required because we
3397 want to call _PyBytes_Resize() the returned object, which we can
3398 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003399 if (size == 0)
3400 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003401 new = PyBytes_FromStringAndSize(NULL, size);
3402 if (new == NULL)
3403 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003404 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003406 /* Get the iterator */
3407 it = PyObject_GetIter(x);
3408 if (it == NULL)
3409 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003411 /* Run the iterator to exhaustion */
3412 for (i = 0; ; i++) {
3413 PyObject *item;
3414 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003416 /* Get the next item */
3417 item = PyIter_Next(it);
3418 if (item == NULL) {
3419 if (PyErr_Occurred())
3420 goto error;
3421 break;
3422 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003424 /* Interpret it as an int (__index__) */
3425 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3426 Py_DECREF(item);
3427 if (value == -1 && PyErr_Occurred())
3428 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003430 /* Range check */
3431 if (value < 0 || value >= 256) {
3432 PyErr_SetString(PyExc_ValueError,
3433 "bytes must be in range(0, 256)");
3434 goto error;
3435 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003437 /* Append the byte */
3438 if (i >= size) {
3439 size = 2 * size + 1;
3440 if (_PyBytes_Resize(&new, size) < 0)
3441 goto error;
3442 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003443 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003444 }
3445 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003447 /* Clean up and return success */
3448 Py_DECREF(it);
3449 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003450
3451 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003452 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003453 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003454 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003455}
3456
3457static PyObject *
3458str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3459{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003460 PyObject *tmp, *pnew;
3461 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003462
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003463 assert(PyType_IsSubtype(type, &PyBytes_Type));
3464 tmp = bytes_new(&PyBytes_Type, args, kwds);
3465 if (tmp == NULL)
3466 return NULL;
3467 assert(PyBytes_CheckExact(tmp));
3468 n = PyBytes_GET_SIZE(tmp);
3469 pnew = type->tp_alloc(type, n);
3470 if (pnew != NULL) {
3471 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3472 PyBytes_AS_STRING(tmp), n+1);
3473 ((PyBytesObject *)pnew)->ob_shash =
3474 ((PyBytesObject *)tmp)->ob_shash;
3475 }
3476 Py_DECREF(tmp);
3477 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003478}
3479
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003480PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003481"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003482bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003483bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003484bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3485bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003486\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003487Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003488 - an iterable yielding integers in range(256)\n\
3489 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003490 - any object implementing the buffer API.\n\
3491 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003492
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003493static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003494
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003495PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003496 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3497 "bytes",
3498 PyBytesObject_SIZE,
3499 sizeof(char),
3500 bytes_dealloc, /* tp_dealloc */
3501 0, /* tp_print */
3502 0, /* tp_getattr */
3503 0, /* tp_setattr */
3504 0, /* tp_reserved */
3505 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003506 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003507 &bytes_as_sequence, /* tp_as_sequence */
3508 &bytes_as_mapping, /* tp_as_mapping */
3509 (hashfunc)bytes_hash, /* tp_hash */
3510 0, /* tp_call */
3511 bytes_str, /* tp_str */
3512 PyObject_GenericGetAttr, /* tp_getattro */
3513 0, /* tp_setattro */
3514 &bytes_as_buffer, /* tp_as_buffer */
3515 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3516 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3517 bytes_doc, /* tp_doc */
3518 0, /* tp_traverse */
3519 0, /* tp_clear */
3520 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3521 0, /* tp_weaklistoffset */
3522 bytes_iter, /* tp_iter */
3523 0, /* tp_iternext */
3524 bytes_methods, /* tp_methods */
3525 0, /* tp_members */
3526 0, /* tp_getset */
3527 &PyBaseObject_Type, /* tp_base */
3528 0, /* tp_dict */
3529 0, /* tp_descr_get */
3530 0, /* tp_descr_set */
3531 0, /* tp_dictoffset */
3532 0, /* tp_init */
3533 0, /* tp_alloc */
3534 bytes_new, /* tp_new */
3535 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003536};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003537
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003538void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003539PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003540{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003541 assert(pv != NULL);
3542 if (*pv == NULL)
3543 return;
3544 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003545 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003546 return;
3547 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003548
3549 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3550 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003551 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003552 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003553
Antoine Pitrou161d6952014-05-01 14:36:20 +02003554 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003555 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003556 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3557 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3558 Py_CLEAR(*pv);
3559 return;
3560 }
3561
3562 oldsize = PyBytes_GET_SIZE(*pv);
3563 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3564 PyErr_NoMemory();
3565 goto error;
3566 }
3567 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3568 goto error;
3569
3570 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3571 PyBuffer_Release(&wb);
3572 return;
3573
3574 error:
3575 PyBuffer_Release(&wb);
3576 Py_CLEAR(*pv);
3577 return;
3578 }
3579
3580 else {
3581 /* Multiple references, need to create new object */
3582 PyObject *v;
3583 v = bytes_concat(*pv, w);
3584 Py_DECREF(*pv);
3585 *pv = v;
3586 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003587}
3588
3589void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003590PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003591{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003592 PyBytes_Concat(pv, w);
3593 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003594}
3595
3596
Ethan Furmanb95b5612015-01-23 20:05:18 -08003597/* The following function breaks the notion that bytes are immutable:
3598 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003599 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003600 as creating a new bytes object and destroying the old one, only
3601 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003602 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003603 Note that if there's not enough memory to resize the bytes object, the
3604 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003605 memory" exception is set, and -1 is returned. Else (on success) 0 is
3606 returned, and the value in *pv may or may not be the same as on input.
3607 As always, an extra byte is allocated for a trailing \0 byte (newsize
3608 does *not* include that), and a trailing \0 byte is stored.
3609*/
3610
3611int
3612_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3613{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003614 PyObject *v;
3615 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003616 v = *pv;
3617 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3618 *pv = 0;
3619 Py_DECREF(v);
3620 PyErr_BadInternalCall();
3621 return -1;
3622 }
3623 /* XXX UNREF/NEWREF interface should be more symmetrical */
3624 _Py_DEC_REFTOTAL;
3625 _Py_ForgetReference(v);
3626 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003627 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003628 if (*pv == NULL) {
3629 PyObject_Del(v);
3630 PyErr_NoMemory();
3631 return -1;
3632 }
3633 _Py_NewReference(*pv);
3634 sv = (PyBytesObject *) *pv;
3635 Py_SIZE(sv) = newsize;
3636 sv->ob_sval[newsize] = '\0';
3637 sv->ob_shash = -1; /* invalidate cached hash value */
3638 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003639}
3640
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003641void
3642PyBytes_Fini(void)
3643{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003644 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003645 for (i = 0; i < UCHAR_MAX + 1; i++)
3646 Py_CLEAR(characters[i]);
3647 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003648}
3649
Benjamin Peterson4116f362008-05-27 00:36:20 +00003650/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003651
3652typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003653 PyObject_HEAD
3654 Py_ssize_t it_index;
3655 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003656} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003657
3658static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003659striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003660{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003661 _PyObject_GC_UNTRACK(it);
3662 Py_XDECREF(it->it_seq);
3663 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003664}
3665
3666static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003667striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003668{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003669 Py_VISIT(it->it_seq);
3670 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003671}
3672
3673static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003674striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003675{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003676 PyBytesObject *seq;
3677 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003679 assert(it != NULL);
3680 seq = it->it_seq;
3681 if (seq == NULL)
3682 return NULL;
3683 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003685 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3686 item = PyLong_FromLong(
3687 (unsigned char)seq->ob_sval[it->it_index]);
3688 if (item != NULL)
3689 ++it->it_index;
3690 return item;
3691 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003693 Py_DECREF(seq);
3694 it->it_seq = NULL;
3695 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003696}
3697
3698static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003699striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003701 Py_ssize_t len = 0;
3702 if (it->it_seq)
3703 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3704 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003705}
3706
3707PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003708 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003709
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003710static PyObject *
3711striter_reduce(striterobject *it)
3712{
3713 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003714 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003715 it->it_seq, it->it_index);
3716 } else {
3717 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3718 if (u == NULL)
3719 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003720 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003721 }
3722}
3723
3724PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3725
3726static PyObject *
3727striter_setstate(striterobject *it, PyObject *state)
3728{
3729 Py_ssize_t index = PyLong_AsSsize_t(state);
3730 if (index == -1 && PyErr_Occurred())
3731 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003732 if (it->it_seq != NULL) {
3733 if (index < 0)
3734 index = 0;
3735 else if (index > PyBytes_GET_SIZE(it->it_seq))
3736 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3737 it->it_index = index;
3738 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003739 Py_RETURN_NONE;
3740}
3741
3742PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3743
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003744static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003745 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3746 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003747 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3748 reduce_doc},
3749 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3750 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003751 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003752};
3753
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003754PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003755 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3756 "bytes_iterator", /* tp_name */
3757 sizeof(striterobject), /* tp_basicsize */
3758 0, /* tp_itemsize */
3759 /* methods */
3760 (destructor)striter_dealloc, /* tp_dealloc */
3761 0, /* tp_print */
3762 0, /* tp_getattr */
3763 0, /* tp_setattr */
3764 0, /* tp_reserved */
3765 0, /* tp_repr */
3766 0, /* tp_as_number */
3767 0, /* tp_as_sequence */
3768 0, /* tp_as_mapping */
3769 0, /* tp_hash */
3770 0, /* tp_call */
3771 0, /* tp_str */
3772 PyObject_GenericGetAttr, /* tp_getattro */
3773 0, /* tp_setattro */
3774 0, /* tp_as_buffer */
3775 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3776 0, /* tp_doc */
3777 (traverseproc)striter_traverse, /* tp_traverse */
3778 0, /* tp_clear */
3779 0, /* tp_richcompare */
3780 0, /* tp_weaklistoffset */
3781 PyObject_SelfIter, /* tp_iter */
3782 (iternextfunc)striter_next, /* tp_iternext */
3783 striter_methods, /* tp_methods */
3784 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003785};
3786
3787static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003788bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003789{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003790 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003792 if (!PyBytes_Check(seq)) {
3793 PyErr_BadInternalCall();
3794 return NULL;
3795 }
3796 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3797 if (it == NULL)
3798 return NULL;
3799 it->it_index = 0;
3800 Py_INCREF(seq);
3801 it->it_seq = (PyBytesObject *)seq;
3802 _PyObject_GC_TRACK(it);
3803 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003804}
Victor Stinner00165072015-10-09 01:53:21 +02003805
3806
3807/* _PyBytesWriter API */
3808
/* Divisor applied when overallocating: the requested size is increased by
   size / OVERALLOCATE_FACTOR. */
#ifndef MS_WINDOWS
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3816
3817void
3818_PyBytesWriter_Init(_PyBytesWriter *writer)
3819{
3820 writer->buffer = NULL;
3821 writer->allocated = 0;
Victor Stinner53926a12015-10-09 12:37:03 +02003822 writer->min_size = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003823 writer->overallocate = 0;
Victor Stinnerb3653a32015-10-09 03:38:24 +02003824 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003825#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003826 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003827#endif
3828}
3829
3830void
3831_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3832{
3833 Py_CLEAR(writer->buffer);
3834}
3835
3836Py_LOCAL_INLINE(char*)
3837_PyBytesWriter_AsString(_PyBytesWriter *writer)
3838{
Victor Stinnerb3653a32015-10-09 03:38:24 +02003839 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003840 assert(writer->buffer != NULL);
3841 return PyBytes_AS_STRING(writer->buffer);
3842 }
3843 else {
3844 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003845 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003846 }
3847}
3848
3849Py_LOCAL_INLINE(Py_ssize_t)
3850_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
3851{
3852 char *start = _PyBytesWriter_AsString(writer);
3853 assert(str != NULL);
3854 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003855 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003856 return str - start;
3857}
3858
3859Py_LOCAL_INLINE(void)
3860_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3861{
3862#ifdef Py_DEBUG
3863 char *start, *end;
3864
Victor Stinnerb3653a32015-10-09 03:38:24 +02003865 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003866 assert(writer->buffer != NULL);
3867 assert(PyBytes_CheckExact(writer->buffer));
3868 assert(Py_REFCNT(writer->buffer) == 1);
3869 }
3870 else {
3871 assert(writer->buffer == NULL);
3872 }
3873
3874 start = _PyBytesWriter_AsString(writer);
Victor Stinner53926a12015-10-09 12:37:03 +02003875 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003876 /* the last byte must always be null */
3877 assert(start[writer->allocated] == 0);
3878
3879 end = start + writer->allocated;
3880 assert(str != NULL);
3881 assert(start <= str && str <= end);
3882#endif
3883}
3884
3885char*
3886_PyBytesWriter_Prepare(_PyBytesWriter *writer, char *str, Py_ssize_t size)
3887{
3888 Py_ssize_t allocated, pos;
3889
3890 _PyBytesWriter_CheckConsistency(writer, str);
3891 assert(size >= 0);
3892
3893 if (size == 0) {
3894 /* nothing to do */
3895 return str;
3896 }
3897
Victor Stinner53926a12015-10-09 12:37:03 +02003898 if (writer->min_size > PY_SSIZE_T_MAX - size) {
Victor Stinner00165072015-10-09 01:53:21 +02003899 PyErr_NoMemory();
3900 _PyBytesWriter_Dealloc(writer);
3901 return NULL;
3902 }
Victor Stinner53926a12015-10-09 12:37:03 +02003903 writer->min_size += size;
Victor Stinner00165072015-10-09 01:53:21 +02003904
3905 allocated = writer->allocated;
Victor Stinner53926a12015-10-09 12:37:03 +02003906 if (writer->min_size <= allocated)
Victor Stinner00165072015-10-09 01:53:21 +02003907 return str;
3908
Victor Stinner53926a12015-10-09 12:37:03 +02003909 allocated = writer->min_size;
Victor Stinner00165072015-10-09 01:53:21 +02003910 if (writer->overallocate
3911 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3912 /* overallocate to limit the number of realloc() */
3913 allocated += allocated / OVERALLOCATE_FACTOR;
3914 }
3915
3916 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003917 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003918 /* Note: Don't use a bytearray object because the conversion from
3919 byterray to bytes requires to copy all bytes. */
3920 if (_PyBytes_Resize(&writer->buffer, allocated)) {
3921 assert(writer->buffer == NULL);
3922 return NULL;
3923 }
3924 }
3925 else {
3926 /* convert from stack buffer to bytes object buffer */
3927 assert(writer->buffer == NULL);
3928
3929 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3930 if (writer->buffer == NULL)
3931 return NULL;
3932
3933 if (pos != 0) {
3934 Py_MEMCPY(PyBytes_AS_STRING(writer->buffer),
Victor Stinnerb3653a32015-10-09 03:38:24 +02003935 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003936 pos);
3937 }
3938
Victor Stinnerb3653a32015-10-09 03:38:24 +02003939 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003940#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003941 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003942#endif
Victor Stinner00165072015-10-09 01:53:21 +02003943 }
3944 writer->allocated = allocated;
3945
3946 str = _PyBytesWriter_AsString(writer) + pos;
3947 _PyBytesWriter_CheckConsistency(writer, str);
3948 return str;
3949}
3950
3951/* Allocate the buffer to write size bytes.
3952 Return the pointer to the beginning of buffer data.
3953 Raise an exception and return NULL on error. */
3954char*
3955_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3956{
3957 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003958 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003959 assert(size >= 0);
3960
Victor Stinnerb3653a32015-10-09 03:38:24 +02003961 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003962#ifdef Py_DEBUG
Victor Stinner00165072015-10-09 01:53:21 +02003963 /* the last byte is reserved, it must be '\0' */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003964 writer->allocated = sizeof(writer->small_buffer) - 1;
3965 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003966#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003967 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003968#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003969 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003970}
3971
3972PyObject *
3973_PyBytesWriter_Finish(_PyBytesWriter *writer, char *str)
3974{
3975 Py_ssize_t pos;
3976 PyObject *result;
3977
3978 _PyBytesWriter_CheckConsistency(writer, str);
3979
3980 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003981 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003982 if (pos != writer->allocated) {
3983 if (_PyBytes_Resize(&writer->buffer, pos)) {
3984 assert(writer->buffer == NULL);
3985 return NULL;
3986 }
3987 }
3988
3989 result = writer->buffer;
3990 writer->buffer = NULL;
3991 }
3992 else {
Victor Stinnerb3653a32015-10-09 03:38:24 +02003993 result = PyBytes_FromStringAndSize(writer->small_buffer, pos);
Victor Stinner00165072015-10-09 01:53:21 +02003994 }
3995
3996 return result;
3997}