blob: 4a0735fc78beff512589d245a488825c1c2aadda [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
/* PyBytesObject_SIZE is the base size of a bytes object: the offset of the
   ob_sval payload plus one byte for the terminating null.  Any allocation
   for a bytes object holding n bytes should request PyBytesObject_SIZE + n
   bytes.

   Compared with sizeof(PyBytesObject), this saves 3 bytes per allocation
   on a typical system.
*/
#define PyBytesObject_SIZE (1 + offsetof(PyBytesObject, ob_sval))
32
Christian Heimes2c9c7a52008-05-26 13:42:13 +000033/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000034 For PyBytes_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
36
37 For PyBytes_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000045 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046 alter the data yourself, since the strings may be shared.
47
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020050 allocated for string data, not counting the null terminating character.
51 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 PyBytes_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyBytes_FromString()).
54*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
397/* Format codes
398 * F_LJUST '-'
399 * F_SIGN '+'
400 * F_BLANK ' '
401 * F_ALT '#'
402 * F_ZERO '0'
403 */
404#define F_LJUST (1<<0)
405#define F_SIGN (1<<1)
406#define F_BLANK (1<<2)
407#define F_ALT (1<<3)
408#define F_ZERO (1<<4)
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
414 PyObject **p_result, _PyBytesWriter *writer, char *str,
415 Py_ssize_t prealloc)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800416{
417 char *p;
418 PyObject *result;
419 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200420 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800421
422 x = PyFloat_AsDouble(v);
423 if (x == -1.0 && PyErr_Occurred()) {
424 PyErr_Format(PyExc_TypeError, "float argument required, "
425 "not %.200s", Py_TYPE(v)->tp_name);
426 return NULL;
427 }
428
429 if (prec < 0)
430 prec = 6;
431
432 p = PyOS_double_to_string(x, type, prec,
433 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
434
435 if (p == NULL)
436 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200437
438 len = strlen(p);
439 if (writer != NULL) {
440 if ((Py_ssize_t)len > prealloc) {
441 str = _PyBytesWriter_Prepare(writer, str, len - prealloc);
442 if (str == NULL)
443 return NULL;
444 }
445 Py_MEMCPY(str, p, len);
446 str += len;
447 return str;
448 }
449
450 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800451 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200452 *p_result = result;
453 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800454}
455
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300456static PyObject *
457formatlong(PyObject *v, int flags, int prec, int type)
458{
459 PyObject *result, *iobj;
460 if (type == 'i')
461 type = 'd';
462 if (PyLong_Check(v))
463 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
464 if (PyNumber_Check(v)) {
465 /* make sure number is a type of integer for o, x, and X */
466 if (type == 'o' || type == 'x' || type == 'X')
467 iobj = PyNumber_Index(v);
468 else
469 iobj = PyNumber_Long(v);
470 if (iobj == NULL) {
471 if (!PyErr_ExceptionMatches(PyExc_TypeError))
472 return NULL;
473 }
474 else if (!PyLong_Check(iobj))
475 Py_CLEAR(iobj);
476 if (iobj != NULL) {
477 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
478 Py_DECREF(iobj);
479 return result;
480 }
481 }
482 PyErr_Format(PyExc_TypeError,
483 "%%%c format: %s is required, not %.200s", type,
484 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
485 : "a number",
486 Py_TYPE(v)->tp_name);
487 return NULL;
488}
489
490static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200491byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800492{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200493 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
494 *p = PyBytes_AS_STRING(arg)[0];
495 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800496 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200497 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
498 *p = PyByteArray_AS_STRING(arg)[0];
499 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800500 }
501 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300502 PyObject *iobj;
503 long ival;
504 int overflow;
505 /* make sure number is a type of integer */
506 if (PyLong_Check(arg)) {
507 ival = PyLong_AsLongAndOverflow(arg, &overflow);
508 }
509 else {
510 iobj = PyNumber_Index(arg);
511 if (iobj == NULL) {
512 if (!PyErr_ExceptionMatches(PyExc_TypeError))
513 return 0;
514 goto onError;
515 }
516 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
517 Py_DECREF(iobj);
518 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300519 if (!overflow && ival == -1 && PyErr_Occurred())
520 goto onError;
521 if (overflow || !(0 <= ival && ival <= 255)) {
522 PyErr_SetString(PyExc_OverflowError,
523 "%c arg not in range(256)");
524 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800525 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300526 *p = (char)ival;
527 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300529 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200530 PyErr_SetString(PyExc_TypeError,
531 "%c requires an integer in range(256) or a single byte");
532 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800533}
534
535static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200536format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200538 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800540 /* is it a bytes object? */
541 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 *pbuf = PyBytes_AS_STRING(v);
543 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800544 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200545 return v;
546 }
547 if (PyByteArray_Check(v)) {
548 *pbuf = PyByteArray_AS_STRING(v);
549 *plen = PyByteArray_GET_SIZE(v);
550 Py_INCREF(v);
551 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800552 }
553 /* does it support __bytes__? */
554 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
555 if (func != NULL) {
556 result = PyObject_CallFunctionObjArgs(func, NULL);
557 Py_DECREF(func);
558 if (result == NULL)
559 return NULL;
560 if (!PyBytes_Check(result)) {
561 PyErr_Format(PyExc_TypeError,
562 "__bytes__ returned non-bytes (type %.200s)",
563 Py_TYPE(result)->tp_name);
564 Py_DECREF(result);
565 return NULL;
566 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200567 *pbuf = PyBytes_AS_STRING(result);
568 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800569 return result;
570 }
571 PyErr_Format(PyExc_TypeError,
572 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
573 Py_TYPE(v)->tp_name);
574 return NULL;
575}
576
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200577/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800578
579PyObject *
580_PyBytes_Format(PyObject *format, PyObject *args)
581{
582 char *fmt, *res;
583 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800585 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800586 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200587 _PyBytesWriter writer;
588
Ethan Furmanb95b5612015-01-23 20:05:18 -0800589 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
590 PyErr_BadInternalCall();
591 return NULL;
592 }
593 fmt = PyBytes_AS_STRING(format);
594 fmtcnt = PyBytes_GET_SIZE(format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595
596 _PyBytesWriter_Init(&writer);
597
598 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
599 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601 writer.overallocate = 1;
602
Ethan Furmanb95b5612015-01-23 20:05:18 -0800603 if (PyTuple_Check(args)) {
604 arglen = PyTuple_GET_SIZE(args);
605 argidx = 0;
606 }
607 else {
608 arglen = -1;
609 argidx = -2;
610 }
611 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613 !PyByteArray_Check(args)) {
614 dict = args;
615 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616
Ethan Furmanb95b5612015-01-23 20:05:18 -0800617 while (--fmtcnt >= 0) {
618 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200619 Py_ssize_t len;
620 char *pos;
621
622 pos = strchr(fmt + 1, '%');
623 if (pos != NULL)
624 len = pos - fmt;
625 else {
626 len = PyBytes_GET_SIZE(format);
627 len -= (fmt - PyBytes_AS_STRING(format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800628 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200629 assert(len != 0);
630
631 Py_MEMCPY(res, fmt, len);
632 res += len;
633 fmt += len;
634 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800635 }
636 else {
637 /* Got a format specifier */
638 int flags = 0;
639 Py_ssize_t width = -1;
640 int prec = -1;
641 int c = '\0';
642 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 PyObject *v = NULL;
644 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200645 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800646 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200647 Py_ssize_t len = 0;
648 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200649 Py_ssize_t alloc;
650#ifdef Py_DEBUG
651 char *before;
652#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800653
Ethan Furmanb95b5612015-01-23 20:05:18 -0800654 fmt++;
655 if (*fmt == '(') {
656 char *keystart;
657 Py_ssize_t keylen;
658 PyObject *key;
659 int pcount = 1;
660
661 if (dict == NULL) {
662 PyErr_SetString(PyExc_TypeError,
663 "format requires a mapping");
664 goto error;
665 }
666 ++fmt;
667 --fmtcnt;
668 keystart = fmt;
669 /* Skip over balanced parentheses */
670 while (pcount > 0 && --fmtcnt >= 0) {
671 if (*fmt == ')')
672 --pcount;
673 else if (*fmt == '(')
674 ++pcount;
675 fmt++;
676 }
677 keylen = fmt - keystart - 1;
678 if (fmtcnt < 0 || pcount > 0) {
679 PyErr_SetString(PyExc_ValueError,
680 "incomplete format key");
681 goto error;
682 }
683 key = PyBytes_FromStringAndSize(keystart,
684 keylen);
685 if (key == NULL)
686 goto error;
687 if (args_owned) {
688 Py_DECREF(args);
689 args_owned = 0;
690 }
691 args = PyObject_GetItem(dict, key);
692 Py_DECREF(key);
693 if (args == NULL) {
694 goto error;
695 }
696 args_owned = 1;
697 arglen = -1;
698 argidx = -2;
699 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200700
701 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800702 while (--fmtcnt >= 0) {
703 switch (c = *fmt++) {
704 case '-': flags |= F_LJUST; continue;
705 case '+': flags |= F_SIGN; continue;
706 case ' ': flags |= F_BLANK; continue;
707 case '#': flags |= F_ALT; continue;
708 case '0': flags |= F_ZERO; continue;
709 }
710 break;
711 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200712
713 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800714 if (c == '*') {
715 v = getnextarg(args, arglen, &argidx);
716 if (v == NULL)
717 goto error;
718 if (!PyLong_Check(v)) {
719 PyErr_SetString(PyExc_TypeError,
720 "* wants int");
721 goto error;
722 }
723 width = PyLong_AsSsize_t(v);
724 if (width == -1 && PyErr_Occurred())
725 goto error;
726 if (width < 0) {
727 flags |= F_LJUST;
728 width = -width;
729 }
730 if (--fmtcnt >= 0)
731 c = *fmt++;
732 }
733 else if (c >= 0 && isdigit(c)) {
734 width = c - '0';
735 while (--fmtcnt >= 0) {
736 c = Py_CHARMASK(*fmt++);
737 if (!isdigit(c))
738 break;
739 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
740 PyErr_SetString(
741 PyExc_ValueError,
742 "width too big");
743 goto error;
744 }
745 width = width*10 + (c - '0');
746 }
747 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200748
749 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800750 if (c == '.') {
751 prec = 0;
752 if (--fmtcnt >= 0)
753 c = *fmt++;
754 if (c == '*') {
755 v = getnextarg(args, arglen, &argidx);
756 if (v == NULL)
757 goto error;
758 if (!PyLong_Check(v)) {
759 PyErr_SetString(
760 PyExc_TypeError,
761 "* wants int");
762 goto error;
763 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200764 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800765 if (prec == -1 && PyErr_Occurred())
766 goto error;
767 if (prec < 0)
768 prec = 0;
769 if (--fmtcnt >= 0)
770 c = *fmt++;
771 }
772 else if (c >= 0 && isdigit(c)) {
773 prec = c - '0';
774 while (--fmtcnt >= 0) {
775 c = Py_CHARMASK(*fmt++);
776 if (!isdigit(c))
777 break;
778 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
779 PyErr_SetString(
780 PyExc_ValueError,
781 "prec too big");
782 goto error;
783 }
784 prec = prec*10 + (c - '0');
785 }
786 }
787 } /* prec */
788 if (fmtcnt >= 0) {
789 if (c == 'h' || c == 'l' || c == 'L') {
790 if (--fmtcnt >= 0)
791 c = *fmt++;
792 }
793 }
794 if (fmtcnt < 0) {
795 PyErr_SetString(PyExc_ValueError,
796 "incomplete format");
797 goto error;
798 }
799 if (c != '%') {
800 v = getnextarg(args, arglen, &argidx);
801 if (v == NULL)
802 goto error;
803 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200804
805 if (fmtcnt < 0) {
806 /* last writer: disable writer overallocation */
807 writer.overallocate = 0;
808 }
809
Ethan Furmanb95b5612015-01-23 20:05:18 -0800810 sign = 0;
811 fill = ' ';
812 switch (c) {
813 case '%':
814 pbuf = "%";
815 len = 1;
816 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200817
Ethan Furman62e977f2015-03-11 08:17:00 -0700818 case 'r':
819 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800820 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200821 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800822 if (temp == NULL)
823 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200824 assert(PyUnicode_IS_ASCII(temp));
825 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
826 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 if (prec >= 0 && len > prec)
828 len = prec;
829 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200830
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 case 's':
832 // %s is only for 2/3 code; 3 only code should use %b
833 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200834 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800835 if (temp == NULL)
836 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 if (prec >= 0 && len > prec)
838 len = prec;
839 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200840
Ethan Furmanb95b5612015-01-23 20:05:18 -0800841 case 'i':
842 case 'd':
843 case 'u':
844 case 'o':
845 case 'x':
846 case 'X':
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300847 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200848 if (!temp)
849 goto error;
850 assert(PyUnicode_IS_ASCII(temp));
851 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
852 len = PyUnicode_GET_LENGTH(temp);
853 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800854 if (flags & F_ZERO)
855 fill = '0';
856 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200857
Ethan Furmanb95b5612015-01-23 20:05:18 -0800858 case 'e':
859 case 'E':
860 case 'f':
861 case 'F':
862 case 'g':
863 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200864 if (width == -1 && prec == -1
865 && !(flags & (F_SIGN | F_BLANK)))
866 {
867 /* Fast path */
868 res = formatfloat(v, flags, prec, c, NULL, &writer, res, 1);
869 if (res == NULL)
870 goto error;
871 continue;
872 }
873
874 if (!formatfloat(v, flags, prec, c, &temp, NULL, res, 1))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800875 goto error;
876 pbuf = PyBytes_AS_STRING(temp);
877 len = PyBytes_GET_SIZE(temp);
878 sign = 1;
879 if (flags & F_ZERO)
880 fill = '0';
881 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200882
Ethan Furmanb95b5612015-01-23 20:05:18 -0800883 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200884 pbuf = &onechar;
885 len = byte_converter(v, &onechar);
886 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800887 goto error;
888 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200889
Ethan Furmanb95b5612015-01-23 20:05:18 -0800890 default:
891 PyErr_Format(PyExc_ValueError,
892 "unsupported format character '%c' (0x%x) "
893 "at index %zd",
894 c, c,
895 (Py_ssize_t)(fmt - 1 -
896 PyBytes_AsString(format)));
897 goto error;
898 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200899
Ethan Furmanb95b5612015-01-23 20:05:18 -0800900 if (sign) {
901 if (*pbuf == '-' || *pbuf == '+') {
902 sign = *pbuf++;
903 len--;
904 }
905 else if (flags & F_SIGN)
906 sign = '+';
907 else if (flags & F_BLANK)
908 sign = ' ';
909 else
910 sign = 0;
911 }
912 if (width < len)
913 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200914
915 alloc = width;
916 if (sign != 0 && len == width)
917 alloc++;
918 if (alloc > 1) {
919 res = _PyBytesWriter_Prepare(&writer, res, alloc - 1);
920 if (res == NULL)
921 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800922 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200923#ifdef Py_DEBUG
924 before = res;
925#endif
926
927 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 if (sign) {
929 if (fill != ' ')
930 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800931 if (width > len)
932 width--;
933 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200934
935 /* Write the numeric prefix for "x", "X" and "o" formats
936 if the alternate form is used.
937 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800938 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
939 assert(pbuf[0] == '0');
940 assert(pbuf[1] == c);
941 if (fill != ' ') {
942 *res++ = *pbuf++;
943 *res++ = *pbuf++;
944 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800945 width -= 2;
946 if (width < 0)
947 width = 0;
948 len -= 2;
949 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200950
951 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800952 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200953 memset(res, fill, width - len);
954 res += (width - len);
955 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800956 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200957
958 /* If padding with spaces: write sign if needed and/or numeric
959 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 if (fill == ' ') {
961 if (sign)
962 *res++ = sign;
963 if ((flags & F_ALT) &&
964 (c == 'x' || c == 'X')) {
965 assert(pbuf[0] == '0');
966 assert(pbuf[1] == c);
967 *res++ = *pbuf++;
968 *res++ = *pbuf++;
969 }
970 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200971
972 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800973 Py_MEMCPY(res, pbuf, len);
974 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200975
976 /* Pad right with the fill character if needed */
977 if (width > len) {
978 memset(res, ' ', width - len);
979 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800980 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200981
Ethan Furmanb95b5612015-01-23 20:05:18 -0800982 if (dict && (argidx < arglen) && c != '%') {
983 PyErr_SetString(PyExc_TypeError,
984 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -0800985 Py_XDECREF(temp);
986 goto error;
987 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800988 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200989
990#ifdef Py_DEBUG
991 /* check that we computed the exact size for this write */
992 assert((res - before) == alloc);
993#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* If overallocation was disabled, ensure that it was the last
997 write. Otherwise, we missed an optimization */
998 assert(writer.overallocate || fmtcnt < 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001000
Ethan Furmanb95b5612015-01-23 20:05:18 -08001001 if (argidx < arglen && !dict) {
1002 PyErr_SetString(PyExc_TypeError,
1003 "not all arguments converted during bytes formatting");
1004 goto error;
1005 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001006
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 if (args_owned) {
1008 Py_DECREF(args);
1009 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001010 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011
1012 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 if (args_owned) {
1015 Py_DECREF(args);
1016 }
1017 return NULL;
1018}
1019
1020/* =-= */
1021
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001022static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001023bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001024{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001026}
1027
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001028/* Unescape a backslash-escaped string. If unicode is non-zero,
1029 the string is a u-literal. If recode_encoding is non-zero,
1030 the string is UTF-8 encoded and should be re-encoded in the
1031 specified encoding. */
1032
1033PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 Py_ssize_t len,
1035 const char *errors,
1036 Py_ssize_t unicode,
1037 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001038{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 int c;
1040 char *p, *buf;
1041 const char *end;
1042 PyObject *v;
1043 Py_ssize_t newlen = recode_encoding ? 4*len:len;
1044 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
1045 if (v == NULL)
1046 return NULL;
1047 p = buf = PyBytes_AsString(v);
1048 end = s + len;
1049 while (s < end) {
1050 if (*s != '\\') {
1051 non_esc:
1052 if (recode_encoding && (*s & 0x80)) {
1053 PyObject *u, *w;
1054 char *r;
1055 const char* t;
1056 Py_ssize_t rn;
1057 t = s;
1058 /* Decode non-ASCII bytes as UTF-8. */
1059 while (t < end && (*t & 0x80)) t++;
1060 u = PyUnicode_DecodeUTF8(s, t - s, errors);
1061 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 /* Recode them in target encoding. */
1064 w = PyUnicode_AsEncodedString(
1065 u, recode_encoding, errors);
1066 Py_DECREF(u);
1067 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 /* Append bytes to output buffer. */
1070 assert(PyBytes_Check(w));
1071 r = PyBytes_AS_STRING(w);
1072 rn = PyBytes_GET_SIZE(w);
1073 Py_MEMCPY(p, r, rn);
1074 p += rn;
1075 Py_DECREF(w);
1076 s = t;
1077 } else {
1078 *p++ = *s++;
1079 }
1080 continue;
1081 }
1082 s++;
1083 if (s==end) {
1084 PyErr_SetString(PyExc_ValueError,
1085 "Trailing \\ in string");
1086 goto failed;
1087 }
1088 switch (*s++) {
1089 /* XXX This assumes ASCII! */
1090 case '\n': break;
1091 case '\\': *p++ = '\\'; break;
1092 case '\'': *p++ = '\''; break;
1093 case '\"': *p++ = '\"'; break;
1094 case 'b': *p++ = '\b'; break;
1095 case 'f': *p++ = '\014'; break; /* FF */
1096 case 't': *p++ = '\t'; break;
1097 case 'n': *p++ = '\n'; break;
1098 case 'r': *p++ = '\r'; break;
1099 case 'v': *p++ = '\013'; break; /* VT */
1100 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1101 case '0': case '1': case '2': case '3':
1102 case '4': case '5': case '6': case '7':
1103 c = s[-1] - '0';
1104 if (s < end && '0' <= *s && *s <= '7') {
1105 c = (c<<3) + *s++ - '0';
1106 if (s < end && '0' <= *s && *s <= '7')
1107 c = (c<<3) + *s++ - '0';
1108 }
1109 *p++ = c;
1110 break;
1111 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001112 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 unsigned int x = 0;
1114 c = Py_CHARMASK(*s);
1115 s++;
David Malcolm96960882010-11-05 17:23:41 +00001116 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001118 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 x = 10 + c - 'a';
1120 else
1121 x = 10 + c - 'A';
1122 x = x << 4;
1123 c = Py_CHARMASK(*s);
1124 s++;
David Malcolm96960882010-11-05 17:23:41 +00001125 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001127 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 x += 10 + c - 'a';
1129 else
1130 x += 10 + c - 'A';
1131 *p++ = x;
1132 break;
1133 }
1134 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001135 PyErr_Format(PyExc_ValueError,
1136 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001137 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 goto failed;
1139 }
1140 if (strcmp(errors, "replace") == 0) {
1141 *p++ = '?';
1142 } else if (strcmp(errors, "ignore") == 0)
1143 /* do nothing */;
1144 else {
1145 PyErr_Format(PyExc_ValueError,
1146 "decoding error; unknown "
1147 "error handling code: %.400s",
1148 errors);
1149 goto failed;
1150 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001151 /* skip \x */
1152 if (s < end && Py_ISXDIGIT(s[0]))
1153 s++; /* and a hexdigit */
1154 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 default:
1156 *p++ = '\\';
1157 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001158 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 UTF-8 bytes may follow. */
1160 }
1161 }
1162 if (p-buf < newlen)
1163 _PyBytes_Resize(&v, p - buf);
1164 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001165 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 Py_DECREF(v);
1167 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001168}
1169
1170/* -------------------------------------------------------------------- */
1171/* object api */
1172
1173Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001174PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 if (!PyBytes_Check(op)) {
1177 PyErr_Format(PyExc_TypeError,
1178 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1179 return -1;
1180 }
1181 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001182}
1183
1184char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001185PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 if (!PyBytes_Check(op)) {
1188 PyErr_Format(PyExc_TypeError,
1189 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1190 return NULL;
1191 }
1192 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001193}
1194
1195int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001196PyBytes_AsStringAndSize(PyObject *obj,
1197 char **s,
1198 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 if (s == NULL) {
1201 PyErr_BadInternalCall();
1202 return -1;
1203 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 if (!PyBytes_Check(obj)) {
1206 PyErr_Format(PyExc_TypeError,
1207 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1208 return -1;
1209 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 *s = PyBytes_AS_STRING(obj);
1212 if (len != NULL)
1213 *len = PyBytes_GET_SIZE(obj);
1214 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001215 PyErr_SetString(PyExc_ValueError,
1216 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 return -1;
1218 }
1219 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220}
Neal Norwitz6968b052007-02-27 19:02:19 +00001221
1222/* -------------------------------------------------------------------- */
1223/* Methods */
1224
Eric Smith0923d1d2009-04-16 20:16:10 +00001225#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001226
1227#include "stringlib/fastsearch.h"
1228#include "stringlib/count.h"
1229#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001230#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001231#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001232#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001233#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001234
Eric Smith0f78bff2009-11-30 01:01:42 +00001235#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001236
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001237PyObject *
1238PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001239{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001240 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001241 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001242 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001244 unsigned char quote, *s, *p;
1245
1246 /* Compute size of output string */
1247 squotes = dquotes = 0;
1248 newsize = 3; /* b'' */
1249 s = (unsigned char*)op->ob_sval;
1250 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001251 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001252 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001253 case '\'': squotes++; break;
1254 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001255 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001256 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001257 default:
1258 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001259 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001260 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001261 if (newsize > PY_SSIZE_T_MAX - incr)
1262 goto overflow;
1263 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 }
1265 quote = '\'';
1266 if (smartquotes && squotes && !dquotes)
1267 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001268 if (squotes && quote == '\'') {
1269 if (newsize > PY_SSIZE_T_MAX - squotes)
1270 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001271 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001273
1274 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 if (v == NULL) {
1276 return NULL;
1277 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001279
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 *p++ = 'b', *p++ = quote;
1281 for (i = 0; i < length; i++) {
1282 unsigned char c = op->ob_sval[i];
1283 if (c == quote || c == '\\')
1284 *p++ = '\\', *p++ = c;
1285 else if (c == '\t')
1286 *p++ = '\\', *p++ = 't';
1287 else if (c == '\n')
1288 *p++ = '\\', *p++ = 'n';
1289 else if (c == '\r')
1290 *p++ = '\\', *p++ = 'r';
1291 else if (c < ' ' || c >= 0x7f) {
1292 *p++ = '\\';
1293 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001294 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1295 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 else
1298 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001300 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001301 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001303
1304 overflow:
1305 PyErr_SetString(PyExc_OverflowError,
1306 "bytes object is too large to make repr");
1307 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001308}
1309
Neal Norwitz6968b052007-02-27 19:02:19 +00001310static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001311bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001314}
1315
Neal Norwitz6968b052007-02-27 19:02:19 +00001316static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001317bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001318{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 if (Py_BytesWarningFlag) {
1320 if (PyErr_WarnEx(PyExc_BytesWarning,
1321 "str() on a bytes instance", 1))
1322 return NULL;
1323 }
1324 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001325}
1326
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001327static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001328bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001329{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001331}
Neal Norwitz6968b052007-02-27 19:02:19 +00001332
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001333/* This is also used by PyBytes_Concat() */
1334static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001335bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001336{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 Py_ssize_t size;
1338 Py_buffer va, vb;
1339 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 va.len = -1;
1342 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001343 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1344 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1346 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1347 goto done;
1348 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 /* Optimize end cases */
1351 if (va.len == 0 && PyBytes_CheckExact(b)) {
1352 result = b;
1353 Py_INCREF(result);
1354 goto done;
1355 }
1356 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1357 result = a;
1358 Py_INCREF(result);
1359 goto done;
1360 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 size = va.len + vb.len;
1363 if (size < 0) {
1364 PyErr_NoMemory();
1365 goto done;
1366 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 result = PyBytes_FromStringAndSize(NULL, size);
1369 if (result != NULL) {
1370 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1371 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1372 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001373
1374 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 if (va.len != -1)
1376 PyBuffer_Release(&va);
1377 if (vb.len != -1)
1378 PyBuffer_Release(&vb);
1379 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380}
Neal Norwitz6968b052007-02-27 19:02:19 +00001381
1382static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001383bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001384{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001385 Py_ssize_t i;
1386 Py_ssize_t j;
1387 Py_ssize_t size;
1388 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 size_t nbytes;
1390 if (n < 0)
1391 n = 0;
1392 /* watch out for overflows: the size can overflow int,
1393 * and the # of bytes needed can overflow size_t
1394 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001395 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 PyErr_SetString(PyExc_OverflowError,
1397 "repeated bytes are too long");
1398 return NULL;
1399 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001400 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1402 Py_INCREF(a);
1403 return (PyObject *)a;
1404 }
1405 nbytes = (size_t)size;
1406 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1407 PyErr_SetString(PyExc_OverflowError,
1408 "repeated bytes are too long");
1409 return NULL;
1410 }
1411 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1412 if (op == NULL)
1413 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001414 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 op->ob_shash = -1;
1416 op->ob_sval[size] = '\0';
1417 if (Py_SIZE(a) == 1 && n > 0) {
1418 memset(op->ob_sval, a->ob_sval[0] , n);
1419 return (PyObject *) op;
1420 }
1421 i = 0;
1422 if (i < size) {
1423 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1424 i = Py_SIZE(a);
1425 }
1426 while (i < size) {
1427 j = (i <= size-i) ? i : size-i;
1428 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1429 i += j;
1430 }
1431 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001432}
1433
Guido van Rossum98297ee2007-11-06 21:34:58 +00001434static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001435bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001436{
1437 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1438 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001439 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001440 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001441 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001442 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001443 return -1;
1444 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1445 varg.buf, varg.len, 0);
1446 PyBuffer_Release(&varg);
1447 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001448 }
1449 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001450 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1451 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001452 }
1453
Antoine Pitrou0010d372010-08-15 17:12:55 +00001454 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001455}
1456
Neal Norwitz6968b052007-02-27 19:02:19 +00001457static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001458bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001459{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 if (i < 0 || i >= Py_SIZE(a)) {
1461 PyErr_SetString(PyExc_IndexError, "index out of range");
1462 return NULL;
1463 }
1464 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001465}
1466
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001467Py_LOCAL(int)
1468bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1469{
1470 int cmp;
1471 Py_ssize_t len;
1472
1473 len = Py_SIZE(a);
1474 if (Py_SIZE(b) != len)
1475 return 0;
1476
1477 if (a->ob_sval[0] != b->ob_sval[0])
1478 return 0;
1479
1480 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1481 return (cmp == 0);
1482}
1483
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001484static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001485bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 int c;
1488 Py_ssize_t len_a, len_b;
1489 Py_ssize_t min_len;
1490 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001491 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 /* Make sure both arguments are strings. */
1494 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001495 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001496 rc = PyObject_IsInstance((PyObject*)a,
1497 (PyObject*)&PyUnicode_Type);
1498 if (!rc)
1499 rc = PyObject_IsInstance((PyObject*)b,
1500 (PyObject*)&PyUnicode_Type);
1501 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001503 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001504 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001505 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001506 return NULL;
1507 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001508 else {
1509 rc = PyObject_IsInstance((PyObject*)a,
1510 (PyObject*)&PyLong_Type);
1511 if (!rc)
1512 rc = PyObject_IsInstance((PyObject*)b,
1513 (PyObject*)&PyLong_Type);
1514 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001515 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001516 if (rc) {
1517 if (PyErr_WarnEx(PyExc_BytesWarning,
1518 "Comparison between bytes and int", 1))
1519 return NULL;
1520 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001521 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 }
1523 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001525 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001527 case Py_EQ:
1528 case Py_LE:
1529 case Py_GE:
1530 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001532 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001533 case Py_NE:
1534 case Py_LT:
1535 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001536 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001537 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001538 default:
1539 PyErr_BadArgument();
1540 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 }
1542 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001543 else if (op == Py_EQ || op == Py_NE) {
1544 int eq = bytes_compare_eq(a, b);
1545 eq ^= (op == Py_NE);
1546 result = eq ? Py_True : Py_False;
1547 }
1548 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001549 len_a = Py_SIZE(a);
1550 len_b = Py_SIZE(b);
1551 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001552 if (min_len > 0) {
1553 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001554 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001555 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001557 else
1558 c = 0;
1559 if (c == 0)
1560 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1561 switch (op) {
1562 case Py_LT: c = c < 0; break;
1563 case Py_LE: c = c <= 0; break;
1564 case Py_GT: c = c > 0; break;
1565 case Py_GE: c = c >= 0; break;
1566 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001567 PyErr_BadArgument();
1568 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001569 }
1570 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 Py_INCREF(result);
1574 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001575}
1576
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001577static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001578bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001579{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001580 if (a->ob_shash == -1) {
1581 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001582 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001583 }
1584 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001585}
1586
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001587static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001588bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001589{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 if (PyIndex_Check(item)) {
1591 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1592 if (i == -1 && PyErr_Occurred())
1593 return NULL;
1594 if (i < 0)
1595 i += PyBytes_GET_SIZE(self);
1596 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1597 PyErr_SetString(PyExc_IndexError,
1598 "index out of range");
1599 return NULL;
1600 }
1601 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1602 }
1603 else if (PySlice_Check(item)) {
1604 Py_ssize_t start, stop, step, slicelength, cur, i;
1605 char* source_buf;
1606 char* result_buf;
1607 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001608
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001609 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 PyBytes_GET_SIZE(self),
1611 &start, &stop, &step, &slicelength) < 0) {
1612 return NULL;
1613 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 if (slicelength <= 0) {
1616 return PyBytes_FromStringAndSize("", 0);
1617 }
1618 else if (start == 0 && step == 1 &&
1619 slicelength == PyBytes_GET_SIZE(self) &&
1620 PyBytes_CheckExact(self)) {
1621 Py_INCREF(self);
1622 return (PyObject *)self;
1623 }
1624 else if (step == 1) {
1625 return PyBytes_FromStringAndSize(
1626 PyBytes_AS_STRING(self) + start,
1627 slicelength);
1628 }
1629 else {
1630 source_buf = PyBytes_AS_STRING(self);
1631 result = PyBytes_FromStringAndSize(NULL, slicelength);
1632 if (result == NULL)
1633 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001634
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 result_buf = PyBytes_AS_STRING(result);
1636 for (cur = start, i = 0; i < slicelength;
1637 cur += step, i++) {
1638 result_buf[i] = source_buf[cur];
1639 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 return result;
1642 }
1643 }
1644 else {
1645 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001646 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 Py_TYPE(item)->tp_name);
1648 return NULL;
1649 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001650}
1651
1652static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001653bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001654{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1656 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001657}
1658
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001659static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 (lenfunc)bytes_length, /*sq_length*/
1661 (binaryfunc)bytes_concat, /*sq_concat*/
1662 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1663 (ssizeargfunc)bytes_item, /*sq_item*/
1664 0, /*sq_slice*/
1665 0, /*sq_ass_item*/
1666 0, /*sq_ass_slice*/
1667 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001668};
1669
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001670static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 (lenfunc)bytes_length,
1672 (binaryfunc)bytes_subscript,
1673 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674};
1675
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001676static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 (getbufferproc)bytes_buffer_getbuffer,
1678 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679};
1680
1681
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
   helpers further down the file to pick which end(s) to trim; confirm at
   the use sites. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1685
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001686/*[clinic input]
1687bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001689 sep: object = None
1690 The delimiter according which to split the bytes.
1691 None (the default value) means split on ASCII whitespace characters
1692 (space, tab, return, newline, formfeed, vertical tab).
1693 maxsplit: Py_ssize_t = -1
1694 Maximum number of splits to do.
1695 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001696
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001697Return a list of the sections in the bytes, using sep as the delimiter.
1698[clinic start generated code]*/
1699
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001700static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001701bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001702/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001703{
1704 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 const char *s = PyBytes_AS_STRING(self), *sub;
1706 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001707 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 if (maxsplit < 0)
1710 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001711 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001713 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 return NULL;
1715 sub = vsub.buf;
1716 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1719 PyBuffer_Release(&vsub);
1720 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001721}
1722
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723/*[clinic input]
1724bytes.partition
1725
1726 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001727 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001728 /
1729
1730Partition the bytes into three parts using the given separator.
1731
1732This will search for the separator sep in the bytes. If the separator is found,
1733returns a 3-tuple containing the part before the separator, the separator
1734itself, and the part after it.
1735
1736If the separator is not found, returns a 3-tuple containing the original bytes
1737object and two empty bytes objects.
1738[clinic start generated code]*/
1739
Neal Norwitz6968b052007-02-27 19:02:19 +00001740static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001741bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001742/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001743{
Neal Norwitz6968b052007-02-27 19:02:19 +00001744 return stringlib_partition(
1745 (PyObject*) self,
1746 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001747 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001748 );
1749}
1750
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001751/*[clinic input]
1752bytes.rpartition
1753
1754 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001755 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001756 /
1757
1758Partition the bytes into three parts using the given separator.
1759
This will search for the separator sep in the bytes, starting at the end. If
1761the separator is found, returns a 3-tuple containing the part before the
1762separator, the separator itself, and the part after it.
1763
1764If the separator is not found, returns a 3-tuple containing two empty bytes
1765objects and the original bytes object.
1766[clinic start generated code]*/
1767
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001768static PyObject *
1769bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001770/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001771{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 return stringlib_rpartition(
1773 (PyObject*) self,
1774 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001775 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001777}
1778
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001779/*[clinic input]
1780bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001781
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001782Return a list of the sections in the bytes, using sep as the delimiter.
1783
1784Splitting is done starting at the end of the bytes and working to the front.
1785[clinic start generated code]*/
1786
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001787static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001788bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001789/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790{
1791 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 const char *s = PyBytes_AS_STRING(self), *sub;
1793 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001794 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 if (maxsplit < 0)
1797 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001798 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001800 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 return NULL;
1802 sub = vsub.buf;
1803 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001805 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1806 PyBuffer_Release(&vsub);
1807 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001808}
1809
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001810
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001811/*[clinic input]
1812bytes.join
1813
1814 iterable_of_bytes: object
1815 /
1816
1817Concatenate any number of bytes objects.
1818
1819The bytes whose method is called is inserted in between each pair.
1820
1821The result is returned as a new bytes object.
1822
1823Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1824[clinic start generated code]*/
1825
Neal Norwitz6968b052007-02-27 19:02:19 +00001826static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001827bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001828/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001829{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001830 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001831}
1832
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001833PyObject *
1834_PyBytes_Join(PyObject *sep, PyObject *x)
1835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 assert(sep != NULL && PyBytes_Check(sep));
1837 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001838 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001839}
1840
/* Helper macro to fix up start/end slice values in place.
 *
 *   - end   > len  is clamped to len;
 *   - end   < 0    has len added once and is then clamped to 0;
 *   - start < 0    has len added once and is then clamped to 0.
 *
 * `start` and `end` must be modifiable lvalues.  Every argument is evaluated
 * more than once, so none of them may have side effects.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement: it is safe under an unbraced `if`, and a following
 * `else` still pairs with the intended `if`.  Use it with a trailing
 * semicolon, like a function call.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001855
1856Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001857bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001860 char byte;
1861 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001862 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001863 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001865 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001866
Antoine Pitrouac65d962011-10-20 23:54:17 +02001867 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1868 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001869 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001870
Antoine Pitrouac65d962011-10-20 23:54:17 +02001871 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001872 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001873 return -2;
1874
1875 sub = subbuf.buf;
1876 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001877 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001878 else {
1879 sub = &byte;
1880 sub_len = 1;
1881 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001882 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001883
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001884 ADJUST_INDICES(start, end, len);
1885 if (end - start < sub_len)
1886 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001887 else if (sub_len == 1
1888#ifndef HAVE_MEMRCHR
1889 && dir > 0
1890#endif
1891 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001892 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001893 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001894 res = stringlib_fastsearch_memchr_1char(
1895 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001896 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001897 if (res >= 0)
1898 res += start;
1899 }
1900 else {
1901 if (dir > 0)
1902 res = stringlib_find_slice(
1903 PyBytes_AS_STRING(self), len,
1904 sub, sub_len, start, end);
1905 else
1906 res = stringlib_rfind_slice(
1907 PyBytes_AS_STRING(self), len,
1908 sub, sub_len, start, end);
1909 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001910
1911 if (subobj)
1912 PyBuffer_Release(&subbuf);
1913
1914 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001915}
1916
1917
1918PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001919"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001920\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001921Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001922such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001924\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001925Return -1 on failure.");
1926
Neal Norwitz6968b052007-02-27 19:02:19 +00001927static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001928bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001929{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 Py_ssize_t result = bytes_find_internal(self, args, +1);
1931 if (result == -2)
1932 return NULL;
1933 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001934}
1935
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
1937PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001938"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001939\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001940Like B.find() but raise ValueError when the substring is not found.");
1941
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001942static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001943bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001944{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945 Py_ssize_t result = bytes_find_internal(self, args, +1);
1946 if (result == -2)
1947 return NULL;
1948 if (result == -1) {
1949 PyErr_SetString(PyExc_ValueError,
1950 "substring not found");
1951 return NULL;
1952 }
1953 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001954}
1955
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001956
1957PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001958"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001959\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001960Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001961such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001963\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964Return -1 on failure.");
1965
Neal Norwitz6968b052007-02-27 19:02:19 +00001966static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001967bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 Py_ssize_t result = bytes_find_internal(self, args, -1);
1970 if (result == -2)
1971 return NULL;
1972 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001973}
1974
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001975
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001977"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978\n\
1979Like B.rfind() but raise ValueError when the substring is not found.");
1980
1981static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001982bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001983{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 Py_ssize_t result = bytes_find_internal(self, args, -1);
1985 if (result == -2)
1986 return NULL;
1987 if (result == -1) {
1988 PyErr_SetString(PyExc_ValueError,
1989 "substring not found");
1990 return NULL;
1991 }
1992 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001993}
1994
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
1996Py_LOCAL_INLINE(PyObject *)
1997do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001998{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001999 Py_buffer vsep;
2000 char *s = PyBytes_AS_STRING(self);
2001 Py_ssize_t len = PyBytes_GET_SIZE(self);
2002 char *sep;
2003 Py_ssize_t seplen;
2004 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002005
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002006 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002007 return NULL;
2008 sep = vsep.buf;
2009 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002011 i = 0;
2012 if (striptype != RIGHTSTRIP) {
2013 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2014 i++;
2015 }
2016 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 j = len;
2019 if (striptype != LEFTSTRIP) {
2020 do {
2021 j--;
2022 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2023 j++;
2024 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002026 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2029 Py_INCREF(self);
2030 return (PyObject*)self;
2031 }
2032 else
2033 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002034}
2035
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002036
2037Py_LOCAL_INLINE(PyObject *)
2038do_strip(PyBytesObject *self, int striptype)
2039{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002040 char *s = PyBytes_AS_STRING(self);
2041 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002042
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 i = 0;
2044 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002045 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 i++;
2047 }
2048 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 j = len;
2051 if (striptype != LEFTSTRIP) {
2052 do {
2053 j--;
David Malcolm96960882010-11-05 17:23:41 +00002054 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 j++;
2056 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002058 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2059 Py_INCREF(self);
2060 return (PyObject*)self;
2061 }
2062 else
2063 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064}
2065
2066
2067Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002068do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002070 if (bytes != NULL && bytes != Py_None) {
2071 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002072 }
2073 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002074}
2075
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002076/*[clinic input]
2077bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002079 self: self(type="PyBytesObject *")
2080 bytes: object = None
2081 /
2082
2083Strip leading and trailing bytes contained in the argument.
2084
2085If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2086[clinic start generated code]*/
2087
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002088static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002089bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002090/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002091{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002092 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002093}
2094
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002095/*[clinic input]
2096bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002097
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002098 self: self(type="PyBytesObject *")
2099 bytes: object = None
2100 /
2101
2102Strip leading bytes contained in the argument.
2103
2104If the argument is omitted or None, strip leading ASCII whitespace.
2105[clinic start generated code]*/
2106
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002107static PyObject *
2108bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002109/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110{
2111 return do_argstrip(self, LEFTSTRIP, bytes);
2112}
2113
2114/*[clinic input]
2115bytes.rstrip
2116
2117 self: self(type="PyBytesObject *")
2118 bytes: object = None
2119 /
2120
2121Strip trailing bytes contained in the argument.
2122
2123If the argument is omitted or None, strip trailing ASCII whitespace.
2124[clinic start generated code]*/
2125
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126static PyObject *
2127bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002128/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002129{
2130 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002131}
Neal Norwitz6968b052007-02-27 19:02:19 +00002132
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002133
2134PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002135"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002136\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002137Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002138string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002139as in slice notation.");
2140
2141static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002142bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002143{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002144 PyObject *sub_obj;
2145 const char *str = PyBytes_AS_STRING(self), *sub;
2146 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002147 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002148 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Antoine Pitrouac65d962011-10-20 23:54:17 +02002150 Py_buffer vsub;
2151 PyObject *count_obj;
2152
2153 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2154 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002155 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002156
Antoine Pitrouac65d962011-10-20 23:54:17 +02002157 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002158 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002159 return NULL;
2160
2161 sub = vsub.buf;
2162 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002164 else {
2165 sub = &byte;
2166 sub_len = 1;
2167 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002170
Antoine Pitrouac65d962011-10-20 23:54:17 +02002171 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002172 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2173 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002174
2175 if (sub_obj)
2176 PyBuffer_Release(&vsub);
2177
2178 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002179}
2180
2181
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182/*[clinic input]
2183bytes.translate
2184
2185 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002186 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002187 Translation table, which must be a bytes object of length 256.
2188 [
2189 deletechars: object
2190 ]
2191 /
2192
2193Return a copy with each character mapped by the given translation table.
2194
2195All characters occurring in the optional argument deletechars are removed.
2196The remaining characters are mapped through the given translation table.
2197[clinic start generated code]*/
2198
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002199static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002200bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2201 PyObject *deletechars)
2202/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002203{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002204 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002205 Py_buffer table_view = {NULL, NULL};
2206 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002207 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002208 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002209 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002210 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 Py_ssize_t inlen, tablen, dellen = 0;
2212 PyObject *result;
2213 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002214
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002215 if (PyBytes_Check(table)) {
2216 table_chars = PyBytes_AS_STRING(table);
2217 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002218 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002219 else if (table == Py_None) {
2220 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 tablen = 256;
2222 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002223 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002224 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002225 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002226 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002227 tablen = table_view.len;
2228 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002230 if (tablen != 256) {
2231 PyErr_SetString(PyExc_ValueError,
2232 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002233 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002234 return NULL;
2235 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002236
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002237 if (deletechars != NULL) {
2238 if (PyBytes_Check(deletechars)) {
2239 del_table_chars = PyBytes_AS_STRING(deletechars);
2240 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002241 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002242 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002243 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002244 PyBuffer_Release(&table_view);
2245 return NULL;
2246 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002247 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002248 dellen = del_table_view.len;
2249 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 }
2251 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002252 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002253 dellen = 0;
2254 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002256 inlen = PyBytes_GET_SIZE(input_obj);
2257 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002258 if (result == NULL) {
2259 PyBuffer_Release(&del_table_view);
2260 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002262 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002263 output_start = output = PyBytes_AsString(result);
2264 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002265
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002266 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002267 /* If no deletions are required, use faster code */
2268 for (i = inlen; --i >= 0; ) {
2269 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002270 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 changed = 1;
2272 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002273 if (!changed && PyBytes_CheckExact(input_obj)) {
2274 Py_INCREF(input_obj);
2275 Py_DECREF(result);
2276 result = input_obj;
2277 }
2278 PyBuffer_Release(&del_table_view);
2279 PyBuffer_Release(&table_view);
2280 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002281 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002282
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002283 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002284 for (i = 0; i < 256; i++)
2285 trans_table[i] = Py_CHARMASK(i);
2286 } else {
2287 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002288 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002290 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002292 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002293 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002294 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002296 for (i = inlen; --i >= 0; ) {
2297 c = Py_CHARMASK(*input++);
2298 if (trans_table[c] != -1)
2299 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2300 continue;
2301 changed = 1;
2302 }
2303 if (!changed && PyBytes_CheckExact(input_obj)) {
2304 Py_DECREF(result);
2305 Py_INCREF(input_obj);
2306 return input_obj;
2307 }
2308 /* Fix the size of the resulting string */
2309 if (inlen > 0)
2310 _PyBytes_Resize(&result, output - output_start);
2311 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002312}
2313
2314
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002315/*[clinic input]
2316
2317@staticmethod
2318bytes.maketrans
2319
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002320 frm: Py_buffer
2321 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322 /
2323
2324Return a translation table useable for the bytes or bytearray translate method.
2325
2326The returned table will be one where each byte in frm is mapped to the byte at
2327the same position in to.
2328
2329The bytes objects frm and to must be of the same length.
2330[clinic start generated code]*/
2331
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002332static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002333bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002334/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002335{
2336 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002337}
2338
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002339/* find and count characters and substrings */
2340
/* memchr() over the first target_len bytes of target, with the result cast
   to char *: pointer to the first byte equal to c, or NULL if none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2343
2344/* String ops must return a string. */
2345/* If the object is subclass of string, create a copy */
2346Py_LOCAL(PyBytesObject *)
2347return_self(PyBytesObject *self)
2348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 if (PyBytes_CheckExact(self)) {
2350 Py_INCREF(self);
2351 return self;
2352 }
2353 return (PyBytesObject *)PyBytes_FromStringAndSize(
2354 PyBytes_AS_STRING(self),
2355 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002356}
2357
2358Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002359countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002361 Py_ssize_t count=0;
2362 const char *start=target;
2363 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002365 while ( (start=findchar(start, end-start, c)) != NULL ) {
2366 count++;
2367 if (count >= maxcount)
2368 break;
2369 start += 1;
2370 }
2371 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002372}
2373
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002374
2375/* Algorithms for different cases of string replacement */
2376
2377/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2378Py_LOCAL(PyBytesObject *)
2379replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002380 const char *to_s, Py_ssize_t to_len,
2381 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 char *self_s, *result_s;
2384 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002385 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002386 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002389
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002390 /* 1 at the end plus 1 after every character;
2391 count = min(maxcount, self_len + 1) */
2392 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002393 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002394 else
2395 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2396 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002398 /* Check for overflow */
2399 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002400 assert(count > 0);
2401 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 PyErr_SetString(PyExc_OverflowError,
2403 "replacement bytes are too long");
2404 return NULL;
2405 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002406 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002408 if (! (result = (PyBytesObject *)
2409 PyBytes_FromStringAndSize(NULL, result_len)) )
2410 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002412 self_s = PyBytes_AS_STRING(self);
2413 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002415 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 /* Lay the first one down (guaranteed this will occur) */
2418 Py_MEMCPY(result_s, to_s, to_len);
2419 result_s += to_len;
2420 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 for (i=0; i<count; i++) {
2423 *result_s++ = *self_s++;
2424 Py_MEMCPY(result_s, to_s, to_len);
2425 result_s += to_len;
2426 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002428 /* Copy the rest of the original string */
2429 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002431 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002432}
2433
2434/* Special case for deleting a single character */
2435/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2436Py_LOCAL(PyBytesObject *)
2437replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002439{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 char *self_s, *result_s;
2441 char *start, *next, *end;
2442 Py_ssize_t self_len, result_len;
2443 Py_ssize_t count;
2444 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 self_len = PyBytes_GET_SIZE(self);
2447 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 count = countchar(self_s, self_len, from_c, maxcount);
2450 if (count == 0) {
2451 return return_self(self);
2452 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 result_len = self_len - count; /* from_len == 1 */
2455 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002457 if ( (result = (PyBytesObject *)
2458 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2459 return NULL;
2460 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002462 start = self_s;
2463 end = self_s + self_len;
2464 while (count-- > 0) {
2465 next = findchar(start, end-start, from_c);
2466 if (next == NULL)
2467 break;
2468 Py_MEMCPY(result_s, start, next-start);
2469 result_s += (next-start);
2470 start = next+1;
2471 }
2472 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002475}
2476
2477/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2478
2479Py_LOCAL(PyBytesObject *)
2480replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 const char *from_s, Py_ssize_t from_len,
2482 Py_ssize_t maxcount) {
2483 char *self_s, *result_s;
2484 char *start, *next, *end;
2485 Py_ssize_t self_len, result_len;
2486 Py_ssize_t count, offset;
2487 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002489 self_len = PyBytes_GET_SIZE(self);
2490 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 count = stringlib_count(self_s, self_len,
2493 from_s, from_len,
2494 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 if (count == 0) {
2497 /* no matches */
2498 return return_self(self);
2499 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 result_len = self_len - (count * from_len);
2502 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002504 if ( (result = (PyBytesObject *)
2505 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2506 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 start = self_s;
2511 end = self_s + self_len;
2512 while (count-- > 0) {
2513 offset = stringlib_find(start, end-start,
2514 from_s, from_len,
2515 0);
2516 if (offset == -1)
2517 break;
2518 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002522 result_s += (next-start);
2523 start = next+from_len;
2524 }
2525 Py_MEMCPY(result_s, start, end-start);
2526 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527}
2528
2529/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2530Py_LOCAL(PyBytesObject *)
2531replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002532 char from_c, char to_c,
2533 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 char *self_s, *result_s, *start, *end, *next;
2536 Py_ssize_t self_len;
2537 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002539 /* The result string will be the same size */
2540 self_s = PyBytes_AS_STRING(self);
2541 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 if (next == NULL) {
2546 /* No matches; return the original string */
2547 return return_self(self);
2548 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002549
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002550 /* Need to make a new string */
2551 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2552 if (result == NULL)
2553 return NULL;
2554 result_s = PyBytes_AS_STRING(result);
2555 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 /* change everything in-place, starting with this one */
2558 start = result_s + (next-self_s);
2559 *start = to_c;
2560 start++;
2561 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 while (--maxcount > 0) {
2564 next = findchar(start, end-start, from_c);
2565 if (next == NULL)
2566 break;
2567 *next = to_c;
2568 start = next+1;
2569 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572}
2573
2574/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2575Py_LOCAL(PyBytesObject *)
2576replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 const char *from_s, Py_ssize_t from_len,
2578 const char *to_s, Py_ssize_t to_len,
2579 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002580{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002581 char *result_s, *start, *end;
2582 char *self_s;
2583 Py_ssize_t self_len, offset;
2584 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002586 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002588 self_s = PyBytes_AS_STRING(self);
2589 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002591 offset = stringlib_find(self_s, self_len,
2592 from_s, from_len,
2593 0);
2594 if (offset == -1) {
2595 /* No matches; return the original string */
2596 return return_self(self);
2597 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002599 /* Need to make a new string */
2600 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2601 if (result == NULL)
2602 return NULL;
2603 result_s = PyBytes_AS_STRING(result);
2604 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002605
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002606 /* change everything in-place, starting with this one */
2607 start = result_s + offset;
2608 Py_MEMCPY(start, to_s, from_len);
2609 start += from_len;
2610 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002612 while ( --maxcount > 0) {
2613 offset = stringlib_find(start, end-start,
2614 from_s, from_len,
2615 0);
2616 if (offset==-1)
2617 break;
2618 Py_MEMCPY(start+offset, to_s, from_len);
2619 start += offset+from_len;
2620 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002623}
2624
2625/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2626Py_LOCAL(PyBytesObject *)
2627replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002628 char from_c,
2629 const char *to_s, Py_ssize_t to_len,
2630 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002631{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002632 char *self_s, *result_s;
2633 char *start, *next, *end;
2634 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002635 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002636 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 self_s = PyBytes_AS_STRING(self);
2639 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 count = countchar(self_s, self_len, from_c, maxcount);
2642 if (count == 0) {
2643 /* no matches, return unchanged */
2644 return return_self(self);
2645 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002647 /* use the difference between current and new, hence the "-1" */
2648 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002649 assert(count > 0);
2650 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 PyErr_SetString(PyExc_OverflowError,
2652 "replacement bytes are too long");
2653 return NULL;
2654 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002655 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002657 if ( (result = (PyBytesObject *)
2658 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2659 return NULL;
2660 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 start = self_s;
2663 end = self_s + self_len;
2664 while (count-- > 0) {
2665 next = findchar(start, end-start, from_c);
2666 if (next == NULL)
2667 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002669 if (next == start) {
2670 /* replace with the 'to' */
2671 Py_MEMCPY(result_s, to_s, to_len);
2672 result_s += to_len;
2673 start += 1;
2674 } else {
2675 /* copy the unchanged old then the 'to' */
2676 Py_MEMCPY(result_s, start, next-start);
2677 result_s += (next-start);
2678 Py_MEMCPY(result_s, to_s, to_len);
2679 result_s += to_len;
2680 start = next+1;
2681 }
2682 }
2683 /* Copy the remainder of the remaining string */
2684 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002687}
2688
2689/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2690Py_LOCAL(PyBytesObject *)
2691replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002692 const char *from_s, Py_ssize_t from_len,
2693 const char *to_s, Py_ssize_t to_len,
2694 Py_ssize_t maxcount) {
2695 char *self_s, *result_s;
2696 char *start, *next, *end;
2697 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002698 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 self_s = PyBytes_AS_STRING(self);
2702 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 count = stringlib_count(self_s, self_len,
2705 from_s, from_len,
2706 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 if (count == 0) {
2709 /* no matches, return unchanged */
2710 return return_self(self);
2711 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002713 /* Check for overflow */
2714 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002715 assert(count > 0);
2716 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002717 PyErr_SetString(PyExc_OverflowError,
2718 "replacement bytes are too long");
2719 return NULL;
2720 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002721 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002723 if ( (result = (PyBytesObject *)
2724 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2725 return NULL;
2726 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002727
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002728 start = self_s;
2729 end = self_s + self_len;
2730 while (count-- > 0) {
2731 offset = stringlib_find(start, end-start,
2732 from_s, from_len,
2733 0);
2734 if (offset == -1)
2735 break;
2736 next = start+offset;
2737 if (next == start) {
2738 /* replace with the 'to' */
2739 Py_MEMCPY(result_s, to_s, to_len);
2740 result_s += to_len;
2741 start += from_len;
2742 } else {
2743 /* copy the unchanged old then the 'to' */
2744 Py_MEMCPY(result_s, start, next-start);
2745 result_s += (next-start);
2746 Py_MEMCPY(result_s, to_s, to_len);
2747 result_s += to_len;
2748 start = next+from_len;
2749 }
2750 }
2751 /* Copy the remainder of the remaining string */
2752 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755}
2756
2757
2758Py_LOCAL(PyBytesObject *)
2759replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002760 const char *from_s, Py_ssize_t from_len,
2761 const char *to_s, Py_ssize_t to_len,
2762 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002763{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002764 if (maxcount < 0) {
2765 maxcount = PY_SSIZE_T_MAX;
2766 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2767 /* nothing to do; return the original string */
2768 return return_self(self);
2769 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002771 if (maxcount == 0 ||
2772 (from_len == 0 && to_len == 0)) {
2773 /* nothing to do; return the original string */
2774 return return_self(self);
2775 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002777 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002779 if (from_len == 0) {
2780 /* insert the 'to' string everywhere. */
2781 /* >>> "Python".replace("", ".") */
2782 /* '.P.y.t.h.o.n.' */
2783 return replace_interleave(self, to_s, to_len, maxcount);
2784 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002786 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2787 /* point for an empty self string to generate a non-empty string */
2788 /* Special case so the remaining code always gets a non-empty string */
2789 if (PyBytes_GET_SIZE(self) == 0) {
2790 return return_self(self);
2791 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 if (to_len == 0) {
2794 /* delete all occurrences of 'from' string */
2795 if (from_len == 1) {
2796 return replace_delete_single_character(
2797 self, from_s[0], maxcount);
2798 } else {
2799 return replace_delete_substring(self, from_s,
2800 from_len, maxcount);
2801 }
2802 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002804 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 if (from_len == to_len) {
2807 if (from_len == 1) {
2808 return replace_single_character_in_place(
2809 self,
2810 from_s[0],
2811 to_s[0],
2812 maxcount);
2813 } else {
2814 return replace_substring_in_place(
2815 self, from_s, from_len, to_s, to_len,
2816 maxcount);
2817 }
2818 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 /* Otherwise use the more generic algorithms */
2821 if (from_len == 1) {
2822 return replace_single_character(self, from_s[0],
2823 to_s, to_len, maxcount);
2824 } else {
2825 /* len('from')>=2, len('to')>=1 */
2826 return replace_substring(self, from_s, from_len, to_s, to_len,
2827 maxcount);
2828 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002829}
2830
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002831
2832/*[clinic input]
2833bytes.replace
2834
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002835 old: Py_buffer
2836 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002837 count: Py_ssize_t = -1
2838 Maximum number of occurrences to replace.
2839 -1 (the default value) means replace all occurrences.
2840 /
2841
2842Return a copy with all occurrences of substring old replaced by new.
2843
2844If the optional argument count is given, only the first count occurrences are
2845replaced.
2846[clinic start generated code]*/
2847
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002848static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002849bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2850 Py_ssize_t count)
2851/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002853 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002854 (const char *)old->buf, old->len,
2855 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856}
2857
2858/** End DALKE **/
2859
2860/* Matches the end (direction >= 0) or start (direction < 0) of self
2861 * against substr, using the start and end arguments. Returns
2862 * -1 on error, 0 if not found and 1 if found.
2863 */
2864Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002865_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002866 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002868 Py_ssize_t len = PyBytes_GET_SIZE(self);
2869 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002870 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002871 const char* sub;
2872 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002874 if (PyBytes_Check(substr)) {
2875 sub = PyBytes_AS_STRING(substr);
2876 slen = PyBytes_GET_SIZE(substr);
2877 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002878 else {
2879 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2880 return -1;
2881 sub = sub_view.buf;
2882 slen = sub_view.len;
2883 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002884 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002886 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002888 if (direction < 0) {
2889 /* startswith */
2890 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002891 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002892 } else {
2893 /* endswith */
2894 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002895 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002897 if (end-slen > start)
2898 start = end - slen;
2899 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002900 if (end-start < slen)
2901 goto notfound;
2902 if (memcmp(str+start, sub, slen) != 0)
2903 goto notfound;
2904
2905 PyBuffer_Release(&sub_view);
2906 return 1;
2907
2908notfound:
2909 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002910 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002911}
2912
2913
2914PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002915"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002916\n\
2917Return True if B starts with the specified prefix, False otherwise.\n\
2918With optional start, test B beginning at that position.\n\
2919With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002920prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002921
2922static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002923bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002924{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 Py_ssize_t start = 0;
2926 Py_ssize_t end = PY_SSIZE_T_MAX;
2927 PyObject *subobj;
2928 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002929
Jesus Ceaac451502011-04-20 17:09:23 +02002930 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 return NULL;
2932 if (PyTuple_Check(subobj)) {
2933 Py_ssize_t i;
2934 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2935 result = _bytes_tailmatch(self,
2936 PyTuple_GET_ITEM(subobj, i),
2937 start, end, -1);
2938 if (result == -1)
2939 return NULL;
2940 else if (result) {
2941 Py_RETURN_TRUE;
2942 }
2943 }
2944 Py_RETURN_FALSE;
2945 }
2946 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002947 if (result == -1) {
2948 if (PyErr_ExceptionMatches(PyExc_TypeError))
2949 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2950 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002951 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002952 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002953 else
2954 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955}
2956
2957
2958PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002959"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002960\n\
2961Return True if B ends with the specified suffix, False otherwise.\n\
2962With optional start, test B beginning at that position.\n\
2963With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002964suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002965
2966static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002967bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002969 Py_ssize_t start = 0;
2970 Py_ssize_t end = PY_SSIZE_T_MAX;
2971 PyObject *subobj;
2972 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973
Jesus Ceaac451502011-04-20 17:09:23 +02002974 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002975 return NULL;
2976 if (PyTuple_Check(subobj)) {
2977 Py_ssize_t i;
2978 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2979 result = _bytes_tailmatch(self,
2980 PyTuple_GET_ITEM(subobj, i),
2981 start, end, +1);
2982 if (result == -1)
2983 return NULL;
2984 else if (result) {
2985 Py_RETURN_TRUE;
2986 }
2987 }
2988 Py_RETURN_FALSE;
2989 }
2990 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002991 if (result == -1) {
2992 if (PyErr_ExceptionMatches(PyExc_TypeError))
2993 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2994 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002995 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002996 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002997 else
2998 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999}
3000
3001
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003002/*[clinic input]
3003bytes.decode
3004
3005 encoding: str(c_default="NULL") = 'utf-8'
3006 The encoding with which to decode the bytes.
3007 errors: str(c_default="NULL") = 'strict'
3008 The error handling scheme to use for the handling of decoding errors.
3009 The default is 'strict' meaning that decoding errors raise a
3010 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3011 as well as any other name registered with codecs.register_error that
3012 can handle UnicodeDecodeErrors.
3013
3014Decode the bytes using the codec registered for encoding.
3015[clinic start generated code]*/
3016
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003017static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003018bytes_decode_impl(PyBytesObject*self, const char *encoding,
3019 const char *errors)
3020/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003021{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003022 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003023}
3024
Guido van Rossum20188312006-05-05 15:15:40 +00003025
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003026/*[clinic input]
3027bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003028
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003029 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003030
3031Return a list of the lines in the bytes, breaking at line boundaries.
3032
3033Line breaks are not included in the resulting list unless keepends is given and
3034true.
3035[clinic start generated code]*/
3036
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003037static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003038bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003039/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003040{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003041 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003042 (PyObject*) self, PyBytes_AS_STRING(self),
3043 PyBytes_GET_SIZE(self), keepends
3044 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003045}
3046
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003047static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003048hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003049{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003050 if (c >= 128)
3051 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003052 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003053 return c - '0';
3054 else {
David Malcolm96960882010-11-05 17:23:41 +00003055 if (Py_ISUPPER(c))
3056 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003057 if (c >= 'a' && c <= 'f')
3058 return c - 'a' + 10;
3059 }
3060 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003061}
3062
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003063/*[clinic input]
3064@classmethod
3065bytes.fromhex
3066
3067 string: unicode
3068 /
3069
3070Create a bytes object from a string of hexadecimal numbers.
3071
3072Spaces between two numbers are accepted.
3073Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3074[clinic start generated code]*/
3075
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003076static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003077bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003078/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003079{
3080 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003081 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003082 Py_ssize_t hexlen, byteslen, i, j;
3083 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003084 void *data;
3085 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003086
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003087 assert(PyUnicode_Check(string));
3088 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003090 kind = PyUnicode_KIND(string);
3091 data = PyUnicode_DATA(string);
3092 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003094 byteslen = hexlen/2; /* This overestimates if there are spaces */
3095 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3096 if (!newstring)
3097 return NULL;
3098 buf = PyBytes_AS_STRING(newstring);
3099 for (i = j = 0; i < hexlen; i += 2) {
3100 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003101 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003102 i++;
3103 if (i >= hexlen)
3104 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003105 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3106 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003107 if (top == -1 || bot == -1) {
3108 PyErr_Format(PyExc_ValueError,
3109 "non-hexadecimal number found in "
3110 "fromhex() arg at position %zd", i);
3111 goto error;
3112 }
3113 buf[j++] = (top << 4) + bot;
3114 }
3115 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3116 goto error;
3117 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003118
3119 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003120 Py_XDECREF(newstring);
3121 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003122}
3123
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003124PyDoc_STRVAR(hex__doc__,
3125"B.hex() -> string\n\
3126\n\
3127Create a string of hexadecimal numbers from a bytes object.\n\
3128Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3129
3130static PyObject *
3131bytes_hex(PyBytesObject *self)
3132{
3133 char* argbuf = PyBytes_AS_STRING(self);
3134 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3135 return _Py_strhex(argbuf, arglen);
3136}
3137
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003138static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003139bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003140{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003141 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003142}
3143
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003144
3145static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003146bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003147 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3148 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3149 _Py_capitalize__doc__},
3150 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3151 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003152 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003153 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3154 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003155 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003156 expandtabs__doc__},
3157 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003158 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003159 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003160 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3161 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3162 _Py_isalnum__doc__},
3163 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3164 _Py_isalpha__doc__},
3165 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3166 _Py_isdigit__doc__},
3167 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3168 _Py_islower__doc__},
3169 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3170 _Py_isspace__doc__},
3171 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3172 _Py_istitle__doc__},
3173 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3174 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003175 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003176 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3177 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003178 BYTES_LSTRIP_METHODDEF
3179 BYTES_MAKETRANS_METHODDEF
3180 BYTES_PARTITION_METHODDEF
3181 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003182 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3183 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3184 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003185 BYTES_RPARTITION_METHODDEF
3186 BYTES_RSPLIT_METHODDEF
3187 BYTES_RSTRIP_METHODDEF
3188 BYTES_SPLIT_METHODDEF
3189 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003190 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3191 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003192 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003193 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3194 _Py_swapcase__doc__},
3195 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003196 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003197 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3198 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003199 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003200};
3201
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003202static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003203bytes_mod(PyObject *v, PyObject *w)
3204{
3205 if (!PyBytes_Check(v))
3206 Py_RETURN_NOTIMPLEMENTED;
3207 return _PyBytes_Format(v, w);
3208}
3209
3210static PyNumberMethods bytes_as_number = {
3211 0, /*nb_add*/
3212 0, /*nb_subtract*/
3213 0, /*nb_multiply*/
3214 bytes_mod, /*nb_remainder*/
3215};
3216
3217static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003218str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3219
3220static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003221bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003222{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003223 PyObject *x = NULL;
3224 const char *encoding = NULL;
3225 const char *errors = NULL;
3226 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003227 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003228 Py_ssize_t size;
3229 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003230 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003232 if (type != &PyBytes_Type)
3233 return str_subtype_new(type, args, kwds);
3234 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3235 &encoding, &errors))
3236 return NULL;
3237 if (x == NULL) {
3238 if (encoding != NULL || errors != NULL) {
3239 PyErr_SetString(PyExc_TypeError,
3240 "encoding or errors without sequence "
3241 "argument");
3242 return NULL;
3243 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003244 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003245 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003247 if (PyUnicode_Check(x)) {
3248 /* Encode via the codec registry */
3249 if (encoding == NULL) {
3250 PyErr_SetString(PyExc_TypeError,
3251 "string argument without an encoding");
3252 return NULL;
3253 }
3254 new = PyUnicode_AsEncodedString(x, encoding, errors);
3255 if (new == NULL)
3256 return NULL;
3257 assert(PyBytes_Check(new));
3258 return new;
3259 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003260
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003261 /* If it's not unicode, there can't be encoding or errors */
3262 if (encoding != NULL || errors != NULL) {
3263 PyErr_SetString(PyExc_TypeError,
3264 "encoding or errors without a string argument");
3265 return NULL;
3266 }
3267
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003268 /* We'd like to call PyObject_Bytes here, but we need to check for an
3269 integer argument before deferring to PyBytes_FromObject, something
3270 PyObject_Bytes doesn't do. */
3271 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3272 if (func != NULL) {
3273 new = PyObject_CallFunctionObjArgs(func, NULL);
3274 Py_DECREF(func);
3275 if (new == NULL)
3276 return NULL;
3277 if (!PyBytes_Check(new)) {
3278 PyErr_Format(PyExc_TypeError,
3279 "__bytes__ returned non-bytes (type %.200s)",
3280 Py_TYPE(new)->tp_name);
3281 Py_DECREF(new);
3282 return NULL;
3283 }
3284 return new;
3285 }
3286 else if (PyErr_Occurred())
3287 return NULL;
3288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003289 /* Is it an integer? */
3290 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3291 if (size == -1 && PyErr_Occurred()) {
3292 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3293 return NULL;
3294 PyErr_Clear();
3295 }
3296 else if (size < 0) {
3297 PyErr_SetString(PyExc_ValueError, "negative count");
3298 return NULL;
3299 }
3300 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003301 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003302 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003303 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003304 return new;
3305 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003306
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003307 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003308}
3309
3310PyObject *
3311PyBytes_FromObject(PyObject *x)
3312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003313 PyObject *new, *it;
3314 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003316 if (x == NULL) {
3317 PyErr_BadInternalCall();
3318 return NULL;
3319 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003320
3321 if (PyBytes_CheckExact(x)) {
3322 Py_INCREF(x);
3323 return x;
3324 }
3325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003326 /* Use the modern buffer interface */
3327 if (PyObject_CheckBuffer(x)) {
3328 Py_buffer view;
3329 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3330 return NULL;
3331 new = PyBytes_FromStringAndSize(NULL, view.len);
3332 if (!new)
3333 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003334 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3335 &view, view.len, 'C') < 0)
3336 goto fail;
3337 PyBuffer_Release(&view);
3338 return new;
3339 fail:
3340 Py_XDECREF(new);
3341 PyBuffer_Release(&view);
3342 return NULL;
3343 }
3344 if (PyUnicode_Check(x)) {
3345 PyErr_SetString(PyExc_TypeError,
3346 "cannot convert unicode object to bytes");
3347 return NULL;
3348 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003350 if (PyList_CheckExact(x)) {
3351 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3352 if (new == NULL)
3353 return NULL;
3354 for (i = 0; i < Py_SIZE(x); i++) {
3355 Py_ssize_t value = PyNumber_AsSsize_t(
3356 PyList_GET_ITEM(x, i), PyExc_ValueError);
3357 if (value == -1 && PyErr_Occurred()) {
3358 Py_DECREF(new);
3359 return NULL;
3360 }
3361 if (value < 0 || value >= 256) {
3362 PyErr_SetString(PyExc_ValueError,
3363 "bytes must be in range(0, 256)");
3364 Py_DECREF(new);
3365 return NULL;
3366 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003367 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003368 }
3369 return new;
3370 }
3371 if (PyTuple_CheckExact(x)) {
3372 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3373 if (new == NULL)
3374 return NULL;
3375 for (i = 0; i < Py_SIZE(x); i++) {
3376 Py_ssize_t value = PyNumber_AsSsize_t(
3377 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3378 if (value == -1 && PyErr_Occurred()) {
3379 Py_DECREF(new);
3380 return NULL;
3381 }
3382 if (value < 0 || value >= 256) {
3383 PyErr_SetString(PyExc_ValueError,
3384 "bytes must be in range(0, 256)");
3385 Py_DECREF(new);
3386 return NULL;
3387 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003388 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003389 }
3390 return new;
3391 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003393 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003394 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003395 if (size == -1 && PyErr_Occurred())
3396 return NULL;
3397 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3398 returning a shared empty bytes string. This required because we
3399 want to call _PyBytes_Resize() the returned object, which we can
3400 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003401 if (size == 0)
3402 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003403 new = PyBytes_FromStringAndSize(NULL, size);
3404 if (new == NULL)
3405 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003406 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003408 /* Get the iterator */
3409 it = PyObject_GetIter(x);
3410 if (it == NULL)
3411 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003413 /* Run the iterator to exhaustion */
3414 for (i = 0; ; i++) {
3415 PyObject *item;
3416 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003418 /* Get the next item */
3419 item = PyIter_Next(it);
3420 if (item == NULL) {
3421 if (PyErr_Occurred())
3422 goto error;
3423 break;
3424 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003426 /* Interpret it as an int (__index__) */
3427 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3428 Py_DECREF(item);
3429 if (value == -1 && PyErr_Occurred())
3430 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003432 /* Range check */
3433 if (value < 0 || value >= 256) {
3434 PyErr_SetString(PyExc_ValueError,
3435 "bytes must be in range(0, 256)");
3436 goto error;
3437 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003439 /* Append the byte */
3440 if (i >= size) {
3441 size = 2 * size + 1;
3442 if (_PyBytes_Resize(&new, size) < 0)
3443 goto error;
3444 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003445 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003446 }
3447 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003449 /* Clean up and return success */
3450 Py_DECREF(it);
3451 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003452
3453 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003454 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003455 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003456 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003457}
3458
3459static PyObject *
3460str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3461{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003462 PyObject *tmp, *pnew;
3463 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003465 assert(PyType_IsSubtype(type, &PyBytes_Type));
3466 tmp = bytes_new(&PyBytes_Type, args, kwds);
3467 if (tmp == NULL)
3468 return NULL;
3469 assert(PyBytes_CheckExact(tmp));
3470 n = PyBytes_GET_SIZE(tmp);
3471 pnew = type->tp_alloc(type, n);
3472 if (pnew != NULL) {
3473 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3474 PyBytes_AS_STRING(tmp), n+1);
3475 ((PyBytesObject *)pnew)->ob_shash =
3476 ((PyBytesObject *)tmp)->ob_shash;
3477 }
3478 Py_DECREF(tmp);
3479 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003480}
3481
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003482PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003483"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003484bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003485bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003486bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3487bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003488\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003489Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003490 - an iterable yielding integers in range(256)\n\
3491 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003492 - any object implementing the buffer API.\n\
3493 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003494
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003495static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003496
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003497PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003498 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3499 "bytes",
3500 PyBytesObject_SIZE,
3501 sizeof(char),
3502 bytes_dealloc, /* tp_dealloc */
3503 0, /* tp_print */
3504 0, /* tp_getattr */
3505 0, /* tp_setattr */
3506 0, /* tp_reserved */
3507 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003508 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003509 &bytes_as_sequence, /* tp_as_sequence */
3510 &bytes_as_mapping, /* tp_as_mapping */
3511 (hashfunc)bytes_hash, /* tp_hash */
3512 0, /* tp_call */
3513 bytes_str, /* tp_str */
3514 PyObject_GenericGetAttr, /* tp_getattro */
3515 0, /* tp_setattro */
3516 &bytes_as_buffer, /* tp_as_buffer */
3517 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3518 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3519 bytes_doc, /* tp_doc */
3520 0, /* tp_traverse */
3521 0, /* tp_clear */
3522 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3523 0, /* tp_weaklistoffset */
3524 bytes_iter, /* tp_iter */
3525 0, /* tp_iternext */
3526 bytes_methods, /* tp_methods */
3527 0, /* tp_members */
3528 0, /* tp_getset */
3529 &PyBaseObject_Type, /* tp_base */
3530 0, /* tp_dict */
3531 0, /* tp_descr_get */
3532 0, /* tp_descr_set */
3533 0, /* tp_dictoffset */
3534 0, /* tp_init */
3535 0, /* tp_alloc */
3536 bytes_new, /* tp_new */
3537 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003538};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003539
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003540void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003541PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003542{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003543 assert(pv != NULL);
3544 if (*pv == NULL)
3545 return;
3546 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003547 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003548 return;
3549 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003550
3551 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3552 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003553 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003554 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003555
Antoine Pitrou161d6952014-05-01 14:36:20 +02003556 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003557 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003558 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3559 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3560 Py_CLEAR(*pv);
3561 return;
3562 }
3563
3564 oldsize = PyBytes_GET_SIZE(*pv);
3565 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3566 PyErr_NoMemory();
3567 goto error;
3568 }
3569 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3570 goto error;
3571
3572 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3573 PyBuffer_Release(&wb);
3574 return;
3575
3576 error:
3577 PyBuffer_Release(&wb);
3578 Py_CLEAR(*pv);
3579 return;
3580 }
3581
3582 else {
3583 /* Multiple references, need to create new object */
3584 PyObject *v;
3585 v = bytes_concat(*pv, w);
3586 Py_DECREF(*pv);
3587 *pv = v;
3588 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003589}
3590
3591void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003592PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003593{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003594 PyBytes_Concat(pv, w);
3595 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003596}
3597
3598
Ethan Furmanb95b5612015-01-23 20:05:18 -08003599/* The following function breaks the notion that bytes are immutable:
3600 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003601 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003602 as creating a new bytes object and destroying the old one, only
3603 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003604 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003605 Note that if there's not enough memory to resize the bytes object, the
3606 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003607 memory" exception is set, and -1 is returned. Else (on success) 0 is
3608 returned, and the value in *pv may or may not be the same as on input.
3609 As always, an extra byte is allocated for a trailing \0 byte (newsize
3610 does *not* include that), and a trailing \0 byte is stored.
3611*/
3612
3613int
3614_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3615{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003616 PyObject *v;
3617 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003618 v = *pv;
3619 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3620 *pv = 0;
3621 Py_DECREF(v);
3622 PyErr_BadInternalCall();
3623 return -1;
3624 }
3625 /* XXX UNREF/NEWREF interface should be more symmetrical */
3626 _Py_DEC_REFTOTAL;
3627 _Py_ForgetReference(v);
3628 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003629 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003630 if (*pv == NULL) {
3631 PyObject_Del(v);
3632 PyErr_NoMemory();
3633 return -1;
3634 }
3635 _Py_NewReference(*pv);
3636 sv = (PyBytesObject *) *pv;
3637 Py_SIZE(sv) = newsize;
3638 sv->ob_sval[newsize] = '\0';
3639 sv->ob_shash = -1; /* invalidate cached hash value */
3640 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003641}
3642
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003643void
3644PyBytes_Fini(void)
3645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003646 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003647 for (i = 0; i < UCHAR_MAX + 1; i++)
3648 Py_CLEAR(characters[i]);
3649 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003650}
3651
Benjamin Peterson4116f362008-05-27 00:36:20 +00003652/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003653
3654typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003655 PyObject_HEAD
3656 Py_ssize_t it_index;
3657 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003658} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003659
3660static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003661striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003662{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003663 _PyObject_GC_UNTRACK(it);
3664 Py_XDECREF(it->it_seq);
3665 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003666}
3667
3668static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003669striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003670{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003671 Py_VISIT(it->it_seq);
3672 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003673}
3674
3675static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003676striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003677{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003678 PyBytesObject *seq;
3679 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003681 assert(it != NULL);
3682 seq = it->it_seq;
3683 if (seq == NULL)
3684 return NULL;
3685 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003687 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3688 item = PyLong_FromLong(
3689 (unsigned char)seq->ob_sval[it->it_index]);
3690 if (item != NULL)
3691 ++it->it_index;
3692 return item;
3693 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003694
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003695 Py_DECREF(seq);
3696 it->it_seq = NULL;
3697 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003698}
3699
3700static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003701striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003702{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003703 Py_ssize_t len = 0;
3704 if (it->it_seq)
3705 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3706 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003707}
3708
3709PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003710 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003711
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003712static PyObject *
3713striter_reduce(striterobject *it)
3714{
3715 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003716 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003717 it->it_seq, it->it_index);
3718 } else {
3719 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3720 if (u == NULL)
3721 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003722 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003723 }
3724}
3725
3726PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3727
3728static PyObject *
3729striter_setstate(striterobject *it, PyObject *state)
3730{
3731 Py_ssize_t index = PyLong_AsSsize_t(state);
3732 if (index == -1 && PyErr_Occurred())
3733 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003734 if (it->it_seq != NULL) {
3735 if (index < 0)
3736 index = 0;
3737 else if (index > PyBytes_GET_SIZE(it->it_seq))
3738 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3739 it->it_index = index;
3740 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003741 Py_RETURN_NONE;
3742}
3743
3744PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3745
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003746static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003747 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3748 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003749 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3750 reduce_doc},
3751 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3752 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003753 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003754};
3755
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003756PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003757 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3758 "bytes_iterator", /* tp_name */
3759 sizeof(striterobject), /* tp_basicsize */
3760 0, /* tp_itemsize */
3761 /* methods */
3762 (destructor)striter_dealloc, /* tp_dealloc */
3763 0, /* tp_print */
3764 0, /* tp_getattr */
3765 0, /* tp_setattr */
3766 0, /* tp_reserved */
3767 0, /* tp_repr */
3768 0, /* tp_as_number */
3769 0, /* tp_as_sequence */
3770 0, /* tp_as_mapping */
3771 0, /* tp_hash */
3772 0, /* tp_call */
3773 0, /* tp_str */
3774 PyObject_GenericGetAttr, /* tp_getattro */
3775 0, /* tp_setattro */
3776 0, /* tp_as_buffer */
3777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3778 0, /* tp_doc */
3779 (traverseproc)striter_traverse, /* tp_traverse */
3780 0, /* tp_clear */
3781 0, /* tp_richcompare */
3782 0, /* tp_weaklistoffset */
3783 PyObject_SelfIter, /* tp_iter */
3784 (iternextfunc)striter_next, /* tp_iternext */
3785 striter_methods, /* tp_methods */
3786 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003787};
3788
3789static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003790bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003791{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003792 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003793
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003794 if (!PyBytes_Check(seq)) {
3795 PyErr_BadInternalCall();
3796 return NULL;
3797 }
3798 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3799 if (it == NULL)
3800 return NULL;
3801 it->it_index = 0;
3802 Py_INCREF(seq);
3803 it->it_seq = (PyBytesObject *)seq;
3804 _PyObject_GC_TRACK(it);
3805 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003806}
Victor Stinner00165072015-10-09 01:53:21 +02003807
3808
/* _PyBytesWriter API */

/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled, a
   buffer that must grow to `n` bytes is given an extra
   n / OVERALLOCATE_FACTOR bytes to limit the number of reallocations. */
#ifndef MS_WINDOWS
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#else
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#endif
3818
3819void
3820_PyBytesWriter_Init(_PyBytesWriter *writer)
3821{
3822 writer->buffer = NULL;
3823 writer->allocated = 0;
Victor Stinner53926a12015-10-09 12:37:03 +02003824 writer->min_size = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003825 writer->overallocate = 0;
Victor Stinnerb3653a32015-10-09 03:38:24 +02003826 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003827#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003828 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003829#endif
3830}
3831
3832void
3833_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3834{
3835 Py_CLEAR(writer->buffer);
3836}
3837
3838Py_LOCAL_INLINE(char*)
3839_PyBytesWriter_AsString(_PyBytesWriter *writer)
3840{
Victor Stinnerb3653a32015-10-09 03:38:24 +02003841 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003842 assert(writer->buffer != NULL);
3843 return PyBytes_AS_STRING(writer->buffer);
3844 }
3845 else {
3846 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003847 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003848 }
3849}
3850
3851Py_LOCAL_INLINE(Py_ssize_t)
3852_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
3853{
3854 char *start = _PyBytesWriter_AsString(writer);
3855 assert(str != NULL);
3856 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003857 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003858 return str - start;
3859}
3860
3861Py_LOCAL_INLINE(void)
3862_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3863{
3864#ifdef Py_DEBUG
3865 char *start, *end;
3866
Victor Stinnerb3653a32015-10-09 03:38:24 +02003867 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003868 assert(writer->buffer != NULL);
3869 assert(PyBytes_CheckExact(writer->buffer));
3870 assert(Py_REFCNT(writer->buffer) == 1);
3871 }
3872 else {
3873 assert(writer->buffer == NULL);
3874 }
3875
3876 start = _PyBytesWriter_AsString(writer);
Victor Stinner53926a12015-10-09 12:37:03 +02003877 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003878 /* the last byte must always be null */
3879 assert(start[writer->allocated] == 0);
3880
3881 end = start + writer->allocated;
3882 assert(str != NULL);
3883 assert(start <= str && str <= end);
3884#endif
3885}
3886
3887char*
3888_PyBytesWriter_Prepare(_PyBytesWriter *writer, char *str, Py_ssize_t size)
3889{
3890 Py_ssize_t allocated, pos;
3891
3892 _PyBytesWriter_CheckConsistency(writer, str);
3893 assert(size >= 0);
3894
3895 if (size == 0) {
3896 /* nothing to do */
3897 return str;
3898 }
3899
Victor Stinner53926a12015-10-09 12:37:03 +02003900 if (writer->min_size > PY_SSIZE_T_MAX - size) {
Victor Stinner00165072015-10-09 01:53:21 +02003901 PyErr_NoMemory();
3902 _PyBytesWriter_Dealloc(writer);
3903 return NULL;
3904 }
Victor Stinner53926a12015-10-09 12:37:03 +02003905 writer->min_size += size;
Victor Stinner00165072015-10-09 01:53:21 +02003906
3907 allocated = writer->allocated;
Victor Stinner53926a12015-10-09 12:37:03 +02003908 if (writer->min_size <= allocated)
Victor Stinner00165072015-10-09 01:53:21 +02003909 return str;
3910
Victor Stinner53926a12015-10-09 12:37:03 +02003911 allocated = writer->min_size;
Victor Stinner00165072015-10-09 01:53:21 +02003912 if (writer->overallocate
3913 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3914 /* overallocate to limit the number of realloc() */
3915 allocated += allocated / OVERALLOCATE_FACTOR;
3916 }
3917
3918 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003919 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003920 /* Note: Don't use a bytearray object because the conversion from
3921 byterray to bytes requires to copy all bytes. */
3922 if (_PyBytes_Resize(&writer->buffer, allocated)) {
3923 assert(writer->buffer == NULL);
3924 return NULL;
3925 }
3926 }
3927 else {
3928 /* convert from stack buffer to bytes object buffer */
3929 assert(writer->buffer == NULL);
3930
3931 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3932 if (writer->buffer == NULL)
3933 return NULL;
3934
3935 if (pos != 0) {
3936 Py_MEMCPY(PyBytes_AS_STRING(writer->buffer),
Victor Stinnerb3653a32015-10-09 03:38:24 +02003937 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003938 pos);
3939 }
3940
Victor Stinnerb3653a32015-10-09 03:38:24 +02003941 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003942#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003943 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003944#endif
Victor Stinner00165072015-10-09 01:53:21 +02003945 }
3946 writer->allocated = allocated;
3947
3948 str = _PyBytesWriter_AsString(writer) + pos;
3949 _PyBytesWriter_CheckConsistency(writer, str);
3950 return str;
3951}
3952
3953/* Allocate the buffer to write size bytes.
3954 Return the pointer to the beginning of buffer data.
3955 Raise an exception and return NULL on error. */
3956char*
3957_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3958{
3959 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003960 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003961 assert(size >= 0);
3962
Victor Stinnerb3653a32015-10-09 03:38:24 +02003963 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003964#ifdef Py_DEBUG
Victor Stinner00165072015-10-09 01:53:21 +02003965 /* the last byte is reserved, it must be '\0' */
Victor Stinnerb3653a32015-10-09 03:38:24 +02003966 writer->allocated = sizeof(writer->small_buffer) - 1;
3967 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003968#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02003969 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02003970#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02003971 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02003972}
3973
3974PyObject *
3975_PyBytesWriter_Finish(_PyBytesWriter *writer, char *str)
3976{
3977 Py_ssize_t pos;
3978 PyObject *result;
3979
3980 _PyBytesWriter_CheckConsistency(writer, str);
3981
3982 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003983 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003984 if (pos != writer->allocated) {
3985 if (_PyBytes_Resize(&writer->buffer, pos)) {
3986 assert(writer->buffer == NULL);
3987 return NULL;
3988 }
3989 }
3990
3991 result = writer->buffer;
3992 writer->buffer = NULL;
3993 }
3994 else {
Victor Stinnerb3653a32015-10-09 03:38:24 +02003995 result = PyBytes_FromStringAndSize(writer->small_buffer, pos);
Victor Stinner00165072015-10-09 01:53:21 +02003996 }
3997
3998 return result;
3999}