blob: 532051e2424be631e7239005f8d3944147de3919 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Mark Dickinsonfd24b322008-12-06 15:33:31 +000025/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26 for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Christian Heimes2c9c7a52008-05-26 13:42:13 +000033/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000034 For PyBytes_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
36
 37 For PyBytes_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000045 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046 alter the data yourself, since the strings may be shared.
47
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020050 allocated for string data, not counting the null terminating character.
51 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 PyBytes_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyBytes_FromString()).
54*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
/* Format flag bits, one per flag character of a '%' conversion:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 *
 * The values are distinct single bits so they can be OR-ed together.
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200414 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800415{
416 char *p;
417 PyObject *result;
418 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800420
421 x = PyFloat_AsDouble(v);
422 if (x == -1.0 && PyErr_Occurred()) {
423 PyErr_Format(PyExc_TypeError, "float argument required, "
424 "not %.200s", Py_TYPE(v)->tp_name);
425 return NULL;
426 }
427
428 if (prec < 0)
429 prec = 6;
430
431 p = PyOS_double_to_string(x, type, prec,
432 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433
434 if (p == NULL)
435 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200436
437 len = strlen(p);
438 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200439 str = _PyBytesWriter_Prepare(writer, str, len);
440 if (str == NULL)
441 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 Py_MEMCPY(str, p, len);
443 str += len;
444 return str;
445 }
446
447 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800448 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200449 *p_result = result;
450 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800451}
452
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300453static PyObject *
454formatlong(PyObject *v, int flags, int prec, int type)
455{
456 PyObject *result, *iobj;
457 if (type == 'i')
458 type = 'd';
459 if (PyLong_Check(v))
460 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
461 if (PyNumber_Check(v)) {
462 /* make sure number is a type of integer for o, x, and X */
463 if (type == 'o' || type == 'x' || type == 'X')
464 iobj = PyNumber_Index(v);
465 else
466 iobj = PyNumber_Long(v);
467 if (iobj == NULL) {
468 if (!PyErr_ExceptionMatches(PyExc_TypeError))
469 return NULL;
470 }
471 else if (!PyLong_Check(iobj))
472 Py_CLEAR(iobj);
473 if (iobj != NULL) {
474 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
475 Py_DECREF(iobj);
476 return result;
477 }
478 }
479 PyErr_Format(PyExc_TypeError,
480 "%%%c format: %s is required, not %.200s", type,
481 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
482 : "a number",
483 Py_TYPE(v)->tp_name);
484 return NULL;
485}
486
487static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800489{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
491 *p = PyBytes_AS_STRING(arg)[0];
492 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800493 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
495 *p = PyByteArray_AS_STRING(arg)[0];
496 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497 }
498 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300499 PyObject *iobj;
500 long ival;
501 int overflow;
502 /* make sure number is a type of integer */
503 if (PyLong_Check(arg)) {
504 ival = PyLong_AsLongAndOverflow(arg, &overflow);
505 }
506 else {
507 iobj = PyNumber_Index(arg);
508 if (iobj == NULL) {
509 if (!PyErr_ExceptionMatches(PyExc_TypeError))
510 return 0;
511 goto onError;
512 }
513 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
514 Py_DECREF(iobj);
515 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300516 if (!overflow && ival == -1 && PyErr_Occurred())
517 goto onError;
518 if (overflow || !(0 <= ival && ival <= 255)) {
519 PyErr_SetString(PyExc_OverflowError,
520 "%c arg not in range(256)");
521 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300523 *p = (char)ival;
524 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800525 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300526 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200527 PyErr_SetString(PyExc_TypeError,
528 "%c requires an integer in range(256) or a single byte");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530}
531
532static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537 /* is it a bytes object? */
538 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200539 *pbuf = PyBytes_AS_STRING(v);
540 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800541 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 return v;
543 }
544 if (PyByteArray_Check(v)) {
545 *pbuf = PyByteArray_AS_STRING(v);
546 *plen = PyByteArray_GET_SIZE(v);
547 Py_INCREF(v);
548 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800549 }
550 /* does it support __bytes__? */
551 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
552 if (func != NULL) {
553 result = PyObject_CallFunctionObjArgs(func, NULL);
554 Py_DECREF(func);
555 if (result == NULL)
556 return NULL;
557 if (!PyBytes_Check(result)) {
558 PyErr_Format(PyExc_TypeError,
559 "__bytes__ returned non-bytes (type %.200s)",
560 Py_TYPE(result)->tp_name);
561 Py_DECREF(result);
562 return NULL;
563 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200564 *pbuf = PyBytes_AS_STRING(result);
565 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800566 return result;
567 }
568 PyErr_Format(PyExc_TypeError,
569 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
570 Py_TYPE(v)->tp_name);
571 return NULL;
572}
573
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200574/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800575
576PyObject *
577_PyBytes_Format(PyObject *format, PyObject *args)
578{
579 char *fmt, *res;
580 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 _PyBytesWriter writer;
585
Ethan Furmanb95b5612015-01-23 20:05:18 -0800586 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
587 PyErr_BadInternalCall();
588 return NULL;
589 }
590 fmt = PyBytes_AS_STRING(format);
591 fmtcnt = PyBytes_GET_SIZE(format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592
593 _PyBytesWriter_Init(&writer);
594
595 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
596 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800597 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200598 writer.overallocate = 1;
599
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 if (PyTuple_Check(args)) {
601 arglen = PyTuple_GET_SIZE(args);
602 argidx = 0;
603 }
604 else {
605 arglen = -1;
606 argidx = -2;
607 }
608 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
609 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
610 !PyByteArray_Check(args)) {
611 dict = args;
612 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200613
Ethan Furmanb95b5612015-01-23 20:05:18 -0800614 while (--fmtcnt >= 0) {
615 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616 Py_ssize_t len;
617 char *pos;
618
619 pos = strchr(fmt + 1, '%');
620 if (pos != NULL)
621 len = pos - fmt;
622 else {
623 len = PyBytes_GET_SIZE(format);
624 len -= (fmt - PyBytes_AS_STRING(format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 assert(len != 0);
627
628 Py_MEMCPY(res, fmt, len);
629 res += len;
630 fmt += len;
631 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 }
633 else {
634 /* Got a format specifier */
635 int flags = 0;
636 Py_ssize_t width = -1;
637 int prec = -1;
638 int c = '\0';
639 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 PyObject *v = NULL;
641 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200642 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200644 Py_ssize_t len = 0;
645 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 Py_ssize_t alloc;
647#ifdef Py_DEBUG
648 char *before;
649#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 fmt++;
652 if (*fmt == '(') {
653 char *keystart;
654 Py_ssize_t keylen;
655 PyObject *key;
656 int pcount = 1;
657
658 if (dict == NULL) {
659 PyErr_SetString(PyExc_TypeError,
660 "format requires a mapping");
661 goto error;
662 }
663 ++fmt;
664 --fmtcnt;
665 keystart = fmt;
666 /* Skip over balanced parentheses */
667 while (pcount > 0 && --fmtcnt >= 0) {
668 if (*fmt == ')')
669 --pcount;
670 else if (*fmt == '(')
671 ++pcount;
672 fmt++;
673 }
674 keylen = fmt - keystart - 1;
675 if (fmtcnt < 0 || pcount > 0) {
676 PyErr_SetString(PyExc_ValueError,
677 "incomplete format key");
678 goto error;
679 }
680 key = PyBytes_FromStringAndSize(keystart,
681 keylen);
682 if (key == NULL)
683 goto error;
684 if (args_owned) {
685 Py_DECREF(args);
686 args_owned = 0;
687 }
688 args = PyObject_GetItem(dict, key);
689 Py_DECREF(key);
690 if (args == NULL) {
691 goto error;
692 }
693 args_owned = 1;
694 arglen = -1;
695 argidx = -2;
696 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200697
698 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800699 while (--fmtcnt >= 0) {
700 switch (c = *fmt++) {
701 case '-': flags |= F_LJUST; continue;
702 case '+': flags |= F_SIGN; continue;
703 case ' ': flags |= F_BLANK; continue;
704 case '#': flags |= F_ALT; continue;
705 case '0': flags |= F_ZERO; continue;
706 }
707 break;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 if (c == '*') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 if (!PyLong_Check(v)) {
716 PyErr_SetString(PyExc_TypeError,
717 "* wants int");
718 goto error;
719 }
720 width = PyLong_AsSsize_t(v);
721 if (width == -1 && PyErr_Occurred())
722 goto error;
723 if (width < 0) {
724 flags |= F_LJUST;
725 width = -width;
726 }
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 else if (c >= 0 && isdigit(c)) {
731 width = c - '0';
732 while (--fmtcnt >= 0) {
733 c = Py_CHARMASK(*fmt++);
734 if (!isdigit(c))
735 break;
736 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
737 PyErr_SetString(
738 PyExc_ValueError,
739 "width too big");
740 goto error;
741 }
742 width = width*10 + (c - '0');
743 }
744 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200745
746 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800747 if (c == '.') {
748 prec = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!PyLong_Check(v)) {
756 PyErr_SetString(
757 PyExc_TypeError,
758 "* wants int");
759 goto error;
760 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200761 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (prec == -1 && PyErr_Occurred())
763 goto error;
764 if (prec < 0)
765 prec = 0;
766 if (--fmtcnt >= 0)
767 c = *fmt++;
768 }
769 else if (c >= 0 && isdigit(c)) {
770 prec = c - '0';
771 while (--fmtcnt >= 0) {
772 c = Py_CHARMASK(*fmt++);
773 if (!isdigit(c))
774 break;
775 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
776 PyErr_SetString(
777 PyExc_ValueError,
778 "prec too big");
779 goto error;
780 }
781 prec = prec*10 + (c - '0');
782 }
783 }
784 } /* prec */
785 if (fmtcnt >= 0) {
786 if (c == 'h' || c == 'l' || c == 'L') {
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 }
790 }
791 if (fmtcnt < 0) {
792 PyErr_SetString(PyExc_ValueError,
793 "incomplete format");
794 goto error;
795 }
796 if (c != '%') {
797 v = getnextarg(args, arglen, &argidx);
798 if (v == NULL)
799 goto error;
800 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200801
802 if (fmtcnt < 0) {
803 /* last writer: disable writer overallocation */
804 writer.overallocate = 0;
805 }
806
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 sign = 0;
808 fill = ' ';
809 switch (c) {
810 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200811 *res++ = '%';
812 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813
Ethan Furman62e977f2015-03-11 08:17:00 -0700814 case 'r':
815 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200817 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800818 if (temp == NULL)
819 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200820 assert(PyUnicode_IS_ASCII(temp));
821 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
822 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (prec >= 0 && len > prec)
824 len = prec;
825 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 case 's':
828 // %s is only for 2/3 code; 3 only code should use %b
829 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200830 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 if (temp == NULL)
832 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 if (prec >= 0 && len > prec)
834 len = prec;
835 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200836
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 case 'i':
838 case 'd':
839 case 'u':
840 case 'o':
841 case 'x':
842 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200843 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200844 && width == -1 && prec == -1
845 && !(flags & (F_SIGN | F_BLANK))
846 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200847 {
848 /* Fast path */
849 int alternate = flags & F_ALT;
850 int base;
851
852 switch(c)
853 {
854 default:
855 assert(0 && "'type' not in [diuoxX]");
856 case 'd':
857 case 'i':
858 case 'u':
859 base = 10;
860 break;
861 case 'o':
862 base = 8;
863 break;
864 case 'x':
865 case 'X':
866 base = 16;
867 break;
868 }
869
870 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200871 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200872 res = _PyLong_FormatBytesWriter(&writer, res,
873 v, base, alternate);
874 if (res == NULL)
875 goto error;
876 continue;
877 }
878
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300879 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200880 if (!temp)
881 goto error;
882 assert(PyUnicode_IS_ASCII(temp));
883 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
884 len = PyUnicode_GET_LENGTH(temp);
885 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 if (flags & F_ZERO)
887 fill = '0';
888 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200889
Ethan Furmanb95b5612015-01-23 20:05:18 -0800890 case 'e':
891 case 'E':
892 case 'f':
893 case 'F':
894 case 'g':
895 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200896 if (width == -1 && prec == -1
897 && !(flags & (F_SIGN | F_BLANK)))
898 {
899 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200900 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200901 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (res == NULL)
903 goto error;
904 continue;
905 }
906
Victor Stinnerad771582015-10-09 12:38:53 +0200907 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800908 goto error;
909 pbuf = PyBytes_AS_STRING(temp);
910 len = PyBytes_GET_SIZE(temp);
911 sign = 1;
912 if (flags & F_ZERO)
913 fill = '0';
914 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200915
Ethan Furmanb95b5612015-01-23 20:05:18 -0800916 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200917 pbuf = &onechar;
918 len = byte_converter(v, &onechar);
919 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800920 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200921 if (width == -1) {
922 /* Fast path */
923 *res++ = onechar;
924 continue;
925 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200927
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 default:
929 PyErr_Format(PyExc_ValueError,
930 "unsupported format character '%c' (0x%x) "
931 "at index %zd",
932 c, c,
933 (Py_ssize_t)(fmt - 1 -
934 PyBytes_AsString(format)));
935 goto error;
936 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200937
Ethan Furmanb95b5612015-01-23 20:05:18 -0800938 if (sign) {
939 if (*pbuf == '-' || *pbuf == '+') {
940 sign = *pbuf++;
941 len--;
942 }
943 else if (flags & F_SIGN)
944 sign = '+';
945 else if (flags & F_BLANK)
946 sign = ' ';
947 else
948 sign = 0;
949 }
950 if (width < len)
951 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200952
953 alloc = width;
954 if (sign != 0 && len == width)
955 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200956 /* 2: size preallocated for %s */
957 if (alloc > 2) {
958 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200959 if (res == NULL)
960 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800961 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200962#ifdef Py_DEBUG
963 before = res;
964#endif
965
966 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800967 if (sign) {
968 if (fill != ' ')
969 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800970 if (width > len)
971 width--;
972 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200973
974 /* Write the numeric prefix for "x", "X" and "o" formats
975 if the alternate form is used.
976 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800977 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
978 assert(pbuf[0] == '0');
979 assert(pbuf[1] == c);
980 if (fill != ' ') {
981 *res++ = *pbuf++;
982 *res++ = *pbuf++;
983 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800984 width -= 2;
985 if (width < 0)
986 width = 0;
987 len -= 2;
988 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200989
990 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800991 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200992 memset(res, fill, width - len);
993 res += (width - len);
994 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800995 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200996
997 /* If padding with spaces: write sign if needed and/or numeric
998 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800999 if (fill == ' ') {
1000 if (sign)
1001 *res++ = sign;
1002 if ((flags & F_ALT) &&
1003 (c == 'x' || c == 'X')) {
1004 assert(pbuf[0] == '0');
1005 assert(pbuf[1] == c);
1006 *res++ = *pbuf++;
1007 *res++ = *pbuf++;
1008 }
1009 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001010
1011 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001012 Py_MEMCPY(res, pbuf, len);
1013 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001014
1015 /* Pad right with the fill character if needed */
1016 if (width > len) {
1017 memset(res, ' ', width - len);
1018 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001020
Ethan Furmanb95b5612015-01-23 20:05:18 -08001021 if (dict && (argidx < arglen) && c != '%') {
1022 PyErr_SetString(PyExc_TypeError,
1023 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001024 Py_XDECREF(temp);
1025 goto error;
1026 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001027 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001028
1029#ifdef Py_DEBUG
1030 /* check that we computed the exact size for this write */
1031 assert((res - before) == alloc);
1032#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
1035 /* If overallocation was disabled, ensure that it was the last
1036 write. Otherwise, we missed an optimization */
1037 assert(writer.overallocate || fmtcnt < 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001038 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001039
Ethan Furmanb95b5612015-01-23 20:05:18 -08001040 if (argidx < arglen && !dict) {
1041 PyErr_SetString(PyExc_TypeError,
1042 "not all arguments converted during bytes formatting");
1043 goto error;
1044 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001045
Ethan Furmanb95b5612015-01-23 20:05:18 -08001046 if (args_owned) {
1047 Py_DECREF(args);
1048 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001049 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001050
1051 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001052 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001053 if (args_owned) {
1054 Py_DECREF(args);
1055 }
1056 return NULL;
1057}
1058
1059/* =-= */
1060
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001061static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001062bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001065}
1066
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001067/* Unescape a backslash-escaped string. If unicode is non-zero,
1068 the string is a u-literal. If recode_encoding is non-zero,
1069 the string is UTF-8 encoded and should be re-encoded in the
1070 specified encoding. */
1071
1072PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 Py_ssize_t len,
1074 const char *errors,
1075 Py_ssize_t unicode,
1076 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001077{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 int c;
1079 char *p, *buf;
1080 const char *end;
1081 PyObject *v;
1082 Py_ssize_t newlen = recode_encoding ? 4*len:len;
1083 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
1084 if (v == NULL)
1085 return NULL;
1086 p = buf = PyBytes_AsString(v);
1087 end = s + len;
1088 while (s < end) {
1089 if (*s != '\\') {
1090 non_esc:
1091 if (recode_encoding && (*s & 0x80)) {
1092 PyObject *u, *w;
1093 char *r;
1094 const char* t;
1095 Py_ssize_t rn;
1096 t = s;
1097 /* Decode non-ASCII bytes as UTF-8. */
1098 while (t < end && (*t & 0x80)) t++;
1099 u = PyUnicode_DecodeUTF8(s, t - s, errors);
1100 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 /* Recode them in target encoding. */
1103 w = PyUnicode_AsEncodedString(
1104 u, recode_encoding, errors);
1105 Py_DECREF(u);
1106 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 /* Append bytes to output buffer. */
1109 assert(PyBytes_Check(w));
1110 r = PyBytes_AS_STRING(w);
1111 rn = PyBytes_GET_SIZE(w);
1112 Py_MEMCPY(p, r, rn);
1113 p += rn;
1114 Py_DECREF(w);
1115 s = t;
1116 } else {
1117 *p++ = *s++;
1118 }
1119 continue;
1120 }
1121 s++;
1122 if (s==end) {
1123 PyErr_SetString(PyExc_ValueError,
1124 "Trailing \\ in string");
1125 goto failed;
1126 }
1127 switch (*s++) {
1128 /* XXX This assumes ASCII! */
1129 case '\n': break;
1130 case '\\': *p++ = '\\'; break;
1131 case '\'': *p++ = '\''; break;
1132 case '\"': *p++ = '\"'; break;
1133 case 'b': *p++ = '\b'; break;
1134 case 'f': *p++ = '\014'; break; /* FF */
1135 case 't': *p++ = '\t'; break;
1136 case 'n': *p++ = '\n'; break;
1137 case 'r': *p++ = '\r'; break;
1138 case 'v': *p++ = '\013'; break; /* VT */
1139 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1140 case '0': case '1': case '2': case '3':
1141 case '4': case '5': case '6': case '7':
1142 c = s[-1] - '0';
1143 if (s < end && '0' <= *s && *s <= '7') {
1144 c = (c<<3) + *s++ - '0';
1145 if (s < end && '0' <= *s && *s <= '7')
1146 c = (c<<3) + *s++ - '0';
1147 }
1148 *p++ = c;
1149 break;
1150 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001151 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001152 unsigned int x = 0;
1153 c = Py_CHARMASK(*s);
1154 s++;
David Malcolm96960882010-11-05 17:23:41 +00001155 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001157 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 x = 10 + c - 'a';
1159 else
1160 x = 10 + c - 'A';
1161 x = x << 4;
1162 c = Py_CHARMASK(*s);
1163 s++;
David Malcolm96960882010-11-05 17:23:41 +00001164 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001166 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 x += 10 + c - 'a';
1168 else
1169 x += 10 + c - 'A';
1170 *p++ = x;
1171 break;
1172 }
1173 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001174 PyErr_Format(PyExc_ValueError,
1175 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001176 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 goto failed;
1178 }
1179 if (strcmp(errors, "replace") == 0) {
1180 *p++ = '?';
1181 } else if (strcmp(errors, "ignore") == 0)
1182 /* do nothing */;
1183 else {
1184 PyErr_Format(PyExc_ValueError,
1185 "decoding error; unknown "
1186 "error handling code: %.400s",
1187 errors);
1188 goto failed;
1189 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001190 /* skip \x */
1191 if (s < end && Py_ISXDIGIT(s[0]))
1192 s++; /* and a hexdigit */
1193 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 default:
1195 *p++ = '\\';
1196 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001197 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 UTF-8 bytes may follow. */
1199 }
1200 }
1201 if (p-buf < newlen)
1202 _PyBytes_Resize(&v, p - buf);
1203 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001204 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 Py_DECREF(v);
1206 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001207}
1208
1209/* -------------------------------------------------------------------- */
1210/* object api */
1211
1212Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001213PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001214{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 if (!PyBytes_Check(op)) {
1216 PyErr_Format(PyExc_TypeError,
1217 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1218 return -1;
1219 }
1220 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221}
1222
1223char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001224PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001225{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 if (!PyBytes_Check(op)) {
1227 PyErr_Format(PyExc_TypeError,
1228 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1229 return NULL;
1230 }
1231 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001232}
1233
1234int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001235PyBytes_AsStringAndSize(PyObject *obj,
1236 char **s,
1237 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 if (s == NULL) {
1240 PyErr_BadInternalCall();
1241 return -1;
1242 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 if (!PyBytes_Check(obj)) {
1245 PyErr_Format(PyExc_TypeError,
1246 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1247 return -1;
1248 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 *s = PyBytes_AS_STRING(obj);
1251 if (len != NULL)
1252 *len = PyBytes_GET_SIZE(obj);
1253 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001254 PyErr_SetString(PyExc_ValueError,
1255 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 return -1;
1257 }
1258 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001259}
Neal Norwitz6968b052007-02-27 19:02:19 +00001260
1261/* -------------------------------------------------------------------- */
1262/* Methods */
1263
Eric Smith0923d1d2009-04-16 20:16:10 +00001264#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001265
1266#include "stringlib/fastsearch.h"
1267#include "stringlib/count.h"
1268#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001269#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001270#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001271#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001272#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001273
Eric Smith0f78bff2009-11-30 01:01:42 +00001274#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001275
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001276PyObject *
1277PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001278{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001279 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001281 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001283 unsigned char quote, *s, *p;
1284
1285 /* Compute size of output string */
1286 squotes = dquotes = 0;
1287 newsize = 3; /* b'' */
1288 s = (unsigned char*)op->ob_sval;
1289 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001290 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001292 case '\'': squotes++; break;
1293 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001295 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001296 default:
1297 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001298 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001300 if (newsize > PY_SSIZE_T_MAX - incr)
1301 goto overflow;
1302 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 }
1304 quote = '\'';
1305 if (smartquotes && squotes && !dquotes)
1306 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001307 if (squotes && quote == '\'') {
1308 if (newsize > PY_SSIZE_T_MAX - squotes)
1309 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312
1313 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 if (v == NULL) {
1315 return NULL;
1316 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001318
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 *p++ = 'b', *p++ = quote;
1320 for (i = 0; i < length; i++) {
1321 unsigned char c = op->ob_sval[i];
1322 if (c == quote || c == '\\')
1323 *p++ = '\\', *p++ = c;
1324 else if (c == '\t')
1325 *p++ = '\\', *p++ = 't';
1326 else if (c == '\n')
1327 *p++ = '\\', *p++ = 'n';
1328 else if (c == '\r')
1329 *p++ = '\\', *p++ = 'r';
1330 else if (c < ' ' || c >= 0x7f) {
1331 *p++ = '\\';
1332 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001333 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1334 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336 else
1337 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001339 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001340 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001341 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001342
1343 overflow:
1344 PyErr_SetString(PyExc_OverflowError,
1345 "bytes object is too large to make repr");
1346 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001347}
1348
Neal Norwitz6968b052007-02-27 19:02:19 +00001349static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001350bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001353}
1354
Neal Norwitz6968b052007-02-27 19:02:19 +00001355static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001356bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 if (Py_BytesWarningFlag) {
1359 if (PyErr_WarnEx(PyExc_BytesWarning,
1360 "str() on a bytes instance", 1))
1361 return NULL;
1362 }
1363 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001364}
1365
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001366static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001367bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001368{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370}
Neal Norwitz6968b052007-02-27 19:02:19 +00001371
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001372/* This is also used by PyBytes_Concat() */
1373static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001374bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001375{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 Py_ssize_t size;
1377 Py_buffer va, vb;
1378 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 va.len = -1;
1381 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001382 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1383 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1385 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1386 goto done;
1387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 /* Optimize end cases */
1390 if (va.len == 0 && PyBytes_CheckExact(b)) {
1391 result = b;
1392 Py_INCREF(result);
1393 goto done;
1394 }
1395 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1396 result = a;
1397 Py_INCREF(result);
1398 goto done;
1399 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 size = va.len + vb.len;
1402 if (size < 0) {
1403 PyErr_NoMemory();
1404 goto done;
1405 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 result = PyBytes_FromStringAndSize(NULL, size);
1408 if (result != NULL) {
1409 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1410 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1411 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001412
1413 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 if (va.len != -1)
1415 PyBuffer_Release(&va);
1416 if (vb.len != -1)
1417 PyBuffer_Release(&vb);
1418 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001419}
Neal Norwitz6968b052007-02-27 19:02:19 +00001420
1421static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001422bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001423{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001424 Py_ssize_t i;
1425 Py_ssize_t j;
1426 Py_ssize_t size;
1427 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 size_t nbytes;
1429 if (n < 0)
1430 n = 0;
1431 /* watch out for overflows: the size can overflow int,
1432 * and the # of bytes needed can overflow size_t
1433 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001434 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 PyErr_SetString(PyExc_OverflowError,
1436 "repeated bytes are too long");
1437 return NULL;
1438 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001439 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1441 Py_INCREF(a);
1442 return (PyObject *)a;
1443 }
1444 nbytes = (size_t)size;
1445 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1446 PyErr_SetString(PyExc_OverflowError,
1447 "repeated bytes are too long");
1448 return NULL;
1449 }
1450 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1451 if (op == NULL)
1452 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001453 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 op->ob_shash = -1;
1455 op->ob_sval[size] = '\0';
1456 if (Py_SIZE(a) == 1 && n > 0) {
1457 memset(op->ob_sval, a->ob_sval[0] , n);
1458 return (PyObject *) op;
1459 }
1460 i = 0;
1461 if (i < size) {
1462 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1463 i = Py_SIZE(a);
1464 }
1465 while (i < size) {
1466 j = (i <= size-i) ? i : size-i;
1467 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1468 i += j;
1469 }
1470 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001471}
1472
Guido van Rossum98297ee2007-11-06 21:34:58 +00001473static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001474bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001475{
1476 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1477 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001478 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001479 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001480 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001481 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001482 return -1;
1483 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1484 varg.buf, varg.len, 0);
1485 PyBuffer_Release(&varg);
1486 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001487 }
1488 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001489 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1490 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001491 }
1492
Antoine Pitrou0010d372010-08-15 17:12:55 +00001493 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001494}
1495
Neal Norwitz6968b052007-02-27 19:02:19 +00001496static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001497bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 if (i < 0 || i >= Py_SIZE(a)) {
1500 PyErr_SetString(PyExc_IndexError, "index out of range");
1501 return NULL;
1502 }
1503 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001504}
1505
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001506Py_LOCAL(int)
1507bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1508{
1509 int cmp;
1510 Py_ssize_t len;
1511
1512 len = Py_SIZE(a);
1513 if (Py_SIZE(b) != len)
1514 return 0;
1515
1516 if (a->ob_sval[0] != b->ob_sval[0])
1517 return 0;
1518
1519 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1520 return (cmp == 0);
1521}
1522
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001523static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001524bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001525{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001526 int c;
1527 Py_ssize_t len_a, len_b;
1528 Py_ssize_t min_len;
1529 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001530 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 /* Make sure both arguments are strings. */
1533 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001534 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001535 rc = PyObject_IsInstance((PyObject*)a,
1536 (PyObject*)&PyUnicode_Type);
1537 if (!rc)
1538 rc = PyObject_IsInstance((PyObject*)b,
1539 (PyObject*)&PyUnicode_Type);
1540 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001542 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001543 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001544 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001545 return NULL;
1546 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001547 else {
1548 rc = PyObject_IsInstance((PyObject*)a,
1549 (PyObject*)&PyLong_Type);
1550 if (!rc)
1551 rc = PyObject_IsInstance((PyObject*)b,
1552 (PyObject*)&PyLong_Type);
1553 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001554 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001555 if (rc) {
1556 if (PyErr_WarnEx(PyExc_BytesWarning,
1557 "Comparison between bytes and int", 1))
1558 return NULL;
1559 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001560 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 }
1562 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001564 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001566 case Py_EQ:
1567 case Py_LE:
1568 case Py_GE:
1569 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001571 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001572 case Py_NE:
1573 case Py_LT:
1574 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001576 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001577 default:
1578 PyErr_BadArgument();
1579 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 }
1581 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001582 else if (op == Py_EQ || op == Py_NE) {
1583 int eq = bytes_compare_eq(a, b);
1584 eq ^= (op == Py_NE);
1585 result = eq ? Py_True : Py_False;
1586 }
1587 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001588 len_a = Py_SIZE(a);
1589 len_b = Py_SIZE(b);
1590 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001591 if (min_len > 0) {
1592 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001593 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001594 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001595 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001596 else
1597 c = 0;
1598 if (c == 0)
1599 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1600 switch (op) {
1601 case Py_LT: c = c < 0; break;
1602 case Py_LE: c = c <= 0; break;
1603 case Py_GT: c = c > 0; break;
1604 case Py_GE: c = c >= 0; break;
1605 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001606 PyErr_BadArgument();
1607 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001608 }
1609 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 Py_INCREF(result);
1613 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001614}
1615
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001616static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001617bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001618{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001619 if (a->ob_shash == -1) {
1620 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001621 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001622 }
1623 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001624}
1625
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001626static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001627bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001628{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 if (PyIndex_Check(item)) {
1630 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1631 if (i == -1 && PyErr_Occurred())
1632 return NULL;
1633 if (i < 0)
1634 i += PyBytes_GET_SIZE(self);
1635 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1636 PyErr_SetString(PyExc_IndexError,
1637 "index out of range");
1638 return NULL;
1639 }
1640 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1641 }
1642 else if (PySlice_Check(item)) {
1643 Py_ssize_t start, stop, step, slicelength, cur, i;
1644 char* source_buf;
1645 char* result_buf;
1646 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001647
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001648 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 PyBytes_GET_SIZE(self),
1650 &start, &stop, &step, &slicelength) < 0) {
1651 return NULL;
1652 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 if (slicelength <= 0) {
1655 return PyBytes_FromStringAndSize("", 0);
1656 }
1657 else if (start == 0 && step == 1 &&
1658 slicelength == PyBytes_GET_SIZE(self) &&
1659 PyBytes_CheckExact(self)) {
1660 Py_INCREF(self);
1661 return (PyObject *)self;
1662 }
1663 else if (step == 1) {
1664 return PyBytes_FromStringAndSize(
1665 PyBytes_AS_STRING(self) + start,
1666 slicelength);
1667 }
1668 else {
1669 source_buf = PyBytes_AS_STRING(self);
1670 result = PyBytes_FromStringAndSize(NULL, slicelength);
1671 if (result == NULL)
1672 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 result_buf = PyBytes_AS_STRING(result);
1675 for (cur = start, i = 0; i < slicelength;
1676 cur += step, i++) {
1677 result_buf[i] = source_buf[cur];
1678 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 return result;
1681 }
1682 }
1683 else {
1684 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001685 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 Py_TYPE(item)->tp_name);
1687 return NULL;
1688 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001689}
1690
1691static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001692bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1695 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001696}
1697
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001698static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 (lenfunc)bytes_length, /*sq_length*/
1700 (binaryfunc)bytes_concat, /*sq_concat*/
1701 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1702 (ssizeargfunc)bytes_item, /*sq_item*/
1703 0, /*sq_slice*/
1704 0, /*sq_ass_item*/
1705 0, /*sq_ass_slice*/
1706 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707};
1708
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001709static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001710 (lenfunc)bytes_length,
1711 (binaryfunc)bytes_subscript,
1712 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713};
1714
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001715static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 (getbufferproc)bytes_buffer_getbuffer,
1717 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001718};
1719
1720
/* Values for the "striptype" argument of do_strip(), do_xstrip() and
   do_argstrip().  The helpers trim the front unless the value is
   RIGHTSTRIP and trim the back unless the value is LEFTSTRIP. */
#define LEFTSTRIP   0   /* trim leading bytes only */
#define RIGHTSTRIP  1   /* trim trailing bytes only */
#define BOTHSTRIP   2   /* trim both ends */
1724
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001725/*[clinic input]
1726bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001728 sep: object = None
1729 The delimiter according which to split the bytes.
1730 None (the default value) means split on ASCII whitespace characters
1731 (space, tab, return, newline, formfeed, vertical tab).
1732 maxsplit: Py_ssize_t = -1
1733 Maximum number of splits to do.
1734 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001735
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001736Return a list of the sections in the bytes, using sep as the delimiter.
1737[clinic start generated code]*/
1738
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001740bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001741/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742{
1743 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 const char *s = PyBytes_AS_STRING(self), *sub;
1745 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001746 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 if (maxsplit < 0)
1749 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001752 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 return NULL;
1754 sub = vsub.buf;
1755 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1758 PyBuffer_Release(&vsub);
1759 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001760}
1761
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001762/*[clinic input]
1763bytes.partition
1764
1765 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001767 /
1768
1769Partition the bytes into three parts using the given separator.
1770
1771This will search for the separator sep in the bytes. If the separator is found,
1772returns a 3-tuple containing the part before the separator, the separator
1773itself, and the part after it.
1774
1775If the separator is not found, returns a 3-tuple containing the original bytes
1776object and two empty bytes objects.
1777[clinic start generated code]*/
1778
Neal Norwitz6968b052007-02-27 19:02:19 +00001779static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001781/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001782{
Neal Norwitz6968b052007-02-27 19:02:19 +00001783 return stringlib_partition(
1784 (PyObject*) self,
1785 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001786 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001787 );
1788}
1789
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790/*[clinic input]
1791bytes.rpartition
1792
1793 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001795 /
1796
1797Partition the bytes into three parts using the given separator.
1798
1799This will search for the separator sep in the bytes, starting and the end. If
1800the separator is found, returns a 3-tuple containing the part before the
1801separator, the separator itself, and the part after it.
1802
1803If the separator is not found, returns a 3-tuple containing two empty bytes
1804objects and the original bytes object.
1805[clinic start generated code]*/
1806
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001807static PyObject *
1808bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001809/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001811 return stringlib_rpartition(
1812 (PyObject*) self,
1813 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001814 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001816}
1817
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001818/*[clinic input]
1819bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001820
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001821Return a list of the sections in the bytes, using sep as the delimiter.
1822
1823Splitting is done starting at the end of the bytes and working to the front.
1824[clinic start generated code]*/
1825
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001826static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001827bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001828/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001829{
1830 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001831 const char *s = PyBytes_AS_STRING(self), *sub;
1832 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001833 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 if (maxsplit < 0)
1836 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001837 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001839 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 return NULL;
1841 sub = vsub.buf;
1842 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001843
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001844 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1845 PyBuffer_Release(&vsub);
1846 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001847}
1848
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001849
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001850/*[clinic input]
1851bytes.join
1852
1853 iterable_of_bytes: object
1854 /
1855
1856Concatenate any number of bytes objects.
1857
1858The bytes whose method is called is inserted in between each pair.
1859
1860The result is returned as a new bytes object.
1861
1862Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1863[clinic start generated code]*/
1864
Neal Norwitz6968b052007-02-27 19:02:19 +00001865static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001866bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001867/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001868{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001869 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001870}
1871
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001872PyObject *
1873_PyBytes_Join(PyObject *sep, PyObject *x)
1874{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 assert(sep != NULL && PyBytes_Check(sep));
1876 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001877 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001878}
1879
/* Helper macro to fix up start/end slice values, in place.

   - end greater than len is clamped to len; a negative end is taken
     relative to len and clamped at 0.
   - a negative start is taken relative to len and clamped at 0.  A start
     greater than len is left unchanged, so callers must cope with
     end - start being negative.

   start and end must be modifiable lvalues.  len is evaluated more than
   once, so it must not have side effects.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   exactly one statement and is safe in unbraced if/else bodies; call it
   with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001894
1895Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001896bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001897{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001899 char byte;
1900 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001901 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001902 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001904 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001905
Antoine Pitrouac65d962011-10-20 23:54:17 +02001906 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1907 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001908 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001909
Antoine Pitrouac65d962011-10-20 23:54:17 +02001910 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001911 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001912 return -2;
1913
1914 sub = subbuf.buf;
1915 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001916 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001917 else {
1918 sub = &byte;
1919 sub_len = 1;
1920 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001921 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001922
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001923 ADJUST_INDICES(start, end, len);
1924 if (end - start < sub_len)
1925 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001926 else if (sub_len == 1
1927#ifndef HAVE_MEMRCHR
1928 && dir > 0
1929#endif
1930 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001931 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001932 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001933 res = stringlib_fastsearch_memchr_1char(
1934 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001935 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001936 if (res >= 0)
1937 res += start;
1938 }
1939 else {
1940 if (dir > 0)
1941 res = stringlib_find_slice(
1942 PyBytes_AS_STRING(self), len,
1943 sub, sub_len, start, end);
1944 else
1945 res = stringlib_rfind_slice(
1946 PyBytes_AS_STRING(self), len,
1947 sub, sub_len, start, end);
1948 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001949
1950 if (subobj)
1951 PyBuffer_Release(&subbuf);
1952
1953 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001954}
1955
1956
1957PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001958"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001959\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001960Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001961such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001962arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001963\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001964Return -1 on failure.");
1965
Neal Norwitz6968b052007-02-27 19:02:19 +00001966static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001967bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001969 Py_ssize_t result = bytes_find_internal(self, args, +1);
1970 if (result == -2)
1971 return NULL;
1972 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001973}
1974
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001975
1976PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001977"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001978\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001979Like B.find() but raise ValueError when the substring is not found.");
1980
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001981static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001982bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001983{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 Py_ssize_t result = bytes_find_internal(self, args, +1);
1985 if (result == -2)
1986 return NULL;
1987 if (result == -1) {
1988 PyErr_SetString(PyExc_ValueError,
1989 "substring not found");
1990 return NULL;
1991 }
1992 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001993}
1994
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001995
1996PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001997"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001998\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001999Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002000such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002001arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002002\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002003Return -1 on failure.");
2004
Neal Norwitz6968b052007-02-27 19:02:19 +00002005static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002006bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002007{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 Py_ssize_t result = bytes_find_internal(self, args, -1);
2009 if (result == -2)
2010 return NULL;
2011 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002012}
2013
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002014
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002016"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017\n\
2018Like B.rfind() but raise ValueError when the substring is not found.");
2019
2020static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002021bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002023 Py_ssize_t result = bytes_find_internal(self, args, -1);
2024 if (result == -2)
2025 return NULL;
2026 if (result == -1) {
2027 PyErr_SetString(PyExc_ValueError,
2028 "substring not found");
2029 return NULL;
2030 }
2031 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002032}
2033
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002034
2035Py_LOCAL_INLINE(PyObject *)
2036do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 Py_buffer vsep;
2039 char *s = PyBytes_AS_STRING(self);
2040 Py_ssize_t len = PyBytes_GET_SIZE(self);
2041 char *sep;
2042 Py_ssize_t seplen;
2043 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002045 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 return NULL;
2047 sep = vsep.buf;
2048 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 i = 0;
2051 if (striptype != RIGHTSTRIP) {
2052 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2053 i++;
2054 }
2055 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002057 j = len;
2058 if (striptype != LEFTSTRIP) {
2059 do {
2060 j--;
2061 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2062 j++;
2063 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2068 Py_INCREF(self);
2069 return (PyObject*)self;
2070 }
2071 else
2072 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002073}
2074
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002075
2076Py_LOCAL_INLINE(PyObject *)
2077do_strip(PyBytesObject *self, int striptype)
2078{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 char *s = PyBytes_AS_STRING(self);
2080 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 i = 0;
2083 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002084 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 i++;
2086 }
2087 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 j = len;
2090 if (striptype != LEFTSTRIP) {
2091 do {
2092 j--;
David Malcolm96960882010-11-05 17:23:41 +00002093 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002094 j++;
2095 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002097 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2098 Py_INCREF(self);
2099 return (PyObject*)self;
2100 }
2101 else
2102 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103}
2104
2105
2106Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002107do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002109 if (bytes != NULL && bytes != Py_None) {
2110 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 }
2112 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002113}
2114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002115/*[clinic input]
2116bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002118 self: self(type="PyBytesObject *")
2119 bytes: object = None
2120 /
2121
2122Strip leading and trailing bytes contained in the argument.
2123
2124If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2125[clinic start generated code]*/
2126
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002127static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002128bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002129/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002130{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002132}
2133
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002134/*[clinic input]
2135bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002136
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002137 self: self(type="PyBytesObject *")
2138 bytes: object = None
2139 /
2140
2141Strip leading bytes contained in the argument.
2142
2143If the argument is omitted or None, strip leading ASCII whitespace.
2144[clinic start generated code]*/
2145
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002146static PyObject *
2147bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002148/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002149{
2150 return do_argstrip(self, LEFTSTRIP, bytes);
2151}
2152
2153/*[clinic input]
2154bytes.rstrip
2155
2156 self: self(type="PyBytesObject *")
2157 bytes: object = None
2158 /
2159
2160Strip trailing bytes contained in the argument.
2161
2162If the argument is omitted or None, strip trailing ASCII whitespace.
2163[clinic start generated code]*/
2164
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002165static PyObject *
2166bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002167/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002168{
2169 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002170}
Neal Norwitz6968b052007-02-27 19:02:19 +00002171
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002172
2173PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002174"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002175\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002176Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002177string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002178as in slice notation.");
2179
2180static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002181bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002183 PyObject *sub_obj;
2184 const char *str = PyBytes_AS_STRING(self), *sub;
2185 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002186 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002188
Antoine Pitrouac65d962011-10-20 23:54:17 +02002189 Py_buffer vsub;
2190 PyObject *count_obj;
2191
2192 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2193 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002194 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195
Antoine Pitrouac65d962011-10-20 23:54:17 +02002196 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002197 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002198 return NULL;
2199
2200 sub = vsub.buf;
2201 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002202 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002203 else {
2204 sub = &byte;
2205 sub_len = 1;
2206 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002208 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209
Antoine Pitrouac65d962011-10-20 23:54:17 +02002210 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002211 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2212 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002213
2214 if (sub_obj)
2215 PyBuffer_Release(&vsub);
2216
2217 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002218}
2219
2220
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221/*[clinic input]
2222bytes.translate
2223
2224 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002225 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002226 Translation table, which must be a bytes object of length 256.
2227 [
2228 deletechars: object
2229 ]
2230 /
2231
2232Return a copy with each character mapped by the given translation table.
2233
2234All characters occurring in the optional argument deletechars are removed.
2235The remaining characters are mapped through the given translation table.
2236[clinic start generated code]*/
2237
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002238static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002239bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2240 PyObject *deletechars)
2241/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002242{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002243 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002244 Py_buffer table_view = {NULL, NULL};
2245 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002246 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002247 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002248 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002249 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002250 Py_ssize_t inlen, tablen, dellen = 0;
2251 PyObject *result;
2252 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002253
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002254 if (PyBytes_Check(table)) {
2255 table_chars = PyBytes_AS_STRING(table);
2256 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002257 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002258 else if (table == Py_None) {
2259 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002260 tablen = 256;
2261 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002262 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002263 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002264 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002265 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002266 tablen = table_view.len;
2267 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002268
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002269 if (tablen != 256) {
2270 PyErr_SetString(PyExc_ValueError,
2271 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002272 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002273 return NULL;
2274 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002275
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002276 if (deletechars != NULL) {
2277 if (PyBytes_Check(deletechars)) {
2278 del_table_chars = PyBytes_AS_STRING(deletechars);
2279 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002280 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002281 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002282 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002283 PyBuffer_Release(&table_view);
2284 return NULL;
2285 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002286 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002287 dellen = del_table_view.len;
2288 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002289 }
2290 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002291 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002292 dellen = 0;
2293 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002294
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 inlen = PyBytes_GET_SIZE(input_obj);
2296 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002297 if (result == NULL) {
2298 PyBuffer_Release(&del_table_view);
2299 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002300 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002301 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 output_start = output = PyBytes_AsString(result);
2303 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002304
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002305 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002306 /* If no deletions are required, use faster code */
2307 for (i = inlen; --i >= 0; ) {
2308 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002309 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002310 changed = 1;
2311 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002312 if (!changed && PyBytes_CheckExact(input_obj)) {
2313 Py_INCREF(input_obj);
2314 Py_DECREF(result);
2315 result = input_obj;
2316 }
2317 PyBuffer_Release(&del_table_view);
2318 PyBuffer_Release(&table_view);
2319 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002321
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 for (i = 0; i < 256; i++)
2324 trans_table[i] = Py_CHARMASK(i);
2325 } else {
2326 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002328 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002329 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002331 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002332 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002333 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002335 for (i = inlen; --i >= 0; ) {
2336 c = Py_CHARMASK(*input++);
2337 if (trans_table[c] != -1)
2338 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2339 continue;
2340 changed = 1;
2341 }
2342 if (!changed && PyBytes_CheckExact(input_obj)) {
2343 Py_DECREF(result);
2344 Py_INCREF(input_obj);
2345 return input_obj;
2346 }
2347 /* Fix the size of the resulting string */
2348 if (inlen > 0)
2349 _PyBytes_Resize(&result, output - output_start);
2350 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002351}
2352
2353
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002354/*[clinic input]
2355
2356@staticmethod
2357bytes.maketrans
2358
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002359 frm: Py_buffer
2360 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002361 /
2362
2363Return a translation table useable for the bytes or bytearray translate method.
2364
2365The returned table will be one where each byte in frm is mapped to the byte at
2366the same position in to.
2367
2368The bytes objects frm and to must be of the same length.
2369[clinic start generated code]*/
2370
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002371static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002372bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002373/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002374{
2375 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002376}
2377
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002378/* find and count characters and substrings */
2379
/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or to NULL when
   memchr() finds no such byte. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2382
2383/* String ops must return a string. */
2384/* If the object is subclass of string, create a copy */
2385Py_LOCAL(PyBytesObject *)
2386return_self(PyBytesObject *self)
2387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002388 if (PyBytes_CheckExact(self)) {
2389 Py_INCREF(self);
2390 return self;
2391 }
2392 return (PyBytesObject *)PyBytes_FromStringAndSize(
2393 PyBytes_AS_STRING(self),
2394 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002395}
2396
2397Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002398countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002399{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002400 Py_ssize_t count=0;
2401 const char *start=target;
2402 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002404 while ( (start=findchar(start, end-start, c)) != NULL ) {
2405 count++;
2406 if (count >= maxcount)
2407 break;
2408 start += 1;
2409 }
2410 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002411}
2412
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002413
2414/* Algorithms for different cases of string replacement */
2415
2416/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2417Py_LOCAL(PyBytesObject *)
2418replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002419 const char *to_s, Py_ssize_t to_len,
2420 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002421{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 char *self_s, *result_s;
2423 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002424 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002425 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002427 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002428
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002429 /* 1 at the end plus 1 after every character;
2430 count = min(maxcount, self_len + 1) */
2431 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002433 else
2434 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2435 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002436
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002437 /* Check for overflow */
2438 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002439 assert(count > 0);
2440 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 PyErr_SetString(PyExc_OverflowError,
2442 "replacement bytes are too long");
2443 return NULL;
2444 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002445 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002447 if (! (result = (PyBytesObject *)
2448 PyBytes_FromStringAndSize(NULL, result_len)) )
2449 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 self_s = PyBytes_AS_STRING(self);
2452 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 /* Lay the first one down (guaranteed this will occur) */
2457 Py_MEMCPY(result_s, to_s, to_len);
2458 result_s += to_len;
2459 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 for (i=0; i<count; i++) {
2462 *result_s++ = *self_s++;
2463 Py_MEMCPY(result_s, to_s, to_len);
2464 result_s += to_len;
2465 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002467 /* Copy the rest of the original string */
2468 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002471}
2472
2473/* Special case for deleting a single character */
2474/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2475Py_LOCAL(PyBytesObject *)
2476replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002477 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002478{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002479 char *self_s, *result_s;
2480 char *start, *next, *end;
2481 Py_ssize_t self_len, result_len;
2482 Py_ssize_t count;
2483 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002485 self_len = PyBytes_GET_SIZE(self);
2486 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 count = countchar(self_s, self_len, from_c, maxcount);
2489 if (count == 0) {
2490 return return_self(self);
2491 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 result_len = self_len - count; /* from_len == 1 */
2494 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 if ( (result = (PyBytesObject *)
2497 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2498 return NULL;
2499 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 start = self_s;
2502 end = self_s + self_len;
2503 while (count-- > 0) {
2504 next = findchar(start, end-start, from_c);
2505 if (next == NULL)
2506 break;
2507 Py_MEMCPY(result_s, start, next-start);
2508 result_s += (next-start);
2509 start = next+1;
2510 }
2511 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002513 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002514}
2515
2516/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2517
2518Py_LOCAL(PyBytesObject *)
2519replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002520 const char *from_s, Py_ssize_t from_len,
2521 Py_ssize_t maxcount) {
2522 char *self_s, *result_s;
2523 char *start, *next, *end;
2524 Py_ssize_t self_len, result_len;
2525 Py_ssize_t count, offset;
2526 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002528 self_len = PyBytes_GET_SIZE(self);
2529 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002531 count = stringlib_count(self_s, self_len,
2532 from_s, from_len,
2533 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 if (count == 0) {
2536 /* no matches */
2537 return return_self(self);
2538 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002540 result_len = self_len - (count * from_len);
2541 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002543 if ( (result = (PyBytesObject *)
2544 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2545 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002547 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 start = self_s;
2550 end = self_s + self_len;
2551 while (count-- > 0) {
2552 offset = stringlib_find(start, end-start,
2553 from_s, from_len,
2554 0);
2555 if (offset == -1)
2556 break;
2557 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002559 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 result_s += (next-start);
2562 start = next+from_len;
2563 }
2564 Py_MEMCPY(result_s, start, end-start);
2565 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002566}
2567
2568/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2569Py_LOCAL(PyBytesObject *)
2570replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002571 char from_c, char to_c,
2572 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002573{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 char *self_s, *result_s, *start, *end, *next;
2575 Py_ssize_t self_len;
2576 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002578 /* The result string will be the same size */
2579 self_s = PyBytes_AS_STRING(self);
2580 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002582 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 if (next == NULL) {
2585 /* No matches; return the original string */
2586 return return_self(self);
2587 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002589 /* Need to make a new string */
2590 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2591 if (result == NULL)
2592 return NULL;
2593 result_s = PyBytes_AS_STRING(result);
2594 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002596 /* change everything in-place, starting with this one */
2597 start = result_s + (next-self_s);
2598 *start = to_c;
2599 start++;
2600 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002602 while (--maxcount > 0) {
2603 next = findchar(start, end-start, from_c);
2604 if (next == NULL)
2605 break;
2606 *next = to_c;
2607 start = next+1;
2608 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002611}
2612
2613/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2614Py_LOCAL(PyBytesObject *)
2615replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002616 const char *from_s, Py_ssize_t from_len,
2617 const char *to_s, Py_ssize_t to_len,
2618 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002619{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 char *result_s, *start, *end;
2621 char *self_s;
2622 Py_ssize_t self_len, offset;
2623 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002626
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002627 self_s = PyBytes_AS_STRING(self);
2628 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002630 offset = stringlib_find(self_s, self_len,
2631 from_s, from_len,
2632 0);
2633 if (offset == -1) {
2634 /* No matches; return the original string */
2635 return return_self(self);
2636 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 /* Need to make a new string */
2639 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2640 if (result == NULL)
2641 return NULL;
2642 result_s = PyBytes_AS_STRING(result);
2643 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002645 /* change everything in-place, starting with this one */
2646 start = result_s + offset;
2647 Py_MEMCPY(start, to_s, from_len);
2648 start += from_len;
2649 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 while ( --maxcount > 0) {
2652 offset = stringlib_find(start, end-start,
2653 from_s, from_len,
2654 0);
2655 if (offset==-1)
2656 break;
2657 Py_MEMCPY(start+offset, to_s, from_len);
2658 start += offset+from_len;
2659 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002661 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002662}
2663
2664/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2665Py_LOCAL(PyBytesObject *)
2666replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002667 char from_c,
2668 const char *to_s, Py_ssize_t to_len,
2669 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002670{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002671 char *self_s, *result_s;
2672 char *start, *next, *end;
2673 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002674 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002677 self_s = PyBytes_AS_STRING(self);
2678 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 count = countchar(self_s, self_len, from_c, maxcount);
2681 if (count == 0) {
2682 /* no matches, return unchanged */
2683 return return_self(self);
2684 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002686 /* use the difference between current and new, hence the "-1" */
2687 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002688 assert(count > 0);
2689 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 PyErr_SetString(PyExc_OverflowError,
2691 "replacement bytes are too long");
2692 return NULL;
2693 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002694 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 if ( (result = (PyBytesObject *)
2697 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2698 return NULL;
2699 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002701 start = self_s;
2702 end = self_s + self_len;
2703 while (count-- > 0) {
2704 next = findchar(start, end-start, from_c);
2705 if (next == NULL)
2706 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002708 if (next == start) {
2709 /* replace with the 'to' */
2710 Py_MEMCPY(result_s, to_s, to_len);
2711 result_s += to_len;
2712 start += 1;
2713 } else {
2714 /* copy the unchanged old then the 'to' */
2715 Py_MEMCPY(result_s, start, next-start);
2716 result_s += (next-start);
2717 Py_MEMCPY(result_s, to_s, to_len);
2718 result_s += to_len;
2719 start = next+1;
2720 }
2721 }
2722 /* Copy the remainder of the remaining string */
2723 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002725 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002726}
2727
2728/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2729Py_LOCAL(PyBytesObject *)
2730replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002731 const char *from_s, Py_ssize_t from_len,
2732 const char *to_s, Py_ssize_t to_len,
2733 Py_ssize_t maxcount) {
2734 char *self_s, *result_s;
2735 char *start, *next, *end;
2736 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002737 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002738 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002740 self_s = PyBytes_AS_STRING(self);
2741 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002743 count = stringlib_count(self_s, self_len,
2744 from_s, from_len,
2745 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 if (count == 0) {
2748 /* no matches, return unchanged */
2749 return return_self(self);
2750 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 /* Check for overflow */
2753 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002754 assert(count > 0);
2755 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002756 PyErr_SetString(PyExc_OverflowError,
2757 "replacement bytes are too long");
2758 return NULL;
2759 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002760 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 if ( (result = (PyBytesObject *)
2763 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2764 return NULL;
2765 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002767 start = self_s;
2768 end = self_s + self_len;
2769 while (count-- > 0) {
2770 offset = stringlib_find(start, end-start,
2771 from_s, from_len,
2772 0);
2773 if (offset == -1)
2774 break;
2775 next = start+offset;
2776 if (next == start) {
2777 /* replace with the 'to' */
2778 Py_MEMCPY(result_s, to_s, to_len);
2779 result_s += to_len;
2780 start += from_len;
2781 } else {
2782 /* copy the unchanged old then the 'to' */
2783 Py_MEMCPY(result_s, start, next-start);
2784 result_s += (next-start);
2785 Py_MEMCPY(result_s, to_s, to_len);
2786 result_s += to_len;
2787 start = next+from_len;
2788 }
2789 }
2790 /* Copy the remainder of the remaining string */
2791 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002793 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002794}
2795
2796
2797Py_LOCAL(PyBytesObject *)
2798replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002799 const char *from_s, Py_ssize_t from_len,
2800 const char *to_s, Py_ssize_t to_len,
2801 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002802{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002803 if (maxcount < 0) {
2804 maxcount = PY_SSIZE_T_MAX;
2805 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2806 /* nothing to do; return the original string */
2807 return return_self(self);
2808 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002810 if (maxcount == 0 ||
2811 (from_len == 0 && to_len == 0)) {
2812 /* nothing to do; return the original string */
2813 return return_self(self);
2814 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002818 if (from_len == 0) {
2819 /* insert the 'to' string everywhere. */
2820 /* >>> "Python".replace("", ".") */
2821 /* '.P.y.t.h.o.n.' */
2822 return replace_interleave(self, to_s, to_len, maxcount);
2823 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002824
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002825 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2826 /* point for an empty self string to generate a non-empty string */
2827 /* Special case so the remaining code always gets a non-empty string */
2828 if (PyBytes_GET_SIZE(self) == 0) {
2829 return return_self(self);
2830 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002832 if (to_len == 0) {
2833 /* delete all occurrences of 'from' string */
2834 if (from_len == 1) {
2835 return replace_delete_single_character(
2836 self, from_s[0], maxcount);
2837 } else {
2838 return replace_delete_substring(self, from_s,
2839 from_len, maxcount);
2840 }
2841 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002843 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 if (from_len == to_len) {
2846 if (from_len == 1) {
2847 return replace_single_character_in_place(
2848 self,
2849 from_s[0],
2850 to_s[0],
2851 maxcount);
2852 } else {
2853 return replace_substring_in_place(
2854 self, from_s, from_len, to_s, to_len,
2855 maxcount);
2856 }
2857 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 /* Otherwise use the more generic algorithms */
2860 if (from_len == 1) {
2861 return replace_single_character(self, from_s[0],
2862 to_s, to_len, maxcount);
2863 } else {
2864 /* len('from')>=2, len('to')>=1 */
2865 return replace_substring(self, from_s, from_len, to_s, to_len,
2866 maxcount);
2867 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002868}
2869
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002870
2871/*[clinic input]
2872bytes.replace
2873
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002874 old: Py_buffer
2875 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002876 count: Py_ssize_t = -1
2877 Maximum number of occurrences to replace.
2878 -1 (the default value) means replace all occurrences.
2879 /
2880
2881Return a copy with all occurrences of substring old replaced by new.
2882
2883If the optional argument count is given, only the first count occurrences are
2884replaced.
2885[clinic start generated code]*/
2886
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002887static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002888bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2889 Py_ssize_t count)
2890/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002891{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002892 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002893 (const char *)old->buf, old->len,
2894 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002895}
2896
2897/** End DALKE **/
2898
2899/* Matches the end (direction >= 0) or start (direction < 0) of self
2900 * against substr, using the start and end arguments. Returns
2901 * -1 on error, 0 if not found and 1 if found.
2902 */
2903Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002904_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002906{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002907 Py_ssize_t len = PyBytes_GET_SIZE(self);
2908 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002909 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002910 const char* sub;
2911 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002913 if (PyBytes_Check(substr)) {
2914 sub = PyBytes_AS_STRING(substr);
2915 slen = PyBytes_GET_SIZE(substr);
2916 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002917 else {
2918 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2919 return -1;
2920 sub = sub_view.buf;
2921 slen = sub_view.len;
2922 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002925 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 if (direction < 0) {
2928 /* startswith */
2929 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002930 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 } else {
2932 /* endswith */
2933 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002934 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002935
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 if (end-slen > start)
2937 start = end - slen;
2938 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002939 if (end-start < slen)
2940 goto notfound;
2941 if (memcmp(str+start, sub, slen) != 0)
2942 goto notfound;
2943
2944 PyBuffer_Release(&sub_view);
2945 return 1;
2946
2947notfound:
2948 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002950}
2951
2952
2953PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002954"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955\n\
2956Return True if B starts with the specified prefix, False otherwise.\n\
2957With optional start, test B beginning at that position.\n\
2958With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002959prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002960
2961static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002962bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002964 Py_ssize_t start = 0;
2965 Py_ssize_t end = PY_SSIZE_T_MAX;
2966 PyObject *subobj;
2967 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002968
Jesus Ceaac451502011-04-20 17:09:23 +02002969 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002970 return NULL;
2971 if (PyTuple_Check(subobj)) {
2972 Py_ssize_t i;
2973 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2974 result = _bytes_tailmatch(self,
2975 PyTuple_GET_ITEM(subobj, i),
2976 start, end, -1);
2977 if (result == -1)
2978 return NULL;
2979 else if (result) {
2980 Py_RETURN_TRUE;
2981 }
2982 }
2983 Py_RETURN_FALSE;
2984 }
2985 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002986 if (result == -1) {
2987 if (PyErr_ExceptionMatches(PyExc_TypeError))
2988 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2989 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002990 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002991 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002992 else
2993 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002994}
2995
2996
2997PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002998"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999\n\
3000Return True if B ends with the specified suffix, False otherwise.\n\
3001With optional start, test B beginning at that position.\n\
3002With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003003suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003004
3005static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003006bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003008 Py_ssize_t start = 0;
3009 Py_ssize_t end = PY_SSIZE_T_MAX;
3010 PyObject *subobj;
3011 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003012
Jesus Ceaac451502011-04-20 17:09:23 +02003013 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003014 return NULL;
3015 if (PyTuple_Check(subobj)) {
3016 Py_ssize_t i;
3017 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3018 result = _bytes_tailmatch(self,
3019 PyTuple_GET_ITEM(subobj, i),
3020 start, end, +1);
3021 if (result == -1)
3022 return NULL;
3023 else if (result) {
3024 Py_RETURN_TRUE;
3025 }
3026 }
3027 Py_RETURN_FALSE;
3028 }
3029 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003030 if (result == -1) {
3031 if (PyErr_ExceptionMatches(PyExc_TypeError))
3032 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3033 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003034 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003035 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003036 else
3037 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003038}
3039
3040
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003041/*[clinic input]
3042bytes.decode
3043
3044 encoding: str(c_default="NULL") = 'utf-8'
3045 The encoding with which to decode the bytes.
3046 errors: str(c_default="NULL") = 'strict'
3047 The error handling scheme to use for the handling of decoding errors.
3048 The default is 'strict' meaning that decoding errors raise a
3049 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3050 as well as any other name registered with codecs.register_error that
3051 can handle UnicodeDecodeErrors.
3052
3053Decode the bytes using the codec registered for encoding.
3054[clinic start generated code]*/
3055
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003056static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003057bytes_decode_impl(PyBytesObject*self, const char *encoding,
3058 const char *errors)
3059/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003060{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003061 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003062}
3063
Guido van Rossum20188312006-05-05 15:15:40 +00003064
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003065/*[clinic input]
3066bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003067
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003068 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003069
3070Return a list of the lines in the bytes, breaking at line boundaries.
3071
3072Line breaks are not included in the resulting list unless keepends is given and
3073true.
3074[clinic start generated code]*/
3075
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003076static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003077bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003078/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003079{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003080 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003081 (PyObject*) self, PyBytes_AS_STRING(self),
3082 PyBytes_GET_SIZE(self), keepends
3083 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003084}
3085
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003086static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003087hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003088{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003089 if (c >= 128)
3090 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003091 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003092 return c - '0';
3093 else {
David Malcolm96960882010-11-05 17:23:41 +00003094 if (Py_ISUPPER(c))
3095 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003096 if (c >= 'a' && c <= 'f')
3097 return c - 'a' + 10;
3098 }
3099 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003100}
3101
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003102/*[clinic input]
3103@classmethod
3104bytes.fromhex
3105
3106 string: unicode
3107 /
3108
3109Create a bytes object from a string of hexadecimal numbers.
3110
3111Spaces between two numbers are accepted.
3112Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3113[clinic start generated code]*/
3114
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003115static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003116bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003117/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003118{
3119 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003120 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003121 Py_ssize_t hexlen, byteslen, i, j;
3122 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003123 void *data;
3124 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003125
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003126 assert(PyUnicode_Check(string));
3127 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003129 kind = PyUnicode_KIND(string);
3130 data = PyUnicode_DATA(string);
3131 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003133 byteslen = hexlen/2; /* This overestimates if there are spaces */
3134 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3135 if (!newstring)
3136 return NULL;
3137 buf = PyBytes_AS_STRING(newstring);
3138 for (i = j = 0; i < hexlen; i += 2) {
3139 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003140 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003141 i++;
3142 if (i >= hexlen)
3143 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003144 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3145 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003146 if (top == -1 || bot == -1) {
3147 PyErr_Format(PyExc_ValueError,
3148 "non-hexadecimal number found in "
3149 "fromhex() arg at position %zd", i);
3150 goto error;
3151 }
3152 buf[j++] = (top << 4) + bot;
3153 }
3154 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3155 goto error;
3156 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003157
3158 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003159 Py_XDECREF(newstring);
3160 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003161}
3162
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003163PyDoc_STRVAR(hex__doc__,
3164"B.hex() -> string\n\
3165\n\
3166Create a string of hexadecimal numbers from a bytes object.\n\
3167Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3168
3169static PyObject *
3170bytes_hex(PyBytesObject *self)
3171{
3172 char* argbuf = PyBytes_AS_STRING(self);
3173 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3174 return _Py_strhex(argbuf, arglen);
3175}
3176
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003177static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003178bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003179{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003180 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003181}
3182
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003183
3184static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003185bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003186 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3187 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3188 _Py_capitalize__doc__},
3189 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3190 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003191 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003192 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3193 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003194 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003195 expandtabs__doc__},
3196 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003197 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003198 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003199 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3200 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3201 _Py_isalnum__doc__},
3202 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3203 _Py_isalpha__doc__},
3204 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3205 _Py_isdigit__doc__},
3206 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3207 _Py_islower__doc__},
3208 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3209 _Py_isspace__doc__},
3210 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3211 _Py_istitle__doc__},
3212 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3213 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003214 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003215 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3216 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003217 BYTES_LSTRIP_METHODDEF
3218 BYTES_MAKETRANS_METHODDEF
3219 BYTES_PARTITION_METHODDEF
3220 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003221 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3222 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3223 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003224 BYTES_RPARTITION_METHODDEF
3225 BYTES_RSPLIT_METHODDEF
3226 BYTES_RSTRIP_METHODDEF
3227 BYTES_SPLIT_METHODDEF
3228 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003229 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3230 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003231 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003232 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3233 _Py_swapcase__doc__},
3234 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003235 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003236 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3237 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003238 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003239};
3240
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003241static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003242bytes_mod(PyObject *v, PyObject *w)
3243{
3244 if (!PyBytes_Check(v))
3245 Py_RETURN_NOTIMPLEMENTED;
3246 return _PyBytes_Format(v, w);
3247}
3248
3249static PyNumberMethods bytes_as_number = {
3250 0, /*nb_add*/
3251 0, /*nb_subtract*/
3252 0, /*nb_multiply*/
3253 bytes_mod, /*nb_remainder*/
3254};
3255
3256static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003257str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3258
3259static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003260bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003261{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003262 PyObject *x = NULL;
3263 const char *encoding = NULL;
3264 const char *errors = NULL;
3265 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003266 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003267 Py_ssize_t size;
3268 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003269 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003271 if (type != &PyBytes_Type)
3272 return str_subtype_new(type, args, kwds);
3273 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3274 &encoding, &errors))
3275 return NULL;
3276 if (x == NULL) {
3277 if (encoding != NULL || errors != NULL) {
3278 PyErr_SetString(PyExc_TypeError,
3279 "encoding or errors without sequence "
3280 "argument");
3281 return NULL;
3282 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003283 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003284 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003286 if (PyUnicode_Check(x)) {
3287 /* Encode via the codec registry */
3288 if (encoding == NULL) {
3289 PyErr_SetString(PyExc_TypeError,
3290 "string argument without an encoding");
3291 return NULL;
3292 }
3293 new = PyUnicode_AsEncodedString(x, encoding, errors);
3294 if (new == NULL)
3295 return NULL;
3296 assert(PyBytes_Check(new));
3297 return new;
3298 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003299
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003300 /* If it's not unicode, there can't be encoding or errors */
3301 if (encoding != NULL || errors != NULL) {
3302 PyErr_SetString(PyExc_TypeError,
3303 "encoding or errors without a string argument");
3304 return NULL;
3305 }
3306
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003307 /* We'd like to call PyObject_Bytes here, but we need to check for an
3308 integer argument before deferring to PyBytes_FromObject, something
3309 PyObject_Bytes doesn't do. */
3310 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3311 if (func != NULL) {
3312 new = PyObject_CallFunctionObjArgs(func, NULL);
3313 Py_DECREF(func);
3314 if (new == NULL)
3315 return NULL;
3316 if (!PyBytes_Check(new)) {
3317 PyErr_Format(PyExc_TypeError,
3318 "__bytes__ returned non-bytes (type %.200s)",
3319 Py_TYPE(new)->tp_name);
3320 Py_DECREF(new);
3321 return NULL;
3322 }
3323 return new;
3324 }
3325 else if (PyErr_Occurred())
3326 return NULL;
3327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003328 /* Is it an integer? */
3329 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3330 if (size == -1 && PyErr_Occurred()) {
3331 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3332 return NULL;
3333 PyErr_Clear();
3334 }
3335 else if (size < 0) {
3336 PyErr_SetString(PyExc_ValueError, "negative count");
3337 return NULL;
3338 }
3339 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003340 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003341 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003342 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003343 return new;
3344 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003345
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003346 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003347}
3348
3349PyObject *
3350PyBytes_FromObject(PyObject *x)
3351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003352 PyObject *new, *it;
3353 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003355 if (x == NULL) {
3356 PyErr_BadInternalCall();
3357 return NULL;
3358 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003359
3360 if (PyBytes_CheckExact(x)) {
3361 Py_INCREF(x);
3362 return x;
3363 }
3364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003365 /* Use the modern buffer interface */
3366 if (PyObject_CheckBuffer(x)) {
3367 Py_buffer view;
3368 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3369 return NULL;
3370 new = PyBytes_FromStringAndSize(NULL, view.len);
3371 if (!new)
3372 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003373 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3374 &view, view.len, 'C') < 0)
3375 goto fail;
3376 PyBuffer_Release(&view);
3377 return new;
3378 fail:
3379 Py_XDECREF(new);
3380 PyBuffer_Release(&view);
3381 return NULL;
3382 }
3383 if (PyUnicode_Check(x)) {
3384 PyErr_SetString(PyExc_TypeError,
3385 "cannot convert unicode object to bytes");
3386 return NULL;
3387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003389 if (PyList_CheckExact(x)) {
3390 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3391 if (new == NULL)
3392 return NULL;
3393 for (i = 0; i < Py_SIZE(x); i++) {
3394 Py_ssize_t value = PyNumber_AsSsize_t(
3395 PyList_GET_ITEM(x, i), PyExc_ValueError);
3396 if (value == -1 && PyErr_Occurred()) {
3397 Py_DECREF(new);
3398 return NULL;
3399 }
3400 if (value < 0 || value >= 256) {
3401 PyErr_SetString(PyExc_ValueError,
3402 "bytes must be in range(0, 256)");
3403 Py_DECREF(new);
3404 return NULL;
3405 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003406 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003407 }
3408 return new;
3409 }
3410 if (PyTuple_CheckExact(x)) {
3411 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3412 if (new == NULL)
3413 return NULL;
3414 for (i = 0; i < Py_SIZE(x); i++) {
3415 Py_ssize_t value = PyNumber_AsSsize_t(
3416 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3417 if (value == -1 && PyErr_Occurred()) {
3418 Py_DECREF(new);
3419 return NULL;
3420 }
3421 if (value < 0 || value >= 256) {
3422 PyErr_SetString(PyExc_ValueError,
3423 "bytes must be in range(0, 256)");
3424 Py_DECREF(new);
3425 return NULL;
3426 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003427 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003428 }
3429 return new;
3430 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003432 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003433 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003434 if (size == -1 && PyErr_Occurred())
3435 return NULL;
3436 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3437 returning a shared empty bytes string. This required because we
3438 want to call _PyBytes_Resize() the returned object, which we can
3439 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003440 if (size == 0)
3441 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003442 new = PyBytes_FromStringAndSize(NULL, size);
3443 if (new == NULL)
3444 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003445 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003447 /* Get the iterator */
3448 it = PyObject_GetIter(x);
3449 if (it == NULL)
3450 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003452 /* Run the iterator to exhaustion */
3453 for (i = 0; ; i++) {
3454 PyObject *item;
3455 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003457 /* Get the next item */
3458 item = PyIter_Next(it);
3459 if (item == NULL) {
3460 if (PyErr_Occurred())
3461 goto error;
3462 break;
3463 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003465 /* Interpret it as an int (__index__) */
3466 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3467 Py_DECREF(item);
3468 if (value == -1 && PyErr_Occurred())
3469 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003471 /* Range check */
3472 if (value < 0 || value >= 256) {
3473 PyErr_SetString(PyExc_ValueError,
3474 "bytes must be in range(0, 256)");
3475 goto error;
3476 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003478 /* Append the byte */
3479 if (i >= size) {
3480 size = 2 * size + 1;
3481 if (_PyBytes_Resize(&new, size) < 0)
3482 goto error;
3483 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003484 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003485 }
3486 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003488 /* Clean up and return success */
3489 Py_DECREF(it);
3490 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003491
3492 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003493 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003494 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003495 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003496}
3497
3498static PyObject *
3499str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3500{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003501 PyObject *tmp, *pnew;
3502 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003504 assert(PyType_IsSubtype(type, &PyBytes_Type));
3505 tmp = bytes_new(&PyBytes_Type, args, kwds);
3506 if (tmp == NULL)
3507 return NULL;
3508 assert(PyBytes_CheckExact(tmp));
3509 n = PyBytes_GET_SIZE(tmp);
3510 pnew = type->tp_alloc(type, n);
3511 if (pnew != NULL) {
3512 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3513 PyBytes_AS_STRING(tmp), n+1);
3514 ((PyBytesObject *)pnew)->ob_shash =
3515 ((PyBytesObject *)tmp)->ob_shash;
3516 }
3517 Py_DECREF(tmp);
3518 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003519}
3520
/* Docstring of the bytes type; referenced as tp_doc by PyBytes_Type. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003521PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003522"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003523bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003524bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003525bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3526bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003527\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003528Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003529 - an iterable yielding integers in range(256)\n\
3530 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003531 - any object implementing the buffer API.\n\
3532 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003533
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003534static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003535
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003536PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003537 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3538 "bytes",
3539 PyBytesObject_SIZE,
3540 sizeof(char),
3541 bytes_dealloc, /* tp_dealloc */
3542 0, /* tp_print */
3543 0, /* tp_getattr */
3544 0, /* tp_setattr */
3545 0, /* tp_reserved */
3546 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003547 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003548 &bytes_as_sequence, /* tp_as_sequence */
3549 &bytes_as_mapping, /* tp_as_mapping */
3550 (hashfunc)bytes_hash, /* tp_hash */
3551 0, /* tp_call */
3552 bytes_str, /* tp_str */
3553 PyObject_GenericGetAttr, /* tp_getattro */
3554 0, /* tp_setattro */
3555 &bytes_as_buffer, /* tp_as_buffer */
3556 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3557 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3558 bytes_doc, /* tp_doc */
3559 0, /* tp_traverse */
3560 0, /* tp_clear */
3561 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3562 0, /* tp_weaklistoffset */
3563 bytes_iter, /* tp_iter */
3564 0, /* tp_iternext */
3565 bytes_methods, /* tp_methods */
3566 0, /* tp_members */
3567 0, /* tp_getset */
3568 &PyBaseObject_Type, /* tp_base */
3569 0, /* tp_dict */
3570 0, /* tp_descr_get */
3571 0, /* tp_descr_set */
3572 0, /* tp_dictoffset */
3573 0, /* tp_init */
3574 0, /* tp_alloc */
3575 bytes_new, /* tp_new */
3576 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003577};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003578
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003579void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003580PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003581{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003582 assert(pv != NULL);
3583 if (*pv == NULL)
3584 return;
3585 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003586 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003587 return;
3588 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003589
3590 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3591 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003592 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003593 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003594
Antoine Pitrou161d6952014-05-01 14:36:20 +02003595 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003596 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003597 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3598 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3599 Py_CLEAR(*pv);
3600 return;
3601 }
3602
3603 oldsize = PyBytes_GET_SIZE(*pv);
3604 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3605 PyErr_NoMemory();
3606 goto error;
3607 }
3608 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3609 goto error;
3610
3611 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3612 PyBuffer_Release(&wb);
3613 return;
3614
3615 error:
3616 PyBuffer_Release(&wb);
3617 Py_CLEAR(*pv);
3618 return;
3619 }
3620
3621 else {
3622 /* Multiple references, need to create new object */
3623 PyObject *v;
3624 v = bytes_concat(*pv, w);
3625 Py_DECREF(*pv);
3626 *pv = v;
3627 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003628}
3629
3630void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003631PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003632{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003633 PyBytes_Concat(pv, w);
3634 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003635}
3636
3637
Ethan Furmanb95b5612015-01-23 20:05:18 -08003638/* The following function breaks the notion that bytes are immutable:
3639 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003640 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003641 as creating a new bytes object and destroying the old one, only
3642 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003643 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003644 Note that if there's not enough memory to resize the bytes object, the
3645 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003646 memory" exception is set, and -1 is returned. Else (on success) 0 is
3647 returned, and the value in *pv may or may not be the same as on input.
3648 As always, an extra byte is allocated for a trailing \0 byte (newsize
3649 does *not* include that), and a trailing \0 byte is stored.
3650*/
3651
3652int
3653_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3654{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003655 PyObject *v;
3656 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003657 v = *pv;
3658 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3659 *pv = 0;
3660 Py_DECREF(v);
3661 PyErr_BadInternalCall();
3662 return -1;
3663 }
3664 /* XXX UNREF/NEWREF interface should be more symmetrical */
3665 _Py_DEC_REFTOTAL;
3666 _Py_ForgetReference(v);
3667 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003668 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003669 if (*pv == NULL) {
3670 PyObject_Del(v);
3671 PyErr_NoMemory();
3672 return -1;
3673 }
3674 _Py_NewReference(*pv);
3675 sv = (PyBytesObject *) *pv;
3676 Py_SIZE(sv) = newsize;
3677 sv->ob_sval[newsize] = '\0';
3678 sv->ob_shash = -1; /* invalidate cached hash value */
3679 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003680}
3681
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003682void
3683PyBytes_Fini(void)
3684{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003685 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003686 for (i = 0; i < UCHAR_MAX + 1; i++)
3687 Py_CLEAR(characters[i]);
3688 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003689}
3690
Benjamin Peterson4116f362008-05-27 00:36:20 +00003691/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003692
3693typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003694 PyObject_HEAD
3695 Py_ssize_t it_index;
3696 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003697} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003698
3699static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003700striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003701{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003702 _PyObject_GC_UNTRACK(it);
3703 Py_XDECREF(it->it_seq);
3704 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003705}
3706
3707static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003708striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003709{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003710 Py_VISIT(it->it_seq);
3711 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003712}
3713
3714static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003715striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003716{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003717 PyBytesObject *seq;
3718 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003720 assert(it != NULL);
3721 seq = it->it_seq;
3722 if (seq == NULL)
3723 return NULL;
3724 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003726 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3727 item = PyLong_FromLong(
3728 (unsigned char)seq->ob_sval[it->it_index]);
3729 if (item != NULL)
3730 ++it->it_index;
3731 return item;
3732 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003734 Py_DECREF(seq);
3735 it->it_seq = NULL;
3736 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003737}
3738
3739static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003740striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003741{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003742 Py_ssize_t len = 0;
3743 if (it->it_seq)
3744 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3745 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003746}
3747
3748PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003749 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003750
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003751static PyObject *
3752striter_reduce(striterobject *it)
3753{
3754 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003755 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003756 it->it_seq, it->it_index);
3757 } else {
3758 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3759 if (u == NULL)
3760 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003761 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003762 }
3763}
3764
3765PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3766
3767static PyObject *
3768striter_setstate(striterobject *it, PyObject *state)
3769{
3770 Py_ssize_t index = PyLong_AsSsize_t(state);
3771 if (index == -1 && PyErr_Occurred())
3772 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003773 if (it->it_seq != NULL) {
3774 if (index < 0)
3775 index = 0;
3776 else if (index > PyBytes_GET_SIZE(it->it_seq))
3777 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3778 it->it_index = index;
3779 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003780 Py_RETURN_NONE;
3781}
3782
3783PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3784
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003785static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003786 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3787 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003788 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3789 reduce_doc},
3790 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3791 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003792 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003793};
3794
/* Type object of the iterator created by bytes_iter() ("bytes_iterator").
   Instances are striterobject structures; the type takes part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with striter_traverse), iterates
   over itself (PyObject_SelfIter) and exposes the methods listed in
   striter_methods.  Slots set to 0 are left unset. */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator", /* tp_name */
    sizeof(striterobject), /* tp_basicsize */
    0, /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_reserved */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    PyObject_GenericGetAttr, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0, /* tp_doc */
    (traverseproc)striter_traverse, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    PyObject_SelfIter, /* tp_iter */
    (iternextfunc)striter_next, /* tp_iternext */
    striter_methods, /* tp_methods */
    0, /* tp_members */
};
3827
3828static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003829bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003830{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003831 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003832
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003833 if (!PyBytes_Check(seq)) {
3834 PyErr_BadInternalCall();
3835 return NULL;
3836 }
3837 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3838 if (it == NULL)
3839 return NULL;
3840 it->it_index = 0;
3841 Py_INCREF(seq);
3842 it->it_seq = (PyBytesObject *)seq;
3843 _PyObject_GC_TRACK(it);
3844 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003845}
Victor Stinner00165072015-10-09 01:53:21 +02003846
3847
3848/* _PyBytesWriter API */
3849
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Prepare() grows a request of n bytes by an extra
   n / OVERALLOCATE_FACTOR bytes (2 -> +50%, 4 -> +25%). */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3857
3858void
3859_PyBytesWriter_Init(_PyBytesWriter *writer)
3860{
3861 writer->buffer = NULL;
3862 writer->allocated = 0;
Victor Stinner53926a12015-10-09 12:37:03 +02003863 writer->min_size = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003864 writer->overallocate = 0;
Victor Stinnerb3653a32015-10-09 03:38:24 +02003865 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003866#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003867 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003868#endif
3869}
3870
3871void
3872_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3873{
3874 Py_CLEAR(writer->buffer);
3875}
3876
3877Py_LOCAL_INLINE(char*)
3878_PyBytesWriter_AsString(_PyBytesWriter *writer)
3879{
Victor Stinnerb3653a32015-10-09 03:38:24 +02003880 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003881 assert(writer->buffer != NULL);
3882 return PyBytes_AS_STRING(writer->buffer);
3883 }
3884 else {
3885 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003886 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003887 }
3888}
3889
3890Py_LOCAL_INLINE(Py_ssize_t)
3891_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
3892{
3893 char *start = _PyBytesWriter_AsString(writer);
3894 assert(str != NULL);
3895 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003896 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003897 return str - start;
3898}
3899
3900Py_LOCAL_INLINE(void)
3901_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3902{
3903#ifdef Py_DEBUG
3904 char *start, *end;
3905
Victor Stinnerb3653a32015-10-09 03:38:24 +02003906 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003907 assert(writer->buffer != NULL);
3908 assert(PyBytes_CheckExact(writer->buffer));
3909 assert(Py_REFCNT(writer->buffer) == 1);
3910 }
3911 else {
3912 assert(writer->buffer == NULL);
3913 }
3914
3915 start = _PyBytesWriter_AsString(writer);
Victor Stinner53926a12015-10-09 12:37:03 +02003916 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003917 /* the last byte must always be null */
3918 assert(start[writer->allocated] == 0);
3919
3920 end = start + writer->allocated;
3921 assert(str != NULL);
3922 assert(start <= str && str <= end);
3923#endif
3924}
3925
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003926void*
3927_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02003928{
3929 Py_ssize_t allocated, pos;
3930
3931 _PyBytesWriter_CheckConsistency(writer, str);
3932 assert(size >= 0);
3933
3934 if (size == 0) {
3935 /* nothing to do */
3936 return str;
3937 }
3938
Victor Stinner53926a12015-10-09 12:37:03 +02003939 if (writer->min_size > PY_SSIZE_T_MAX - size) {
Victor Stinner00165072015-10-09 01:53:21 +02003940 PyErr_NoMemory();
3941 _PyBytesWriter_Dealloc(writer);
3942 return NULL;
3943 }
Victor Stinner53926a12015-10-09 12:37:03 +02003944 writer->min_size += size;
Victor Stinner00165072015-10-09 01:53:21 +02003945
3946 allocated = writer->allocated;
Victor Stinner53926a12015-10-09 12:37:03 +02003947 if (writer->min_size <= allocated)
Victor Stinner00165072015-10-09 01:53:21 +02003948 return str;
3949
Victor Stinner53926a12015-10-09 12:37:03 +02003950 allocated = writer->min_size;
Victor Stinner00165072015-10-09 01:53:21 +02003951 if (writer->overallocate
3952 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3953 /* overallocate to limit the number of realloc() */
3954 allocated += allocated / OVERALLOCATE_FACTOR;
3955 }
3956
3957 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003958 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003959 /* Note: Don't use a bytearray object because the conversion from
3960 byterray to bytes requires to copy all bytes. */
3961 if (_PyBytes_Resize(&writer->buffer, allocated)) {
3962 assert(writer->buffer == NULL);
3963 return NULL;
3964 }
3965 }
3966 else {
3967 /* convert from stack buffer to bytes object buffer */
3968 assert(writer->buffer == NULL);
3969
3970 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3971 if (writer->buffer == NULL)
3972 return NULL;
3973
3974 if (pos != 0) {
3975 Py_MEMCPY(PyBytes_AS_STRING(writer->buffer),
Victor Stinnerb3653a32015-10-09 03:38:24 +02003976 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003977 pos);
3978 }
3979
Victor Stinnerb3653a32015-10-09 03:38:24 +02003980 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003981#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003982 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003983#endif
Victor Stinner00165072015-10-09 01:53:21 +02003984 }
3985 writer->allocated = allocated;
3986
3987 str = _PyBytesWriter_AsString(writer) + pos;
3988 _PyBytesWriter_CheckConsistency(writer, str);
3989 return str;
3990}
3991
3992/* Allocate the buffer to write size bytes.
3993 Return the pointer to the beginning of buffer data.
3994 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003995void*
Victor Stinner00165072015-10-09 01:53:21 +02003996_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3997{
3998 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003999 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02004000 assert(size >= 0);
4001
Victor Stinnerb3653a32015-10-09 03:38:24 +02004002 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02004003#ifdef Py_DEBUG
Victor Stinner00165072015-10-09 01:53:21 +02004004 /* the last byte is reserved, it must be '\0' */
Victor Stinnerb3653a32015-10-09 03:38:24 +02004005 writer->allocated = sizeof(writer->small_buffer) - 1;
4006 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004007#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02004008 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02004009#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02004010 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02004011}
4012
4013PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004014_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02004015{
4016 Py_ssize_t pos;
4017 PyObject *result;
4018
4019 _PyBytesWriter_CheckConsistency(writer, str);
4020
4021 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004022 if (pos == 0) {
4023 Py_CLEAR(writer->buffer);
4024 /* Get the empty byte string singleton */
4025 result = PyBytes_FromStringAndSize(NULL, 0);
4026 }
4027 else if (writer->use_small_buffer) {
4028 result = PyBytes_FromStringAndSize(writer->small_buffer, pos);
4029 }
4030 else {
4031 result = writer->buffer;
4032 writer->buffer = NULL;
4033
Victor Stinner00165072015-10-09 01:53:21 +02004034 if (pos != writer->allocated) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004035 if (_PyBytes_Resize(&result, pos)) {
4036 assert(result == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02004037 return NULL;
4038 }
4039 }
Victor Stinner00165072015-10-09 01:53:21 +02004040 }
Victor Stinner00165072015-10-09 01:53:21 +02004041 return result;
4042}
Victor Stinnerce179bf2015-10-09 12:57:22 +02004043
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004044void*
4045_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *str,
4046 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02004047{
4048 str = _PyBytesWriter_Prepare(writer, str, size);
4049 if (str == NULL)
4050 return NULL;
4051
4052 Py_MEMCPY(str, bytes, size);
4053 str += size;
4054
4055 return str;
4056}