blob: fd46048520d17e353080538c440a4ea0365ed194 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
#ifdef COUNT_ALLOCS
/* Statistics counters: null_strings is bumped each time the cached empty
   bytes object is handed out, one_strings each time a cached one-byte
   object is handed out. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value (`*str & UCHAR_MAX').
   Entries start out NULL and are filled in the first time the corresponding
   one-byte object is created. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* The shared empty bytes object, or NULL until the first one is created. */
static PyBytesObject *nullstring;
24
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.
   The "+ 1" reserves the byte for the null terminator that is stored right
   after the n data bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Christian Heimes2c9c7a52008-05-26 13:42:13 +000033/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000034 For PyBytes_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
36
37 For PyBytes_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000045 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000046 alter the data yourself, since the strings may be shared.
47
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020050 allocated for string data, not counting the null terminating character.
51 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000052 PyBytes_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyBytes_FromString()).
54*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020055static PyObject *
56_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000057{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020058 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020059 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000062#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000064#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 Py_INCREF(op);
66 return (PyObject *)op;
67 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068
Victor Stinner049e5092014-08-17 22:20:00 +020069 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyErr_SetString(PyExc_OverflowError,
71 "byte string is too large");
72 return NULL;
73 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020076 if (use_calloc)
77 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
78 else
79 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000080 if (op == NULL)
81 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010082 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020084 if (!use_calloc)
85 op->ob_sval[size] = '\0';
86 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 if (size == 0) {
88 nullstring = op;
89 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020090 }
91 return (PyObject *) op;
92}
93
94PyObject *
95PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
96{
97 PyBytesObject *op;
98 if (size < 0) {
99 PyErr_SetString(PyExc_SystemError,
100 "Negative size passed to PyBytes_FromStringAndSize");
101 return NULL;
102 }
103 if (size == 1 && str != NULL &&
104 (op = characters[*str & UCHAR_MAX]) != NULL)
105 {
106#ifdef COUNT_ALLOCS
107 one_strings++;
108#endif
109 Py_INCREF(op);
110 return (PyObject *)op;
111 }
112
113 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
114 if (op == NULL)
115 return NULL;
116 if (str == NULL)
117 return (PyObject *) op;
118
119 Py_MEMCPY(op->ob_sval, str, size);
120 /* share short strings */
121 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 characters[*str & UCHAR_MAX] = op;
123 Py_INCREF(op);
124 }
125 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000126}
127
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000128PyObject *
129PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000130{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200131 size_t size;
132 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 assert(str != NULL);
135 size = strlen(str);
136 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
137 PyErr_SetString(PyExc_OverflowError,
138 "byte string is too long");
139 return NULL;
140 }
141 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000142#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000144#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 Py_INCREF(op);
146 return (PyObject *)op;
147 }
148 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000149#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000151#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* Inline PyObject_NewVar */
157 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
158 if (op == NULL)
159 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100160 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 op->ob_shash = -1;
162 Py_MEMCPY(op->ob_sval, str, size+1);
163 /* share short strings */
164 if (size == 0) {
165 nullstring = op;
166 Py_INCREF(op);
167 } else if (size == 1) {
168 characters[*str & UCHAR_MAX] = op;
169 Py_INCREF(op);
170 }
171 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000172}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000173
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000174PyObject *
175PyBytes_FromFormatV(const char *format, va_list vargs)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 va_list count;
178 Py_ssize_t n = 0;
179 const char* f;
180 char *s;
181 PyObject* string;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000182
Alexander Belopolskyf0f45142010-08-11 17:31:17 +0000183 Py_VA_COPY(count, vargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 /* step 1: figure out how large a buffer we need */
185 for (f = format; *f; f++) {
186 if (*f == '%') {
187 const char* p = f;
David Malcolm96960882010-11-05 17:23:41 +0000188 while (*++f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 ;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
192 * they don't affect the amount of space we reserve.
193 */
194 if ((*f == 'l' || *f == 'z') &&
195 (f[1] == 'd' || f[1] == 'u'))
196 ++f;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 switch (*f) {
199 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100200 {
201 int c = va_arg(count, int);
202 if (c < 0 || c > 255) {
203 PyErr_SetString(PyExc_OverflowError,
204 "PyBytes_FromFormatV(): %c format "
205 "expects an integer in range [0; 255]");
206 return NULL;
207 }
208 n++;
209 break;
210 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 case '%':
212 n++;
213 break;
214 case 'd': case 'u': case 'i': case 'x':
215 (void) va_arg(count, int);
216 /* 20 bytes is enough to hold a 64-bit
217 integer. Decimal takes the most space.
218 This isn't enough for octal. */
219 n += 20;
220 break;
221 case 's':
222 s = va_arg(count, char*);
223 n += strlen(s);
224 break;
225 case 'p':
226 (void) va_arg(count, int);
227 /* maximum 64-bit pointer representation:
228 * 0xffffffffffffffff
229 * so 19 characters is enough.
230 * XXX I count 18 -- what's the extra for?
231 */
232 n += 19;
233 break;
234 default:
235 /* if we stumble upon an unknown
236 formatting code, copy the rest of
237 the format string to the output
238 string. (we cannot just skip the
239 code, since there's no way to know
240 what's in the argument list) */
241 n += strlen(p);
242 goto expand;
243 }
244 } else
245 n++;
246 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000247 expand:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 /* step 2: fill the buffer */
249 /* Since we've analyzed how much space we need for the worst case,
250 use sprintf directly instead of the slower PyOS_snprintf. */
251 string = PyBytes_FromStringAndSize(NULL, n);
252 if (!string)
253 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 s = PyBytes_AsString(string);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (f = format; *f; f++) {
258 if (*f == '%') {
259 const char* p = f++;
260 Py_ssize_t i;
261 int longflag = 0;
262 int size_tflag = 0;
263 /* parse the width.precision part (we're only
264 interested in the precision value, if any) */
265 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000266 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 n = (n*10) + *f++ - '0';
268 if (*f == '.') {
269 f++;
270 n = 0;
David Malcolm96960882010-11-05 17:23:41 +0000271 while (Py_ISDIGIT(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 n = (n*10) + *f++ - '0';
273 }
David Malcolm96960882010-11-05 17:23:41 +0000274 while (*f && *f != '%' && !Py_ISALPHA(*f))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 f++;
276 /* handle the long flag, but only for %ld and %lu.
277 others can be added when necessary. */
278 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
279 longflag = 1;
280 ++f;
281 }
282 /* handle the size_t flag. */
283 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
284 size_tflag = 1;
285 ++f;
286 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 switch (*f) {
289 case 'c':
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100290 {
291 int c = va_arg(vargs, int);
292 /* c has been checked for overflow in the first step */
293 *s++ = (unsigned char)c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 break;
Victor Stinnerc9362cf2013-12-13 12:14:44 +0100295 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 case 'd':
297 if (longflag)
298 sprintf(s, "%ld", va_arg(vargs, long));
299 else if (size_tflag)
300 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
301 va_arg(vargs, Py_ssize_t));
302 else
303 sprintf(s, "%d", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 'u':
307 if (longflag)
308 sprintf(s, "%lu",
309 va_arg(vargs, unsigned long));
310 else if (size_tflag)
311 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
312 va_arg(vargs, size_t));
313 else
314 sprintf(s, "%u",
315 va_arg(vargs, unsigned int));
316 s += strlen(s);
317 break;
318 case 'i':
319 sprintf(s, "%i", va_arg(vargs, int));
320 s += strlen(s);
321 break;
322 case 'x':
323 sprintf(s, "%x", va_arg(vargs, int));
324 s += strlen(s);
325 break;
326 case 's':
327 p = va_arg(vargs, char*);
328 i = strlen(p);
329 if (n > 0 && i > n)
330 i = n;
331 Py_MEMCPY(s, p, i);
332 s += i;
333 break;
334 case 'p':
335 sprintf(s, "%p", va_arg(vargs, void*));
336 /* %p is ill-defined: ensure leading 0x. */
337 if (s[1] == 'X')
338 s[1] = 'x';
339 else if (s[1] != 'x') {
340 memmove(s+2, s, strlen(s)+1);
341 s[0] = '0';
342 s[1] = 'x';
343 }
344 s += strlen(s);
345 break;
346 case '%':
347 *s++ = '%';
348 break;
349 default:
350 strcpy(s, p);
351 s += strlen(s);
352 goto end;
353 }
354 } else
355 *s++ = *f;
356 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000357
358 end:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
360 return string;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000361}
362
363PyObject *
364PyBytes_FromFormat(const char *format, ...)
365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyObject* ret;
367 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000368
369#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000373#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ret = PyBytes_FromFormatV(format, vargs);
375 va_end(vargs);
376 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000377}
378
Ethan Furmanb95b5612015-01-23 20:05:18 -0800379/* Helpers for formatstring */
380
381Py_LOCAL_INLINE(PyObject *)
382getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
383{
384 Py_ssize_t argidx = *p_argidx;
385 if (argidx < arglen) {
386 (*p_argidx)++;
387 if (arglen < 0)
388 return args;
389 else
390 return PyTuple_GetItem(args, argidx);
391 }
392 PyErr_SetString(PyExc_TypeError,
393 "not enough arguments for format string");
394 return NULL;
395}
396
397/* Format codes
398 * F_LJUST '-'
399 * F_SIGN '+'
400 * F_BLANK ' '
401 * F_ALT '#'
402 * F_ZERO '0'
403 */
#define F_LJUST (1<<0)  /* '-' flag */
#define F_SIGN  (1<<1)  /* '+' flag */
#define F_BLANK (1<<2)  /* ' ' flag */
#define F_ALT   (1<<3)  /* '#' flag */
#define F_ZERO  (1<<4)  /* '0' flag */
409
410/* Returns a new reference to a PyBytes object, or NULL on failure. */
411
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200412static char*
413formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200414 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800415{
416 char *p;
417 PyObject *result;
418 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200419 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800420
421 x = PyFloat_AsDouble(v);
422 if (x == -1.0 && PyErr_Occurred()) {
423 PyErr_Format(PyExc_TypeError, "float argument required, "
424 "not %.200s", Py_TYPE(v)->tp_name);
425 return NULL;
426 }
427
428 if (prec < 0)
429 prec = 6;
430
431 p = PyOS_double_to_string(x, type, prec,
432 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
433
434 if (p == NULL)
435 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200436
437 len = strlen(p);
438 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200439 str = _PyBytesWriter_Prepare(writer, str, len);
440 if (str == NULL)
441 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200442 Py_MEMCPY(str, p, len);
443 str += len;
444 return str;
445 }
446
447 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800448 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200449 *p_result = result;
450 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800451}
452
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300453static PyObject *
454formatlong(PyObject *v, int flags, int prec, int type)
455{
456 PyObject *result, *iobj;
457 if (type == 'i')
458 type = 'd';
459 if (PyLong_Check(v))
460 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
461 if (PyNumber_Check(v)) {
462 /* make sure number is a type of integer for o, x, and X */
463 if (type == 'o' || type == 'x' || type == 'X')
464 iobj = PyNumber_Index(v);
465 else
466 iobj = PyNumber_Long(v);
467 if (iobj == NULL) {
468 if (!PyErr_ExceptionMatches(PyExc_TypeError))
469 return NULL;
470 }
471 else if (!PyLong_Check(iobj))
472 Py_CLEAR(iobj);
473 if (iobj != NULL) {
474 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
475 Py_DECREF(iobj);
476 return result;
477 }
478 }
479 PyErr_Format(PyExc_TypeError,
480 "%%%c format: %s is required, not %.200s", type,
481 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
482 : "a number",
483 Py_TYPE(v)->tp_name);
484 return NULL;
485}
486
487static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800489{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200490 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
491 *p = PyBytes_AS_STRING(arg)[0];
492 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800493 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200494 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
495 *p = PyByteArray_AS_STRING(arg)[0];
496 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800497 }
498 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300499 PyObject *iobj;
500 long ival;
501 int overflow;
502 /* make sure number is a type of integer */
503 if (PyLong_Check(arg)) {
504 ival = PyLong_AsLongAndOverflow(arg, &overflow);
505 }
506 else {
507 iobj = PyNumber_Index(arg);
508 if (iobj == NULL) {
509 if (!PyErr_ExceptionMatches(PyExc_TypeError))
510 return 0;
511 goto onError;
512 }
513 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
514 Py_DECREF(iobj);
515 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300516 if (!overflow && ival == -1 && PyErr_Occurred())
517 goto onError;
518 if (overflow || !(0 <= ival && ival <= 255)) {
519 PyErr_SetString(PyExc_OverflowError,
520 "%c arg not in range(256)");
521 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800522 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300523 *p = (char)ival;
524 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800525 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300526 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200527 PyErr_SetString(PyExc_TypeError,
528 "%c requires an integer in range(256) or a single byte");
529 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800530}
531
532static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200535 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800536 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800537 /* is it a bytes object? */
538 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200539 *pbuf = PyBytes_AS_STRING(v);
540 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800541 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200542 return v;
543 }
544 if (PyByteArray_Check(v)) {
545 *pbuf = PyByteArray_AS_STRING(v);
546 *plen = PyByteArray_GET_SIZE(v);
547 Py_INCREF(v);
548 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800549 }
550 /* does it support __bytes__? */
551 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
552 if (func != NULL) {
553 result = PyObject_CallFunctionObjArgs(func, NULL);
554 Py_DECREF(func);
555 if (result == NULL)
556 return NULL;
557 if (!PyBytes_Check(result)) {
558 PyErr_Format(PyExc_TypeError,
559 "__bytes__ returned non-bytes (type %.200s)",
560 Py_TYPE(result)->tp_name);
561 Py_DECREF(result);
562 return NULL;
563 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200564 *pbuf = PyBytes_AS_STRING(result);
565 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800566 return result;
567 }
568 PyErr_Format(PyExc_TypeError,
569 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
570 Py_TYPE(v)->tp_name);
571 return NULL;
572}
573
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200574/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800575
576PyObject *
577_PyBytes_Format(PyObject *format, PyObject *args)
578{
579 char *fmt, *res;
580 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 _PyBytesWriter writer;
585
Ethan Furmanb95b5612015-01-23 20:05:18 -0800586 if (format == NULL || !PyBytes_Check(format) || args == NULL) {
587 PyErr_BadInternalCall();
588 return NULL;
589 }
590 fmt = PyBytes_AS_STRING(format);
591 fmtcnt = PyBytes_GET_SIZE(format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592
593 _PyBytesWriter_Init(&writer);
594
595 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
596 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800597 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200598 writer.overallocate = 1;
599
Ethan Furmanb95b5612015-01-23 20:05:18 -0800600 if (PyTuple_Check(args)) {
601 arglen = PyTuple_GET_SIZE(args);
602 argidx = 0;
603 }
604 else {
605 arglen = -1;
606 argidx = -2;
607 }
608 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
609 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
610 !PyByteArray_Check(args)) {
611 dict = args;
612 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200613
Ethan Furmanb95b5612015-01-23 20:05:18 -0800614 while (--fmtcnt >= 0) {
615 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200616 Py_ssize_t len;
617 char *pos;
618
619 pos = strchr(fmt + 1, '%');
620 if (pos != NULL)
621 len = pos - fmt;
622 else {
623 len = PyBytes_GET_SIZE(format);
624 len -= (fmt - PyBytes_AS_STRING(format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800625 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 assert(len != 0);
627
628 Py_MEMCPY(res, fmt, len);
629 res += len;
630 fmt += len;
631 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 }
633 else {
634 /* Got a format specifier */
635 int flags = 0;
636 Py_ssize_t width = -1;
637 int prec = -1;
638 int c = '\0';
639 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 PyObject *v = NULL;
641 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200642 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200644 Py_ssize_t len = 0;
645 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 Py_ssize_t alloc;
647#ifdef Py_DEBUG
648 char *before;
649#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 fmt++;
652 if (*fmt == '(') {
653 char *keystart;
654 Py_ssize_t keylen;
655 PyObject *key;
656 int pcount = 1;
657
658 if (dict == NULL) {
659 PyErr_SetString(PyExc_TypeError,
660 "format requires a mapping");
661 goto error;
662 }
663 ++fmt;
664 --fmtcnt;
665 keystart = fmt;
666 /* Skip over balanced parentheses */
667 while (pcount > 0 && --fmtcnt >= 0) {
668 if (*fmt == ')')
669 --pcount;
670 else if (*fmt == '(')
671 ++pcount;
672 fmt++;
673 }
674 keylen = fmt - keystart - 1;
675 if (fmtcnt < 0 || pcount > 0) {
676 PyErr_SetString(PyExc_ValueError,
677 "incomplete format key");
678 goto error;
679 }
680 key = PyBytes_FromStringAndSize(keystart,
681 keylen);
682 if (key == NULL)
683 goto error;
684 if (args_owned) {
685 Py_DECREF(args);
686 args_owned = 0;
687 }
688 args = PyObject_GetItem(dict, key);
689 Py_DECREF(key);
690 if (args == NULL) {
691 goto error;
692 }
693 args_owned = 1;
694 arglen = -1;
695 argidx = -2;
696 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200697
698 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800699 while (--fmtcnt >= 0) {
700 switch (c = *fmt++) {
701 case '-': flags |= F_LJUST; continue;
702 case '+': flags |= F_SIGN; continue;
703 case ' ': flags |= F_BLANK; continue;
704 case '#': flags |= F_ALT; continue;
705 case '0': flags |= F_ZERO; continue;
706 }
707 break;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 if (c == '*') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 if (!PyLong_Check(v)) {
716 PyErr_SetString(PyExc_TypeError,
717 "* wants int");
718 goto error;
719 }
720 width = PyLong_AsSsize_t(v);
721 if (width == -1 && PyErr_Occurred())
722 goto error;
723 if (width < 0) {
724 flags |= F_LJUST;
725 width = -width;
726 }
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 else if (c >= 0 && isdigit(c)) {
731 width = c - '0';
732 while (--fmtcnt >= 0) {
733 c = Py_CHARMASK(*fmt++);
734 if (!isdigit(c))
735 break;
736 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
737 PyErr_SetString(
738 PyExc_ValueError,
739 "width too big");
740 goto error;
741 }
742 width = width*10 + (c - '0');
743 }
744 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200745
746 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800747 if (c == '.') {
748 prec = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!PyLong_Check(v)) {
756 PyErr_SetString(
757 PyExc_TypeError,
758 "* wants int");
759 goto error;
760 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200761 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (prec == -1 && PyErr_Occurred())
763 goto error;
764 if (prec < 0)
765 prec = 0;
766 if (--fmtcnt >= 0)
767 c = *fmt++;
768 }
769 else if (c >= 0 && isdigit(c)) {
770 prec = c - '0';
771 while (--fmtcnt >= 0) {
772 c = Py_CHARMASK(*fmt++);
773 if (!isdigit(c))
774 break;
775 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
776 PyErr_SetString(
777 PyExc_ValueError,
778 "prec too big");
779 goto error;
780 }
781 prec = prec*10 + (c - '0');
782 }
783 }
784 } /* prec */
785 if (fmtcnt >= 0) {
786 if (c == 'h' || c == 'l' || c == 'L') {
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 }
790 }
791 if (fmtcnt < 0) {
792 PyErr_SetString(PyExc_ValueError,
793 "incomplete format");
794 goto error;
795 }
796 if (c != '%') {
797 v = getnextarg(args, arglen, &argidx);
798 if (v == NULL)
799 goto error;
800 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200801
802 if (fmtcnt < 0) {
803 /* last writer: disable writer overallocation */
804 writer.overallocate = 0;
805 }
806
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 sign = 0;
808 fill = ' ';
809 switch (c) {
810 case '%':
811 pbuf = "%";
812 len = 1;
813 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200814
Ethan Furman62e977f2015-03-11 08:17:00 -0700815 case 'r':
816 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800817 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200818 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800819 if (temp == NULL)
820 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200821 assert(PyUnicode_IS_ASCII(temp));
822 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800824 if (prec >= 0 && len > prec)
825 len = prec;
826 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200827
Ethan Furmanb95b5612015-01-23 20:05:18 -0800828 case 's':
829 // %s is only for 2/3 code; 3 only code should use %b
830 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200831 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800832 if (temp == NULL)
833 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800834 if (prec >= 0 && len > prec)
835 len = prec;
836 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200837
Ethan Furmanb95b5612015-01-23 20:05:18 -0800838 case 'i':
839 case 'd':
840 case 'u':
841 case 'o':
842 case 'x':
843 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200844 if (PyLong_CheckExact(v)
845 && width == -1 && prec == -1
846 && !(flags & (F_SIGN | F_BLANK))
847 && c != 'X')
848 {
849 /* Fast path */
850 int alternate = flags & F_ALT;
851 int base;
852
853 switch(c)
854 {
855 default:
856 assert(0 && "'type' not in [diuoxX]");
857 case 'd':
858 case 'i':
859 case 'u':
860 base = 10;
861 break;
862 case 'o':
863 base = 8;
864 break;
865 case 'x':
866 case 'X':
867 base = 16;
868 break;
869 }
870
871 /* Fast path */
872 writer.min_size -= 2; /* size preallocated by "%d" */
873 res = _PyLong_FormatBytesWriter(&writer, res,
874 v, base, alternate);
875 if (res == NULL)
876 goto error;
877 continue;
878 }
879
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300880 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200881 if (!temp)
882 goto error;
883 assert(PyUnicode_IS_ASCII(temp));
884 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885 len = PyUnicode_GET_LENGTH(temp);
886 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800887 if (flags & F_ZERO)
888 fill = '0';
889 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200890
Ethan Furmanb95b5612015-01-23 20:05:18 -0800891 case 'e':
892 case 'E':
893 case 'f':
894 case 'F':
895 case 'g':
896 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200897 if (width == -1 && prec == -1
898 && !(flags & (F_SIGN | F_BLANK)))
899 {
900 /* Fast path */
Victor Stinnerad771582015-10-09 12:38:53 +0200901 writer.min_size -= 2; /* size preallocated by "%f" */
902 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200903 if (res == NULL)
904 goto error;
905 continue;
906 }
907
Victor Stinnerad771582015-10-09 12:38:53 +0200908 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800909 goto error;
910 pbuf = PyBytes_AS_STRING(temp);
911 len = PyBytes_GET_SIZE(temp);
912 sign = 1;
913 if (flags & F_ZERO)
914 fill = '0';
915 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200916
Ethan Furmanb95b5612015-01-23 20:05:18 -0800917 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200918 pbuf = &onechar;
919 len = byte_converter(v, &onechar);
920 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800921 goto error;
922 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200923
Ethan Furmanb95b5612015-01-23 20:05:18 -0800924 default:
925 PyErr_Format(PyExc_ValueError,
926 "unsupported format character '%c' (0x%x) "
927 "at index %zd",
928 c, c,
929 (Py_ssize_t)(fmt - 1 -
930 PyBytes_AsString(format)));
931 goto error;
932 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200933
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 if (sign) {
935 if (*pbuf == '-' || *pbuf == '+') {
936 sign = *pbuf++;
937 len--;
938 }
939 else if (flags & F_SIGN)
940 sign = '+';
941 else if (flags & F_BLANK)
942 sign = ' ';
943 else
944 sign = 0;
945 }
946 if (width < len)
947 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200948
949 alloc = width;
950 if (sign != 0 && len == width)
951 alloc++;
952 if (alloc > 1) {
953 res = _PyBytesWriter_Prepare(&writer, res, alloc - 1);
954 if (res == NULL)
955 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800956 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200957#ifdef Py_DEBUG
958 before = res;
959#endif
960
961 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800962 if (sign) {
963 if (fill != ' ')
964 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800965 if (width > len)
966 width--;
967 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200968
969 /* Write the numeric prefix for "x", "X" and "o" formats
970 if the alternate form is used.
971 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800972 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
973 assert(pbuf[0] == '0');
974 assert(pbuf[1] == c);
975 if (fill != ' ') {
976 *res++ = *pbuf++;
977 *res++ = *pbuf++;
978 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800979 width -= 2;
980 if (width < 0)
981 width = 0;
982 len -= 2;
983 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200984
985 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800986 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200987 memset(res, fill, width - len);
988 res += (width - len);
989 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200991
992 /* If padding with spaces: write sign if needed and/or numeric
993 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 if (fill == ' ') {
995 if (sign)
996 *res++ = sign;
997 if ((flags & F_ALT) &&
998 (c == 'x' || c == 'X')) {
999 assert(pbuf[0] == '0');
1000 assert(pbuf[1] == c);
1001 *res++ = *pbuf++;
1002 *res++ = *pbuf++;
1003 }
1004 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001005
1006 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001007 Py_MEMCPY(res, pbuf, len);
1008 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Pad right with the fill character if needed */
1011 if (width > len) {
1012 memset(res, ' ', width - len);
1013 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001014 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001015
Ethan Furmanb95b5612015-01-23 20:05:18 -08001016 if (dict && (argidx < arglen) && c != '%') {
1017 PyErr_SetString(PyExc_TypeError,
1018 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001019 Py_XDECREF(temp);
1020 goto error;
1021 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001022 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001023
1024#ifdef Py_DEBUG
1025 /* check that we computed the exact size for this write */
1026 assert((res - before) == alloc);
1027#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001028 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001029
1030 /* If overallocation was disabled, ensure that it was the last
1031 write. Otherwise, we missed an optimization */
1032 assert(writer.overallocate || fmtcnt < 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001033 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001034
Ethan Furmanb95b5612015-01-23 20:05:18 -08001035 if (argidx < arglen && !dict) {
1036 PyErr_SetString(PyExc_TypeError,
1037 "not all arguments converted during bytes formatting");
1038 goto error;
1039 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001040
Ethan Furmanb95b5612015-01-23 20:05:18 -08001041 if (args_owned) {
1042 Py_DECREF(args);
1043 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045
1046 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001047 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001048 if (args_owned) {
1049 Py_DECREF(args);
1050 }
1051 return NULL;
1052}
1053
1054/* =-= */
1055
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001056static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001057bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001058{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001060}
1061
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001062/* Unescape a backslash-escaped string. If unicode is non-zero,
1063 the string is a u-literal. If recode_encoding is non-zero,
1064 the string is UTF-8 encoded and should be re-encoded in the
1065 specified encoding. */
1066
1067PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 Py_ssize_t len,
1069 const char *errors,
1070 Py_ssize_t unicode,
1071 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001072{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 int c;
1074 char *p, *buf;
1075 const char *end;
1076 PyObject *v;
1077 Py_ssize_t newlen = recode_encoding ? 4*len:len;
1078 v = PyBytes_FromStringAndSize((char *)NULL, newlen);
1079 if (v == NULL)
1080 return NULL;
1081 p = buf = PyBytes_AsString(v);
1082 end = s + len;
1083 while (s < end) {
1084 if (*s != '\\') {
1085 non_esc:
1086 if (recode_encoding && (*s & 0x80)) {
1087 PyObject *u, *w;
1088 char *r;
1089 const char* t;
1090 Py_ssize_t rn;
1091 t = s;
1092 /* Decode non-ASCII bytes as UTF-8. */
1093 while (t < end && (*t & 0x80)) t++;
1094 u = PyUnicode_DecodeUTF8(s, t - s, errors);
1095 if(!u) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 /* Recode them in target encoding. */
1098 w = PyUnicode_AsEncodedString(
1099 u, recode_encoding, errors);
1100 Py_DECREF(u);
1101 if (!w) goto failed;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 /* Append bytes to output buffer. */
1104 assert(PyBytes_Check(w));
1105 r = PyBytes_AS_STRING(w);
1106 rn = PyBytes_GET_SIZE(w);
1107 Py_MEMCPY(p, r, rn);
1108 p += rn;
1109 Py_DECREF(w);
1110 s = t;
1111 } else {
1112 *p++ = *s++;
1113 }
1114 continue;
1115 }
1116 s++;
1117 if (s==end) {
1118 PyErr_SetString(PyExc_ValueError,
1119 "Trailing \\ in string");
1120 goto failed;
1121 }
1122 switch (*s++) {
1123 /* XXX This assumes ASCII! */
1124 case '\n': break;
1125 case '\\': *p++ = '\\'; break;
1126 case '\'': *p++ = '\''; break;
1127 case '\"': *p++ = '\"'; break;
1128 case 'b': *p++ = '\b'; break;
1129 case 'f': *p++ = '\014'; break; /* FF */
1130 case 't': *p++ = '\t'; break;
1131 case 'n': *p++ = '\n'; break;
1132 case 'r': *p++ = '\r'; break;
1133 case 'v': *p++ = '\013'; break; /* VT */
1134 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1135 case '0': case '1': case '2': case '3':
1136 case '4': case '5': case '6': case '7':
1137 c = s[-1] - '0';
1138 if (s < end && '0' <= *s && *s <= '7') {
1139 c = (c<<3) + *s++ - '0';
1140 if (s < end && '0' <= *s && *s <= '7')
1141 c = (c<<3) + *s++ - '0';
1142 }
1143 *p++ = c;
1144 break;
1145 case 'x':
David Malcolm96960882010-11-05 17:23:41 +00001146 if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 unsigned int x = 0;
1148 c = Py_CHARMASK(*s);
1149 s++;
David Malcolm96960882010-11-05 17:23:41 +00001150 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 x = c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001152 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 x = 10 + c - 'a';
1154 else
1155 x = 10 + c - 'A';
1156 x = x << 4;
1157 c = Py_CHARMASK(*s);
1158 s++;
David Malcolm96960882010-11-05 17:23:41 +00001159 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 x += c - '0';
David Malcolm96960882010-11-05 17:23:41 +00001161 else if (Py_ISLOWER(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 x += 10 + c - 'a';
1163 else
1164 x += 10 + c - 'A';
1165 *p++ = x;
1166 break;
1167 }
1168 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001169 PyErr_Format(PyExc_ValueError,
1170 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001171 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 goto failed;
1173 }
1174 if (strcmp(errors, "replace") == 0) {
1175 *p++ = '?';
1176 } else if (strcmp(errors, "ignore") == 0)
1177 /* do nothing */;
1178 else {
1179 PyErr_Format(PyExc_ValueError,
1180 "decoding error; unknown "
1181 "error handling code: %.400s",
1182 errors);
1183 goto failed;
1184 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001185 /* skip \x */
1186 if (s < end && Py_ISXDIGIT(s[0]))
1187 s++; /* and a hexdigit */
1188 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 default:
1190 *p++ = '\\';
1191 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001192 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 UTF-8 bytes may follow. */
1194 }
1195 }
1196 if (p-buf < newlen)
1197 _PyBytes_Resize(&v, p - buf);
1198 return v;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001199 failed:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 Py_DECREF(v);
1201 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001202}
1203
1204/* -------------------------------------------------------------------- */
1205/* object api */
1206
1207Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001208PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001209{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 if (!PyBytes_Check(op)) {
1211 PyErr_Format(PyExc_TypeError,
1212 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1213 return -1;
1214 }
1215 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001216}
1217
1218char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001219PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 if (!PyBytes_Check(op)) {
1222 PyErr_Format(PyExc_TypeError,
1223 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1224 return NULL;
1225 }
1226 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001227}
1228
1229int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001230PyBytes_AsStringAndSize(PyObject *obj,
1231 char **s,
1232 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 if (s == NULL) {
1235 PyErr_BadInternalCall();
1236 return -1;
1237 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 if (!PyBytes_Check(obj)) {
1240 PyErr_Format(PyExc_TypeError,
1241 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1242 return -1;
1243 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 *s = PyBytes_AS_STRING(obj);
1246 if (len != NULL)
1247 *len = PyBytes_GET_SIZE(obj);
1248 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001249 PyErr_SetString(PyExc_ValueError,
1250 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 return -1;
1252 }
1253 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001254}
Neal Norwitz6968b052007-02-27 19:02:19 +00001255
1256/* -------------------------------------------------------------------- */
1257/* Methods */
1258
Eric Smith0923d1d2009-04-16 20:16:10 +00001259#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001260
1261#include "stringlib/fastsearch.h"
1262#include "stringlib/count.h"
1263#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001264#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001265#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001266#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001267#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001268
Eric Smith0f78bff2009-11-30 01:01:42 +00001269#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001270
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001271PyObject *
1272PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001273{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001274 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001276 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 unsigned char quote, *s, *p;
1279
1280 /* Compute size of output string */
1281 squotes = dquotes = 0;
1282 newsize = 3; /* b'' */
1283 s = (unsigned char*)op->ob_sval;
1284 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001285 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001286 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001287 case '\'': squotes++; break;
1288 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001290 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 default:
1292 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001293 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001295 if (newsize > PY_SSIZE_T_MAX - incr)
1296 goto overflow;
1297 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 }
1299 quote = '\'';
1300 if (smartquotes && squotes && !dquotes)
1301 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001302 if (squotes && quote == '\'') {
1303 if (newsize > PY_SSIZE_T_MAX - squotes)
1304 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307
1308 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 if (v == NULL) {
1310 return NULL;
1311 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001312 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001313
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001314 *p++ = 'b', *p++ = quote;
1315 for (i = 0; i < length; i++) {
1316 unsigned char c = op->ob_sval[i];
1317 if (c == quote || c == '\\')
1318 *p++ = '\\', *p++ = c;
1319 else if (c == '\t')
1320 *p++ = '\\', *p++ = 't';
1321 else if (c == '\n')
1322 *p++ = '\\', *p++ = 'n';
1323 else if (c == '\r')
1324 *p++ = '\\', *p++ = 'r';
1325 else if (c < ' ' || c >= 0x7f) {
1326 *p++ = '\\';
1327 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001328 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1329 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 else
1332 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001335 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001336 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001337
1338 overflow:
1339 PyErr_SetString(PyExc_OverflowError,
1340 "bytes object is too large to make repr");
1341 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001342}
1343
Neal Norwitz6968b052007-02-27 19:02:19 +00001344static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001345bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001348}
1349
Neal Norwitz6968b052007-02-27 19:02:19 +00001350static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001351bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001352{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 if (Py_BytesWarningFlag) {
1354 if (PyErr_WarnEx(PyExc_BytesWarning,
1355 "str() on a bytes instance", 1))
1356 return NULL;
1357 }
1358 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001359}
1360
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001361static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001362bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001365}
Neal Norwitz6968b052007-02-27 19:02:19 +00001366
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001367/* This is also used by PyBytes_Concat() */
1368static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001369bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 Py_ssize_t size;
1372 Py_buffer va, vb;
1373 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 va.len = -1;
1376 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001377 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1378 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1380 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1381 goto done;
1382 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 /* Optimize end cases */
1385 if (va.len == 0 && PyBytes_CheckExact(b)) {
1386 result = b;
1387 Py_INCREF(result);
1388 goto done;
1389 }
1390 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1391 result = a;
1392 Py_INCREF(result);
1393 goto done;
1394 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 size = va.len + vb.len;
1397 if (size < 0) {
1398 PyErr_NoMemory();
1399 goto done;
1400 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 result = PyBytes_FromStringAndSize(NULL, size);
1403 if (result != NULL) {
1404 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1405 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1406 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001407
1408 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 if (va.len != -1)
1410 PyBuffer_Release(&va);
1411 if (vb.len != -1)
1412 PyBuffer_Release(&vb);
1413 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414}
Neal Norwitz6968b052007-02-27 19:02:19 +00001415
1416static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001417bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001418{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001419 Py_ssize_t i;
1420 Py_ssize_t j;
1421 Py_ssize_t size;
1422 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 size_t nbytes;
1424 if (n < 0)
1425 n = 0;
1426 /* watch out for overflows: the size can overflow int,
1427 * and the # of bytes needed can overflow size_t
1428 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001429 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 PyErr_SetString(PyExc_OverflowError,
1431 "repeated bytes are too long");
1432 return NULL;
1433 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001434 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1436 Py_INCREF(a);
1437 return (PyObject *)a;
1438 }
1439 nbytes = (size_t)size;
1440 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1441 PyErr_SetString(PyExc_OverflowError,
1442 "repeated bytes are too long");
1443 return NULL;
1444 }
1445 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1446 if (op == NULL)
1447 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001448 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 op->ob_shash = -1;
1450 op->ob_sval[size] = '\0';
1451 if (Py_SIZE(a) == 1 && n > 0) {
1452 memset(op->ob_sval, a->ob_sval[0] , n);
1453 return (PyObject *) op;
1454 }
1455 i = 0;
1456 if (i < size) {
1457 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1458 i = Py_SIZE(a);
1459 }
1460 while (i < size) {
1461 j = (i <= size-i) ? i : size-i;
1462 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1463 i += j;
1464 }
1465 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001466}
1467
Guido van Rossum98297ee2007-11-06 21:34:58 +00001468static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001469bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001470{
1471 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1472 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001473 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001474 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001475 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001476 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001477 return -1;
1478 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1479 varg.buf, varg.len, 0);
1480 PyBuffer_Release(&varg);
1481 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001482 }
1483 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001484 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1485 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001486 }
1487
Antoine Pitrou0010d372010-08-15 17:12:55 +00001488 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001489}
1490
Neal Norwitz6968b052007-02-27 19:02:19 +00001491static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001492bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001493{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 if (i < 0 || i >= Py_SIZE(a)) {
1495 PyErr_SetString(PyExc_IndexError, "index out of range");
1496 return NULL;
1497 }
1498 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001499}
1500
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001501Py_LOCAL(int)
1502bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1503{
1504 int cmp;
1505 Py_ssize_t len;
1506
1507 len = Py_SIZE(a);
1508 if (Py_SIZE(b) != len)
1509 return 0;
1510
1511 if (a->ob_sval[0] != b->ob_sval[0])
1512 return 0;
1513
1514 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1515 return (cmp == 0);
1516}
1517
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001518static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001519bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 int c;
1522 Py_ssize_t len_a, len_b;
1523 Py_ssize_t min_len;
1524 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001525 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 /* Make sure both arguments are strings. */
1528 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001529 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001530 rc = PyObject_IsInstance((PyObject*)a,
1531 (PyObject*)&PyUnicode_Type);
1532 if (!rc)
1533 rc = PyObject_IsInstance((PyObject*)b,
1534 (PyObject*)&PyUnicode_Type);
1535 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001536 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001537 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001538 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001539 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001540 return NULL;
1541 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001542 else {
1543 rc = PyObject_IsInstance((PyObject*)a,
1544 (PyObject*)&PyLong_Type);
1545 if (!rc)
1546 rc = PyObject_IsInstance((PyObject*)b,
1547 (PyObject*)&PyLong_Type);
1548 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001549 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001550 if (rc) {
1551 if (PyErr_WarnEx(PyExc_BytesWarning,
1552 "Comparison between bytes and int", 1))
1553 return NULL;
1554 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001555 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 }
1557 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001558 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001559 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001561 case Py_EQ:
1562 case Py_LE:
1563 case Py_GE:
1564 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001566 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001567 case Py_NE:
1568 case Py_LT:
1569 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001571 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001572 default:
1573 PyErr_BadArgument();
1574 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
1576 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001577 else if (op == Py_EQ || op == Py_NE) {
1578 int eq = bytes_compare_eq(a, b);
1579 eq ^= (op == Py_NE);
1580 result = eq ? Py_True : Py_False;
1581 }
1582 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001583 len_a = Py_SIZE(a);
1584 len_b = Py_SIZE(b);
1585 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001586 if (min_len > 0) {
1587 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001588 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001589 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001591 else
1592 c = 0;
1593 if (c == 0)
1594 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1595 switch (op) {
1596 case Py_LT: c = c < 0; break;
1597 case Py_LE: c = c <= 0; break;
1598 case Py_GT: c = c > 0; break;
1599 case Py_GE: c = c >= 0; break;
1600 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001601 PyErr_BadArgument();
1602 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001603 }
1604 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 Py_INCREF(result);
1608 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001609}
1610
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001611static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001612bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001613{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001614 if (a->ob_shash == -1) {
1615 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001616 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001617 }
1618 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001619}
1620
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001621static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001622bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001623{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 if (PyIndex_Check(item)) {
1625 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1626 if (i == -1 && PyErr_Occurred())
1627 return NULL;
1628 if (i < 0)
1629 i += PyBytes_GET_SIZE(self);
1630 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1631 PyErr_SetString(PyExc_IndexError,
1632 "index out of range");
1633 return NULL;
1634 }
1635 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1636 }
1637 else if (PySlice_Check(item)) {
1638 Py_ssize_t start, stop, step, slicelength, cur, i;
1639 char* source_buf;
1640 char* result_buf;
1641 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001642
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001643 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 PyBytes_GET_SIZE(self),
1645 &start, &stop, &step, &slicelength) < 0) {
1646 return NULL;
1647 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 if (slicelength <= 0) {
1650 return PyBytes_FromStringAndSize("", 0);
1651 }
1652 else if (start == 0 && step == 1 &&
1653 slicelength == PyBytes_GET_SIZE(self) &&
1654 PyBytes_CheckExact(self)) {
1655 Py_INCREF(self);
1656 return (PyObject *)self;
1657 }
1658 else if (step == 1) {
1659 return PyBytes_FromStringAndSize(
1660 PyBytes_AS_STRING(self) + start,
1661 slicelength);
1662 }
1663 else {
1664 source_buf = PyBytes_AS_STRING(self);
1665 result = PyBytes_FromStringAndSize(NULL, slicelength);
1666 if (result == NULL)
1667 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 result_buf = PyBytes_AS_STRING(result);
1670 for (cur = start, i = 0; i < slicelength;
1671 cur += step, i++) {
1672 result_buf[i] = source_buf[cur];
1673 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 return result;
1676 }
1677 }
1678 else {
1679 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001680 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 Py_TYPE(item)->tp_name);
1682 return NULL;
1683 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001684}
1685
1686static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001687bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001688{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1690 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001691}
1692
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001693static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 (lenfunc)bytes_length, /*sq_length*/
1695 (binaryfunc)bytes_concat, /*sq_concat*/
1696 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1697 (ssizeargfunc)bytes_item, /*sq_item*/
1698 0, /*sq_slice*/
1699 0, /*sq_ass_item*/
1700 0, /*sq_ass_slice*/
1701 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001702};
1703
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001704static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001705 (lenfunc)bytes_length,
1706 (binaryfunc)bytes_subscript,
1707 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001708};
1709
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001710static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001711 (getbufferproc)bytes_buffer_getbuffer,
1712 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001713};
1714
1715
/* Strip modes, passed as the striptype argument of do_xstrip(), do_strip()
   and do_argstrip().  The helpers strip the leading end unless the mode is
   RIGHTSTRIP and the trailing end unless the mode is LEFTSTRIP. */
#define LEFTSTRIP 0     /* leading end only */
#define RIGHTSTRIP 1    /* trailing end only */
#define BOTHSTRIP 2     /* both ends */
1719
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001720/*[clinic input]
1721bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001722
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001723 sep: object = None
        The delimiter according to which to split the bytes.
1725 None (the default value) means split on ASCII whitespace characters
1726 (space, tab, return, newline, formfeed, vertical tab).
1727 maxsplit: Py_ssize_t = -1
1728 Maximum number of splits to do.
1729 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001730
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001731Return a list of the sections in the bytes, using sep as the delimiter.
1732[clinic start generated code]*/
1733
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001734static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001735bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001736/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001737{
1738 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001739 const char *s = PyBytes_AS_STRING(self), *sub;
1740 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001741 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 if (maxsplit < 0)
1744 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001745 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001747 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 return NULL;
1749 sub = vsub.buf;
1750 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1753 PyBuffer_Release(&vsub);
1754 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001755}
1756
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001757/*[clinic input]
1758bytes.partition
1759
1760 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001761 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001762 /
1763
1764Partition the bytes into three parts using the given separator.
1765
1766This will search for the separator sep in the bytes. If the separator is found,
1767returns a 3-tuple containing the part before the separator, the separator
1768itself, and the part after it.
1769
1770If the separator is not found, returns a 3-tuple containing the original bytes
1771object and two empty bytes objects.
1772[clinic start generated code]*/
1773
Neal Norwitz6968b052007-02-27 19:02:19 +00001774static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001775bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001776/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001777{
Neal Norwitz6968b052007-02-27 19:02:19 +00001778 return stringlib_partition(
1779 (PyObject*) self,
1780 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001781 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001782 );
1783}
1784
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001785/*[clinic input]
1786bytes.rpartition
1787
1788 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001789 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001790 /
1791
1792Partition the bytes into three parts using the given separator.
1793
This will search for the separator sep in the bytes, starting at the end. If
1795the separator is found, returns a 3-tuple containing the part before the
1796separator, the separator itself, and the part after it.
1797
1798If the separator is not found, returns a 3-tuple containing two empty bytes
1799objects and the original bytes object.
1800[clinic start generated code]*/
1801
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001802static PyObject *
1803bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001804/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001805{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 return stringlib_rpartition(
1807 (PyObject*) self,
1808 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001809 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001811}
1812
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001813/*[clinic input]
1814bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001815
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001816Return a list of the sections in the bytes, using sep as the delimiter.
1817
1818Splitting is done starting at the end of the bytes and working to the front.
1819[clinic start generated code]*/
1820
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001821static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001822bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001823/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001824{
1825 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001826 const char *s = PyBytes_AS_STRING(self), *sub;
1827 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001828 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 if (maxsplit < 0)
1831 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001834 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001835 return NULL;
1836 sub = vsub.buf;
1837 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1840 PyBuffer_Release(&vsub);
1841 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001842}
1843
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001844
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001845/*[clinic input]
1846bytes.join
1847
1848 iterable_of_bytes: object
1849 /
1850
1851Concatenate any number of bytes objects.
1852
1853The bytes whose method is called is inserted in between each pair.
1854
1855The result is returned as a new bytes object.
1856
1857Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1858[clinic start generated code]*/
1859
Neal Norwitz6968b052007-02-27 19:02:19 +00001860static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001861bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001862/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001863{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001864 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001865}
1866
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001867PyObject *
1868_PyBytes_Join(PyObject *sep, PyObject *x)
1869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 assert(sep != NULL && PyBytes_Check(sep));
1871 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001872 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001873}
1874
/* Helper macro to fix up start/end slice values against a length:
     - end greater than len is clipped to len;
     - a negative start or end has len added to it once and, if it is
       still negative, is clipped to 0.
   start is NOT clipped from above, so after the adjustment callers must
   still cope with start > end (or start > len).

   start and end must be modifiable lvalues.  Every argument is evaluated
   more than once, so none of them may have side effects.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   exactly one statement: it must be followed by a semicolon and is safe as
   the unbraced body of an if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001889
1890Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001891bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001894 char byte;
1895 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001897 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001899 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001900
Antoine Pitrouac65d962011-10-20 23:54:17 +02001901 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1902 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001903 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001904
Antoine Pitrouac65d962011-10-20 23:54:17 +02001905 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001906 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001907 return -2;
1908
1909 sub = subbuf.buf;
1910 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001911 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001912 else {
1913 sub = &byte;
1914 sub_len = 1;
1915 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001916 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001917
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001918 ADJUST_INDICES(start, end, len);
1919 if (end - start < sub_len)
1920 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001921 else if (sub_len == 1
1922#ifndef HAVE_MEMRCHR
1923 && dir > 0
1924#endif
1925 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001926 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001927 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001928 res = stringlib_fastsearch_memchr_1char(
1929 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001930 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001931 if (res >= 0)
1932 res += start;
1933 }
1934 else {
1935 if (dir > 0)
1936 res = stringlib_find_slice(
1937 PyBytes_AS_STRING(self), len,
1938 sub, sub_len, start, end);
1939 else
1940 res = stringlib_rfind_slice(
1941 PyBytes_AS_STRING(self), len,
1942 sub, sub_len, start, end);
1943 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001944
1945 if (subobj)
1946 PyBuffer_Release(&subbuf);
1947
1948 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001949}
1950
1951
1952PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001953"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001954\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001955Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001956such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001957arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001958\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001959Return -1 on failure.");
1960
Neal Norwitz6968b052007-02-27 19:02:19 +00001961static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001962bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001963{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 Py_ssize_t result = bytes_find_internal(self, args, +1);
1965 if (result == -2)
1966 return NULL;
1967 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001968}
1969
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001970
1971PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001972"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001973\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001974Like B.find() but raise ValueError when the substring is not found.");
1975
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001976static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001977bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001978{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 Py_ssize_t result = bytes_find_internal(self, args, +1);
1980 if (result == -2)
1981 return NULL;
1982 if (result == -1) {
1983 PyErr_SetString(PyExc_ValueError,
1984 "substring not found");
1985 return NULL;
1986 }
1987 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001988}
1989
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001990
1991PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001992"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001993\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001994Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001995such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001996arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001997\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001998Return -1 on failure.");
1999
Neal Norwitz6968b052007-02-27 19:02:19 +00002000static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002001bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002003 Py_ssize_t result = bytes_find_internal(self, args, -1);
2004 if (result == -2)
2005 return NULL;
2006 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002007}
2008
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002009
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002010PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002011"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012\n\
2013Like B.rfind() but raise ValueError when the substring is not found.");
2014
2015static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002016bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002017{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002018 Py_ssize_t result = bytes_find_internal(self, args, -1);
2019 if (result == -2)
2020 return NULL;
2021 if (result == -1) {
2022 PyErr_SetString(PyExc_ValueError,
2023 "substring not found");
2024 return NULL;
2025 }
2026 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002027}
2028
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029
2030Py_LOCAL_INLINE(PyObject *)
2031do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002032{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002033 Py_buffer vsep;
2034 char *s = PyBytes_AS_STRING(self);
2035 Py_ssize_t len = PyBytes_GET_SIZE(self);
2036 char *sep;
2037 Py_ssize_t seplen;
2038 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002039
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002040 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 return NULL;
2042 sep = vsep.buf;
2043 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002045 i = 0;
2046 if (striptype != RIGHTSTRIP) {
2047 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2048 i++;
2049 }
2050 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 j = len;
2053 if (striptype != LEFTSTRIP) {
2054 do {
2055 j--;
2056 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2057 j++;
2058 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002062 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2063 Py_INCREF(self);
2064 return (PyObject*)self;
2065 }
2066 else
2067 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002068}
2069
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
2071Py_LOCAL_INLINE(PyObject *)
2072do_strip(PyBytesObject *self, int striptype)
2073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 char *s = PyBytes_AS_STRING(self);
2075 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002077 i = 0;
2078 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002079 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 i++;
2081 }
2082 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002084 j = len;
2085 if (striptype != LEFTSTRIP) {
2086 do {
2087 j--;
David Malcolm96960882010-11-05 17:23:41 +00002088 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 j++;
2090 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2093 Py_INCREF(self);
2094 return (PyObject*)self;
2095 }
2096 else
2097 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002098}
2099
2100
2101Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002102do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002103{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002104 if (bytes != NULL && bytes != Py_None) {
2105 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002106 }
2107 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002108}
2109
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002110/*[clinic input]
2111bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002112
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002113 self: self(type="PyBytesObject *")
2114 bytes: object = None
2115 /
2116
2117Strip leading and trailing bytes contained in the argument.
2118
2119If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2120[clinic start generated code]*/
2121
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002122static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002123bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002124/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002125{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002126 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002127}
2128
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002129/*[clinic input]
2130bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132 self: self(type="PyBytesObject *")
2133 bytes: object = None
2134 /
2135
2136Strip leading bytes contained in the argument.
2137
2138If the argument is omitted or None, strip leading ASCII whitespace.
2139[clinic start generated code]*/
2140
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141static PyObject *
2142bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002143/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144{
2145 return do_argstrip(self, LEFTSTRIP, bytes);
2146}
2147
2148/*[clinic input]
2149bytes.rstrip
2150
2151 self: self(type="PyBytesObject *")
2152 bytes: object = None
2153 /
2154
2155Strip trailing bytes contained in the argument.
2156
2157If the argument is omitted or None, strip trailing ASCII whitespace.
2158[clinic start generated code]*/
2159
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002160static PyObject *
2161bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002162/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002163{
2164 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002165}
Neal Norwitz6968b052007-02-27 19:02:19 +00002166
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002167
2168PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002169"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002170\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002171Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002172string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002173as in slice notation.");
2174
2175static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002176bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002177{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 PyObject *sub_obj;
2179 const char *str = PyBytes_AS_STRING(self), *sub;
2180 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002181 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002183
Antoine Pitrouac65d962011-10-20 23:54:17 +02002184 Py_buffer vsub;
2185 PyObject *count_obj;
2186
2187 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2188 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002189 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190
Antoine Pitrouac65d962011-10-20 23:54:17 +02002191 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002192 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002193 return NULL;
2194
2195 sub = vsub.buf;
2196 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002198 else {
2199 sub = &byte;
2200 sub_len = 1;
2201 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002203 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002204
Antoine Pitrouac65d962011-10-20 23:54:17 +02002205 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002206 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2207 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002208
2209 if (sub_obj)
2210 PyBuffer_Release(&vsub);
2211
2212 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002213}
2214
2215
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002216/*[clinic input]
2217bytes.translate
2218
2219 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002220 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002221 Translation table, which must be a bytes object of length 256.
2222 [
2223 deletechars: object
2224 ]
2225 /
2226
2227Return a copy with each character mapped by the given translation table.
2228
2229All characters occurring in the optional argument deletechars are removed.
2230The remaining characters are mapped through the given translation table.
2231[clinic start generated code]*/
2232
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002233static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002234bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2235 PyObject *deletechars)
2236/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002237{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002238 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002239 Py_buffer table_view = {NULL, NULL};
2240 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002241 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002242 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002243 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002244 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002245 Py_ssize_t inlen, tablen, dellen = 0;
2246 PyObject *result;
2247 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002248
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002249 if (PyBytes_Check(table)) {
2250 table_chars = PyBytes_AS_STRING(table);
2251 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002252 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002253 else if (table == Py_None) {
2254 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002255 tablen = 256;
2256 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002257 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002258 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002259 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002260 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002261 tablen = table_view.len;
2262 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002264 if (tablen != 256) {
2265 PyErr_SetString(PyExc_ValueError,
2266 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002267 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002268 return NULL;
2269 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002270
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271 if (deletechars != NULL) {
2272 if (PyBytes_Check(deletechars)) {
2273 del_table_chars = PyBytes_AS_STRING(deletechars);
2274 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002275 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002276 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002277 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002278 PyBuffer_Release(&table_view);
2279 return NULL;
2280 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002281 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002282 dellen = del_table_view.len;
2283 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002284 }
2285 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002286 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002287 dellen = 0;
2288 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002290 inlen = PyBytes_GET_SIZE(input_obj);
2291 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002292 if (result == NULL) {
2293 PyBuffer_Release(&del_table_view);
2294 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002295 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002296 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002297 output_start = output = PyBytes_AsString(result);
2298 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002299
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002300 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002301 /* If no deletions are required, use faster code */
2302 for (i = inlen; --i >= 0; ) {
2303 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 changed = 1;
2306 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002307 if (!changed && PyBytes_CheckExact(input_obj)) {
2308 Py_INCREF(input_obj);
2309 Py_DECREF(result);
2310 result = input_obj;
2311 }
2312 PyBuffer_Release(&del_table_view);
2313 PyBuffer_Release(&table_view);
2314 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002316
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002317 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002318 for (i = 0; i < 256; i++)
2319 trans_table[i] = Py_CHARMASK(i);
2320 } else {
2321 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002324 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002326 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002327 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002328 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002330 for (i = inlen; --i >= 0; ) {
2331 c = Py_CHARMASK(*input++);
2332 if (trans_table[c] != -1)
2333 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2334 continue;
2335 changed = 1;
2336 }
2337 if (!changed && PyBytes_CheckExact(input_obj)) {
2338 Py_DECREF(result);
2339 Py_INCREF(input_obj);
2340 return input_obj;
2341 }
2342 /* Fix the size of the resulting string */
2343 if (inlen > 0)
2344 _PyBytes_Resize(&result, output - output_start);
2345 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002346}
2347
2348
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002349/*[clinic input]
2350
2351@staticmethod
2352bytes.maketrans
2353
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002354 frm: Py_buffer
2355 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002356 /
2357
2358Return a translation table useable for the bytes or bytearray translate method.
2359
2360The returned table will be one where each byte in frm is mapped to the byte at
2361the same position in to.
2362
2363The bytes objects frm and to must be of the same length.
2364[clinic start generated code]*/
2365
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002366static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002367bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002368/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002369{
2370 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002371}
2372
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002373/* find and count characters and substrings */
2374
/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a (non-const) char pointer to the match, or NULL
   when the byte does not occur. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2377
2378/* String ops must return a string. */
2379/* If the object is subclass of string, create a copy */
2380Py_LOCAL(PyBytesObject *)
2381return_self(PyBytesObject *self)
2382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002383 if (PyBytes_CheckExact(self)) {
2384 Py_INCREF(self);
2385 return self;
2386 }
2387 return (PyBytesObject *)PyBytes_FromStringAndSize(
2388 PyBytes_AS_STRING(self),
2389 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002390}
2391
2392Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002393countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002394{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002395 Py_ssize_t count=0;
2396 const char *start=target;
2397 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002399 while ( (start=findchar(start, end-start, c)) != NULL ) {
2400 count++;
2401 if (count >= maxcount)
2402 break;
2403 start += 1;
2404 }
2405 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002406}
2407
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002408
2409/* Algorithms for different cases of string replacement */
2410
2411/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2412Py_LOCAL(PyBytesObject *)
2413replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 const char *to_s, Py_ssize_t to_len,
2415 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 char *self_s, *result_s;
2418 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002419 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002420 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002422 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002423
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002424 /* 1 at the end plus 1 after every character;
2425 count = min(maxcount, self_len + 1) */
2426 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002427 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002428 else
2429 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2430 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 /* Check for overflow */
2433 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002434 assert(count > 0);
2435 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 PyErr_SetString(PyExc_OverflowError,
2437 "replacement bytes are too long");
2438 return NULL;
2439 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002440 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002442 if (! (result = (PyBytesObject *)
2443 PyBytes_FromStringAndSize(NULL, result_len)) )
2444 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 self_s = PyBytes_AS_STRING(self);
2447 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002449 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 /* Lay the first one down (guaranteed this will occur) */
2452 Py_MEMCPY(result_s, to_s, to_len);
2453 result_s += to_len;
2454 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002456 for (i=0; i<count; i++) {
2457 *result_s++ = *self_s++;
2458 Py_MEMCPY(result_s, to_s, to_len);
2459 result_s += to_len;
2460 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002462 /* Copy the rest of the original string */
2463 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466}
2467
2468/* Special case for deleting a single character */
2469/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2470Py_LOCAL(PyBytesObject *)
2471replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002472 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 char *self_s, *result_s;
2475 char *start, *next, *end;
2476 Py_ssize_t self_len, result_len;
2477 Py_ssize_t count;
2478 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 self_len = PyBytes_GET_SIZE(self);
2481 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002483 count = countchar(self_s, self_len, from_c, maxcount);
2484 if (count == 0) {
2485 return return_self(self);
2486 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002488 result_len = self_len - count; /* from_len == 1 */
2489 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 if ( (result = (PyBytesObject *)
2492 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2493 return NULL;
2494 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002496 start = self_s;
2497 end = self_s + self_len;
2498 while (count-- > 0) {
2499 next = findchar(start, end-start, from_c);
2500 if (next == NULL)
2501 break;
2502 Py_MEMCPY(result_s, start, next-start);
2503 result_s += (next-start);
2504 start = next+1;
2505 }
2506 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002508 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509}
2510
2511/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2512
2513Py_LOCAL(PyBytesObject *)
2514replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 const char *from_s, Py_ssize_t from_len,
2516 Py_ssize_t maxcount) {
2517 char *self_s, *result_s;
2518 char *start, *next, *end;
2519 Py_ssize_t self_len, result_len;
2520 Py_ssize_t count, offset;
2521 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002522
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002523 self_len = PyBytes_GET_SIZE(self);
2524 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 count = stringlib_count(self_s, self_len,
2527 from_s, from_len,
2528 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002530 if (count == 0) {
2531 /* no matches */
2532 return return_self(self);
2533 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002535 result_len = self_len - (count * from_len);
2536 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002538 if ( (result = (PyBytesObject *)
2539 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2540 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 start = self_s;
2545 end = self_s + self_len;
2546 while (count-- > 0) {
2547 offset = stringlib_find(start, end-start,
2548 from_s, from_len,
2549 0);
2550 if (offset == -1)
2551 break;
2552 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 result_s += (next-start);
2557 start = next+from_len;
2558 }
2559 Py_MEMCPY(result_s, start, end-start);
2560 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561}
2562
2563/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2564Py_LOCAL(PyBytesObject *)
2565replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002566 char from_c, char to_c,
2567 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002568{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002569 char *self_s, *result_s, *start, *end, *next;
2570 Py_ssize_t self_len;
2571 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 /* The result string will be the same size */
2574 self_s = PyBytes_AS_STRING(self);
2575 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002577 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002579 if (next == NULL) {
2580 /* No matches; return the original string */
2581 return return_self(self);
2582 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 /* Need to make a new string */
2585 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2586 if (result == NULL)
2587 return NULL;
2588 result_s = PyBytes_AS_STRING(result);
2589 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002591 /* change everything in-place, starting with this one */
2592 start = result_s + (next-self_s);
2593 *start = to_c;
2594 start++;
2595 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 while (--maxcount > 0) {
2598 next = findchar(start, end-start, from_c);
2599 if (next == NULL)
2600 break;
2601 *next = to_c;
2602 start = next+1;
2603 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002605 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002606}
2607
2608/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2609Py_LOCAL(PyBytesObject *)
2610replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002611 const char *from_s, Py_ssize_t from_len,
2612 const char *to_s, Py_ssize_t to_len,
2613 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 char *result_s, *start, *end;
2616 char *self_s;
2617 Py_ssize_t self_len, offset;
2618 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002620 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002622 self_s = PyBytes_AS_STRING(self);
2623 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002625 offset = stringlib_find(self_s, self_len,
2626 from_s, from_len,
2627 0);
2628 if (offset == -1) {
2629 /* No matches; return the original string */
2630 return return_self(self);
2631 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 /* Need to make a new string */
2634 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2635 if (result == NULL)
2636 return NULL;
2637 result_s = PyBytes_AS_STRING(result);
2638 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 /* change everything in-place, starting with this one */
2641 start = result_s + offset;
2642 Py_MEMCPY(start, to_s, from_len);
2643 start += from_len;
2644 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002646 while ( --maxcount > 0) {
2647 offset = stringlib_find(start, end-start,
2648 from_s, from_len,
2649 0);
2650 if (offset==-1)
2651 break;
2652 Py_MEMCPY(start+offset, to_s, from_len);
2653 start += offset+from_len;
2654 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002656 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657}
2658
2659/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2660Py_LOCAL(PyBytesObject *)
2661replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002662 char from_c,
2663 const char *to_s, Py_ssize_t to_len,
2664 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002666 char *self_s, *result_s;
2667 char *start, *next, *end;
2668 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002669 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002670 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002672 self_s = PyBytes_AS_STRING(self);
2673 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 count = countchar(self_s, self_len, from_c, maxcount);
2676 if (count == 0) {
2677 /* no matches, return unchanged */
2678 return return_self(self);
2679 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 /* use the difference between current and new, hence the "-1" */
2682 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002683 assert(count > 0);
2684 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 PyErr_SetString(PyExc_OverflowError,
2686 "replacement bytes are too long");
2687 return NULL;
2688 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002689 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 if ( (result = (PyBytesObject *)
2692 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2693 return NULL;
2694 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002696 start = self_s;
2697 end = self_s + self_len;
2698 while (count-- > 0) {
2699 next = findchar(start, end-start, from_c);
2700 if (next == NULL)
2701 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 if (next == start) {
2704 /* replace with the 'to' */
2705 Py_MEMCPY(result_s, to_s, to_len);
2706 result_s += to_len;
2707 start += 1;
2708 } else {
2709 /* copy the unchanged old then the 'to' */
2710 Py_MEMCPY(result_s, start, next-start);
2711 result_s += (next-start);
2712 Py_MEMCPY(result_s, to_s, to_len);
2713 result_s += to_len;
2714 start = next+1;
2715 }
2716 }
2717 /* Copy the remainder of the remaining string */
2718 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002720 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721}
2722
2723/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2724Py_LOCAL(PyBytesObject *)
2725replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002726 const char *from_s, Py_ssize_t from_len,
2727 const char *to_s, Py_ssize_t to_len,
2728 Py_ssize_t maxcount) {
2729 char *self_s, *result_s;
2730 char *start, *next, *end;
2731 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002732 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002733 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002735 self_s = PyBytes_AS_STRING(self);
2736 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002738 count = stringlib_count(self_s, self_len,
2739 from_s, from_len,
2740 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002742 if (count == 0) {
2743 /* no matches, return unchanged */
2744 return return_self(self);
2745 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002747 /* Check for overflow */
2748 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002749 assert(count > 0);
2750 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 PyErr_SetString(PyExc_OverflowError,
2752 "replacement bytes are too long");
2753 return NULL;
2754 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002755 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 if ( (result = (PyBytesObject *)
2758 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2759 return NULL;
2760 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002762 start = self_s;
2763 end = self_s + self_len;
2764 while (count-- > 0) {
2765 offset = stringlib_find(start, end-start,
2766 from_s, from_len,
2767 0);
2768 if (offset == -1)
2769 break;
2770 next = start+offset;
2771 if (next == start) {
2772 /* replace with the 'to' */
2773 Py_MEMCPY(result_s, to_s, to_len);
2774 result_s += to_len;
2775 start += from_len;
2776 } else {
2777 /* copy the unchanged old then the 'to' */
2778 Py_MEMCPY(result_s, start, next-start);
2779 result_s += (next-start);
2780 Py_MEMCPY(result_s, to_s, to_len);
2781 result_s += to_len;
2782 start = next+from_len;
2783 }
2784 }
2785 /* Copy the remainder of the remaining string */
2786 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002788 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002789}
2790
2791
2792Py_LOCAL(PyBytesObject *)
2793replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002794 const char *from_s, Py_ssize_t from_len,
2795 const char *to_s, Py_ssize_t to_len,
2796 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002798 if (maxcount < 0) {
2799 maxcount = PY_SSIZE_T_MAX;
2800 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2801 /* nothing to do; return the original string */
2802 return return_self(self);
2803 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002805 if (maxcount == 0 ||
2806 (from_len == 0 && to_len == 0)) {
2807 /* nothing to do; return the original string */
2808 return return_self(self);
2809 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002810
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002811 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002812
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 if (from_len == 0) {
2814 /* insert the 'to' string everywhere. */
2815 /* >>> "Python".replace("", ".") */
2816 /* '.P.y.t.h.o.n.' */
2817 return replace_interleave(self, to_s, to_len, maxcount);
2818 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002819
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002820 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2821 /* point for an empty self string to generate a non-empty string */
2822 /* Special case so the remaining code always gets a non-empty string */
2823 if (PyBytes_GET_SIZE(self) == 0) {
2824 return return_self(self);
2825 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002827 if (to_len == 0) {
2828 /* delete all occurrences of 'from' string */
2829 if (from_len == 1) {
2830 return replace_delete_single_character(
2831 self, from_s[0], maxcount);
2832 } else {
2833 return replace_delete_substring(self, from_s,
2834 from_len, maxcount);
2835 }
2836 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002839
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002840 if (from_len == to_len) {
2841 if (from_len == 1) {
2842 return replace_single_character_in_place(
2843 self,
2844 from_s[0],
2845 to_s[0],
2846 maxcount);
2847 } else {
2848 return replace_substring_in_place(
2849 self, from_s, from_len, to_s, to_len,
2850 maxcount);
2851 }
2852 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002854 /* Otherwise use the more generic algorithms */
2855 if (from_len == 1) {
2856 return replace_single_character(self, from_s[0],
2857 to_s, to_len, maxcount);
2858 } else {
2859 /* len('from')>=2, len('to')>=1 */
2860 return replace_substring(self, from_s, from_len, to_s, to_len,
2861 maxcount);
2862 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002863}
2864
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002865
2866/*[clinic input]
2867bytes.replace
2868
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002869 old: Py_buffer
2870 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002871 count: Py_ssize_t = -1
2872 Maximum number of occurrences to replace.
2873 -1 (the default value) means replace all occurrences.
2874 /
2875
2876Return a copy with all occurrences of substring old replaced by new.
2877
2878If the optional argument count is given, only the first count occurrences are
2879replaced.
2880[clinic start generated code]*/
2881
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002882static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002883bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2884 Py_ssize_t count)
2885/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002886{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002887 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002888 (const char *)old->buf, old->len,
2889 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002890}
2891
2892/** End DALKE **/
2893
2894/* Matches the end (direction >= 0) or start (direction < 0) of self
2895 * against substr, using the start and end arguments. Returns
2896 * -1 on error, 0 if not found and 1 if found.
2897 */
2898Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002899_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002900 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002901{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002902 Py_ssize_t len = PyBytes_GET_SIZE(self);
2903 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002904 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 const char* sub;
2906 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002908 if (PyBytes_Check(substr)) {
2909 sub = PyBytes_AS_STRING(substr);
2910 slen = PyBytes_GET_SIZE(substr);
2911 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002912 else {
2913 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2914 return -1;
2915 sub = sub_view.buf;
2916 slen = sub_view.len;
2917 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002920 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002922 if (direction < 0) {
2923 /* startswith */
2924 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002925 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 } else {
2927 /* endswith */
2928 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002929 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002931 if (end-slen > start)
2932 start = end - slen;
2933 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002934 if (end-start < slen)
2935 goto notfound;
2936 if (memcmp(str+start, sub, slen) != 0)
2937 goto notfound;
2938
2939 PyBuffer_Release(&sub_view);
2940 return 1;
2941
2942notfound:
2943 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002944 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002945}
2946
2947
2948PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002949"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002950\n\
2951Return True if B starts with the specified prefix, False otherwise.\n\
2952With optional start, test B beginning at that position.\n\
2953With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002954prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002955
2956static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002957bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002958{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002959 Py_ssize_t start = 0;
2960 Py_ssize_t end = PY_SSIZE_T_MAX;
2961 PyObject *subobj;
2962 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963
Jesus Ceaac451502011-04-20 17:09:23 +02002964 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002965 return NULL;
2966 if (PyTuple_Check(subobj)) {
2967 Py_ssize_t i;
2968 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2969 result = _bytes_tailmatch(self,
2970 PyTuple_GET_ITEM(subobj, i),
2971 start, end, -1);
2972 if (result == -1)
2973 return NULL;
2974 else if (result) {
2975 Py_RETURN_TRUE;
2976 }
2977 }
2978 Py_RETURN_FALSE;
2979 }
2980 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002981 if (result == -1) {
2982 if (PyErr_ExceptionMatches(PyExc_TypeError))
2983 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2984 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002985 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03002986 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002987 else
2988 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002989}
2990
2991
2992PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002993"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002994\n\
2995Return True if B ends with the specified suffix, False otherwise.\n\
2996With optional start, test B beginning at that position.\n\
2997With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002998suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002999
3000static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003001bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003003 Py_ssize_t start = 0;
3004 Py_ssize_t end = PY_SSIZE_T_MAX;
3005 PyObject *subobj;
3006 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007
Jesus Ceaac451502011-04-20 17:09:23 +02003008 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003009 return NULL;
3010 if (PyTuple_Check(subobj)) {
3011 Py_ssize_t i;
3012 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3013 result = _bytes_tailmatch(self,
3014 PyTuple_GET_ITEM(subobj, i),
3015 start, end, +1);
3016 if (result == -1)
3017 return NULL;
3018 else if (result) {
3019 Py_RETURN_TRUE;
3020 }
3021 }
3022 Py_RETURN_FALSE;
3023 }
3024 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003025 if (result == -1) {
3026 if (PyErr_ExceptionMatches(PyExc_TypeError))
3027 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3028 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003029 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003030 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003031 else
3032 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003033}
3034
3035
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003036/*[clinic input]
3037bytes.decode
3038
3039 encoding: str(c_default="NULL") = 'utf-8'
3040 The encoding with which to decode the bytes.
3041 errors: str(c_default="NULL") = 'strict'
3042 The error handling scheme to use for the handling of decoding errors.
3043 The default is 'strict' meaning that decoding errors raise a
3044 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3045 as well as any other name registered with codecs.register_error that
3046 can handle UnicodeDecodeErrors.
3047
3048Decode the bytes using the codec registered for encoding.
3049[clinic start generated code]*/
3050
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003051static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003052bytes_decode_impl(PyBytesObject*self, const char *encoding,
3053 const char *errors)
3054/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003055{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003056 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003057}
3058
Guido van Rossum20188312006-05-05 15:15:40 +00003059
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003060/*[clinic input]
3061bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003062
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003063 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003064
3065Return a list of the lines in the bytes, breaking at line boundaries.
3066
3067Line breaks are not included in the resulting list unless keepends is given and
3068true.
3069[clinic start generated code]*/
3070
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003071static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003072bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003073/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003074{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003075 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003076 (PyObject*) self, PyBytes_AS_STRING(self),
3077 PyBytes_GET_SIZE(self), keepends
3078 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003079}
3080
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003081static int
Victor Stinner6430fd52011-09-29 04:02:13 +02003082hex_digit_to_int(Py_UCS4 c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003084 if (c >= 128)
3085 return -1;
David Malcolm96960882010-11-05 17:23:41 +00003086 if (Py_ISDIGIT(c))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003087 return c - '0';
3088 else {
David Malcolm96960882010-11-05 17:23:41 +00003089 if (Py_ISUPPER(c))
3090 c = Py_TOLOWER(c);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003091 if (c >= 'a' && c <= 'f')
3092 return c - 'a' + 10;
3093 }
3094 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003095}
3096
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003097/*[clinic input]
3098@classmethod
3099bytes.fromhex
3100
3101 string: unicode
3102 /
3103
3104Create a bytes object from a string of hexadecimal numbers.
3105
3106Spaces between two numbers are accepted.
3107Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3108[clinic start generated code]*/
3109
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003110static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003111bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003112/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003113{
3114 PyObject *newstring;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003115 char *buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003116 Py_ssize_t hexlen, byteslen, i, j;
3117 int top, bot;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003118 void *data;
3119 unsigned int kind;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003120
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003121 assert(PyUnicode_Check(string));
3122 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003123 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003124 kind = PyUnicode_KIND(string);
3125 data = PyUnicode_DATA(string);
3126 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003128 byteslen = hexlen/2; /* This overestimates if there are spaces */
3129 newstring = PyBytes_FromStringAndSize(NULL, byteslen);
3130 if (!newstring)
3131 return NULL;
3132 buf = PyBytes_AS_STRING(newstring);
3133 for (i = j = 0; i < hexlen; i += 2) {
3134 /* skip over spaces in the input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003135 while (PyUnicode_READ(kind, data, i) == ' ')
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003136 i++;
3137 if (i >= hexlen)
3138 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003139 top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
3140 bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003141 if (top == -1 || bot == -1) {
3142 PyErr_Format(PyExc_ValueError,
3143 "non-hexadecimal number found in "
3144 "fromhex() arg at position %zd", i);
3145 goto error;
3146 }
3147 buf[j++] = (top << 4) + bot;
3148 }
3149 if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
3150 goto error;
3151 return newstring;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003152
3153 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003154 Py_XDECREF(newstring);
3155 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003156}
3157
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003158PyDoc_STRVAR(hex__doc__,
3159"B.hex() -> string\n\
3160\n\
3161Create a string of hexadecimal numbers from a bytes object.\n\
3162Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3163
3164static PyObject *
3165bytes_hex(PyBytesObject *self)
3166{
3167 char* argbuf = PyBytes_AS_STRING(self);
3168 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3169 return _Py_strhex(argbuf, arglen);
3170}
3171
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003172static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003173bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003175 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003176}
3177
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003178
3179static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003180bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003181 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3182 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3183 _Py_capitalize__doc__},
3184 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3185 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003186 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003187 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3188 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003189 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003190 expandtabs__doc__},
3191 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003192 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003193 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3195 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3196 _Py_isalnum__doc__},
3197 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3198 _Py_isalpha__doc__},
3199 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3200 _Py_isdigit__doc__},
3201 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3202 _Py_islower__doc__},
3203 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3204 _Py_isspace__doc__},
3205 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3206 _Py_istitle__doc__},
3207 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3208 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003209 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003210 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3211 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003212 BYTES_LSTRIP_METHODDEF
3213 BYTES_MAKETRANS_METHODDEF
3214 BYTES_PARTITION_METHODDEF
3215 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003216 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3217 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3218 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003219 BYTES_RPARTITION_METHODDEF
3220 BYTES_RSPLIT_METHODDEF
3221 BYTES_RSTRIP_METHODDEF
3222 BYTES_SPLIT_METHODDEF
3223 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003224 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3225 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003226 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003227 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3228 _Py_swapcase__doc__},
3229 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003230 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003231 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3232 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003233 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003234};
3235
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003236static PyObject *
Ethan Furmanb95b5612015-01-23 20:05:18 -08003237bytes_mod(PyObject *v, PyObject *w)
3238{
3239 if (!PyBytes_Check(v))
3240 Py_RETURN_NOTIMPLEMENTED;
3241 return _PyBytes_Format(v, w);
3242}
3243
3244static PyNumberMethods bytes_as_number = {
3245 0, /*nb_add*/
3246 0, /*nb_subtract*/
3247 0, /*nb_multiply*/
3248 bytes_mod, /*nb_remainder*/
3249};
3250
3251static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003252str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3253
3254static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003255bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003256{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003257 PyObject *x = NULL;
3258 const char *encoding = NULL;
3259 const char *errors = NULL;
3260 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003261 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003262 Py_ssize_t size;
3263 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003264 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003266 if (type != &PyBytes_Type)
3267 return str_subtype_new(type, args, kwds);
3268 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3269 &encoding, &errors))
3270 return NULL;
3271 if (x == NULL) {
3272 if (encoding != NULL || errors != NULL) {
3273 PyErr_SetString(PyExc_TypeError,
3274 "encoding or errors without sequence "
3275 "argument");
3276 return NULL;
3277 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003278 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003279 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003281 if (PyUnicode_Check(x)) {
3282 /* Encode via the codec registry */
3283 if (encoding == NULL) {
3284 PyErr_SetString(PyExc_TypeError,
3285 "string argument without an encoding");
3286 return NULL;
3287 }
3288 new = PyUnicode_AsEncodedString(x, encoding, errors);
3289 if (new == NULL)
3290 return NULL;
3291 assert(PyBytes_Check(new));
3292 return new;
3293 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003294
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003295 /* If it's not unicode, there can't be encoding or errors */
3296 if (encoding != NULL || errors != NULL) {
3297 PyErr_SetString(PyExc_TypeError,
3298 "encoding or errors without a string argument");
3299 return NULL;
3300 }
3301
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003302 /* We'd like to call PyObject_Bytes here, but we need to check for an
3303 integer argument before deferring to PyBytes_FromObject, something
3304 PyObject_Bytes doesn't do. */
3305 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3306 if (func != NULL) {
3307 new = PyObject_CallFunctionObjArgs(func, NULL);
3308 Py_DECREF(func);
3309 if (new == NULL)
3310 return NULL;
3311 if (!PyBytes_Check(new)) {
3312 PyErr_Format(PyExc_TypeError,
3313 "__bytes__ returned non-bytes (type %.200s)",
3314 Py_TYPE(new)->tp_name);
3315 Py_DECREF(new);
3316 return NULL;
3317 }
3318 return new;
3319 }
3320 else if (PyErr_Occurred())
3321 return NULL;
3322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003323 /* Is it an integer? */
3324 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3325 if (size == -1 && PyErr_Occurred()) {
3326 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3327 return NULL;
3328 PyErr_Clear();
3329 }
3330 else if (size < 0) {
3331 PyErr_SetString(PyExc_ValueError, "negative count");
3332 return NULL;
3333 }
3334 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003335 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003336 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003337 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003338 return new;
3339 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003340
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003341 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003342}
3343
3344PyObject *
3345PyBytes_FromObject(PyObject *x)
3346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003347 PyObject *new, *it;
3348 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003350 if (x == NULL) {
3351 PyErr_BadInternalCall();
3352 return NULL;
3353 }
Larry Hastingsca28e992012-05-24 22:58:30 -07003354
3355 if (PyBytes_CheckExact(x)) {
3356 Py_INCREF(x);
3357 return x;
3358 }
3359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003360 /* Use the modern buffer interface */
3361 if (PyObject_CheckBuffer(x)) {
3362 Py_buffer view;
3363 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3364 return NULL;
3365 new = PyBytes_FromStringAndSize(NULL, view.len);
3366 if (!new)
3367 goto fail;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003368 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3369 &view, view.len, 'C') < 0)
3370 goto fail;
3371 PyBuffer_Release(&view);
3372 return new;
3373 fail:
3374 Py_XDECREF(new);
3375 PyBuffer_Release(&view);
3376 return NULL;
3377 }
3378 if (PyUnicode_Check(x)) {
3379 PyErr_SetString(PyExc_TypeError,
3380 "cannot convert unicode object to bytes");
3381 return NULL;
3382 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003384 if (PyList_CheckExact(x)) {
3385 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3386 if (new == NULL)
3387 return NULL;
3388 for (i = 0; i < Py_SIZE(x); i++) {
3389 Py_ssize_t value = PyNumber_AsSsize_t(
3390 PyList_GET_ITEM(x, i), PyExc_ValueError);
3391 if (value == -1 && PyErr_Occurred()) {
3392 Py_DECREF(new);
3393 return NULL;
3394 }
3395 if (value < 0 || value >= 256) {
3396 PyErr_SetString(PyExc_ValueError,
3397 "bytes must be in range(0, 256)");
3398 Py_DECREF(new);
3399 return NULL;
3400 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003401 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003402 }
3403 return new;
3404 }
3405 if (PyTuple_CheckExact(x)) {
3406 new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
3407 if (new == NULL)
3408 return NULL;
3409 for (i = 0; i < Py_SIZE(x); i++) {
3410 Py_ssize_t value = PyNumber_AsSsize_t(
3411 PyTuple_GET_ITEM(x, i), PyExc_ValueError);
3412 if (value == -1 && PyErr_Occurred()) {
3413 Py_DECREF(new);
3414 return NULL;
3415 }
3416 if (value < 0 || value >= 256) {
3417 PyErr_SetString(PyExc_ValueError,
3418 "bytes must be in range(0, 256)");
3419 Py_DECREF(new);
3420 return NULL;
3421 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003422 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003423 }
3424 return new;
3425 }
Alexandre Vassalottia5c565a2010-01-09 22:14:46 +00003426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003427 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003428 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003429 if (size == -1 && PyErr_Occurred())
3430 return NULL;
3431 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3432 returning a shared empty bytes string. This required because we
3433 want to call _PyBytes_Resize() the returned object, which we can
3434 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003435 if (size == 0)
3436 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003437 new = PyBytes_FromStringAndSize(NULL, size);
3438 if (new == NULL)
3439 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003440 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003442 /* Get the iterator */
3443 it = PyObject_GetIter(x);
3444 if (it == NULL)
3445 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003447 /* Run the iterator to exhaustion */
3448 for (i = 0; ; i++) {
3449 PyObject *item;
3450 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003452 /* Get the next item */
3453 item = PyIter_Next(it);
3454 if (item == NULL) {
3455 if (PyErr_Occurred())
3456 goto error;
3457 break;
3458 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003460 /* Interpret it as an int (__index__) */
3461 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3462 Py_DECREF(item);
3463 if (value == -1 && PyErr_Occurred())
3464 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003466 /* Range check */
3467 if (value < 0 || value >= 256) {
3468 PyErr_SetString(PyExc_ValueError,
3469 "bytes must be in range(0, 256)");
3470 goto error;
3471 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003473 /* Append the byte */
3474 if (i >= size) {
3475 size = 2 * size + 1;
3476 if (_PyBytes_Resize(&new, size) < 0)
3477 goto error;
3478 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003479 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003480 }
3481 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003483 /* Clean up and return success */
3484 Py_DECREF(it);
3485 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003486
3487 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003488 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003489 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003490 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003491}
3492
3493static PyObject *
3494str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003496 PyObject *tmp, *pnew;
3497 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003499 assert(PyType_IsSubtype(type, &PyBytes_Type));
3500 tmp = bytes_new(&PyBytes_Type, args, kwds);
3501 if (tmp == NULL)
3502 return NULL;
3503 assert(PyBytes_CheckExact(tmp));
3504 n = PyBytes_GET_SIZE(tmp);
3505 pnew = type->tp_alloc(type, n);
3506 if (pnew != NULL) {
3507 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3508 PyBytes_AS_STRING(tmp), n+1);
3509 ((PyBytesObject *)pnew)->ob_shash =
3510 ((PyBytesObject *)tmp)->ob_shash;
3511 }
3512 Py_DECREF(tmp);
3513 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003514}
3515
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003516PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003517"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003518bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003519bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003520bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3521bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003522\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003523Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003524 - an iterable yielding integers in range(256)\n\
3525 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003526 - any object implementing the buffer API.\n\
3527 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003528
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003529static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003530
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003531PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003532 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3533 "bytes",
3534 PyBytesObject_SIZE,
3535 sizeof(char),
3536 bytes_dealloc, /* tp_dealloc */
3537 0, /* tp_print */
3538 0, /* tp_getattr */
3539 0, /* tp_setattr */
3540 0, /* tp_reserved */
3541 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003542 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003543 &bytes_as_sequence, /* tp_as_sequence */
3544 &bytes_as_mapping, /* tp_as_mapping */
3545 (hashfunc)bytes_hash, /* tp_hash */
3546 0, /* tp_call */
3547 bytes_str, /* tp_str */
3548 PyObject_GenericGetAttr, /* tp_getattro */
3549 0, /* tp_setattro */
3550 &bytes_as_buffer, /* tp_as_buffer */
3551 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3552 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3553 bytes_doc, /* tp_doc */
3554 0, /* tp_traverse */
3555 0, /* tp_clear */
3556 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3557 0, /* tp_weaklistoffset */
3558 bytes_iter, /* tp_iter */
3559 0, /* tp_iternext */
3560 bytes_methods, /* tp_methods */
3561 0, /* tp_members */
3562 0, /* tp_getset */
3563 &PyBaseObject_Type, /* tp_base */
3564 0, /* tp_dict */
3565 0, /* tp_descr_get */
3566 0, /* tp_descr_set */
3567 0, /* tp_dictoffset */
3568 0, /* tp_init */
3569 0, /* tp_alloc */
3570 bytes_new, /* tp_new */
3571 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003572};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003573
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003574void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003575PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003576{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003577 assert(pv != NULL);
3578 if (*pv == NULL)
3579 return;
3580 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003581 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003582 return;
3583 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003584
3585 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3586 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003587 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003588 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003589
Antoine Pitrou161d6952014-05-01 14:36:20 +02003590 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003591 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003592 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3593 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3594 Py_CLEAR(*pv);
3595 return;
3596 }
3597
3598 oldsize = PyBytes_GET_SIZE(*pv);
3599 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3600 PyErr_NoMemory();
3601 goto error;
3602 }
3603 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3604 goto error;
3605
3606 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3607 PyBuffer_Release(&wb);
3608 return;
3609
3610 error:
3611 PyBuffer_Release(&wb);
3612 Py_CLEAR(*pv);
3613 return;
3614 }
3615
3616 else {
3617 /* Multiple references, need to create new object */
3618 PyObject *v;
3619 v = bytes_concat(*pv, w);
3620 Py_DECREF(*pv);
3621 *pv = v;
3622 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003623}
3624
3625void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003626PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003627{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003628 PyBytes_Concat(pv, w);
3629 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003630}
3631
3632
Ethan Furmanb95b5612015-01-23 20:05:18 -08003633/* The following function breaks the notion that bytes are immutable:
3634 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003635 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003636 as creating a new bytes object and destroying the old one, only
3637 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003638 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003639 Note that if there's not enough memory to resize the bytes object, the
3640 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003641 memory" exception is set, and -1 is returned. Else (on success) 0 is
3642 returned, and the value in *pv may or may not be the same as on input.
3643 As always, an extra byte is allocated for a trailing \0 byte (newsize
3644 does *not* include that), and a trailing \0 byte is stored.
3645*/
3646
3647int
3648_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3649{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003650 PyObject *v;
3651 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003652 v = *pv;
3653 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3654 *pv = 0;
3655 Py_DECREF(v);
3656 PyErr_BadInternalCall();
3657 return -1;
3658 }
3659 /* XXX UNREF/NEWREF interface should be more symmetrical */
3660 _Py_DEC_REFTOTAL;
3661 _Py_ForgetReference(v);
3662 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003663 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003664 if (*pv == NULL) {
3665 PyObject_Del(v);
3666 PyErr_NoMemory();
3667 return -1;
3668 }
3669 _Py_NewReference(*pv);
3670 sv = (PyBytesObject *) *pv;
3671 Py_SIZE(sv) = newsize;
3672 sv->ob_sval[newsize] = '\0';
3673 sv->ob_shash = -1; /* invalidate cached hash value */
3674 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003675}
3676
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003677void
3678PyBytes_Fini(void)
3679{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003680 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003681 for (i = 0; i < UCHAR_MAX + 1; i++)
3682 Py_CLEAR(characters[i]);
3683 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003684}
3685
Benjamin Peterson4116f362008-05-27 00:36:20 +00003686/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003687
3688typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003689 PyObject_HEAD
3690 Py_ssize_t it_index;
3691 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003692} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003693
3694static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003695striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003696{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003697 _PyObject_GC_UNTRACK(it);
3698 Py_XDECREF(it->it_seq);
3699 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003700}
3701
3702static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003703striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003704{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003705 Py_VISIT(it->it_seq);
3706 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003707}
3708
3709static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003710striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003711{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003712 PyBytesObject *seq;
3713 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003715 assert(it != NULL);
3716 seq = it->it_seq;
3717 if (seq == NULL)
3718 return NULL;
3719 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003721 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3722 item = PyLong_FromLong(
3723 (unsigned char)seq->ob_sval[it->it_index]);
3724 if (item != NULL)
3725 ++it->it_index;
3726 return item;
3727 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003729 Py_DECREF(seq);
3730 it->it_seq = NULL;
3731 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003732}
3733
3734static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003735striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003736{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003737 Py_ssize_t len = 0;
3738 if (it->it_seq)
3739 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3740 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003741}
3742
3743PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003744 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003745
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003746static PyObject *
3747striter_reduce(striterobject *it)
3748{
3749 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003750 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003751 it->it_seq, it->it_index);
3752 } else {
3753 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3754 if (u == NULL)
3755 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003756 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003757 }
3758}
3759
3760PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3761
3762static PyObject *
3763striter_setstate(striterobject *it, PyObject *state)
3764{
3765 Py_ssize_t index = PyLong_AsSsize_t(state);
3766 if (index == -1 && PyErr_Occurred())
3767 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003768 if (it->it_seq != NULL) {
3769 if (index < 0)
3770 index = 0;
3771 else if (index > PyBytes_GET_SIZE(it->it_seq))
3772 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3773 it->it_index = index;
3774 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003775 Py_RETURN_NONE;
3776}
3777
3778PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3779
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003780static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003781 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3782 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003783 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3784 reduce_doc},
3785 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3786 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003787 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003788};
3789
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003790PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003791 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3792 "bytes_iterator", /* tp_name */
3793 sizeof(striterobject), /* tp_basicsize */
3794 0, /* tp_itemsize */
3795 /* methods */
3796 (destructor)striter_dealloc, /* tp_dealloc */
3797 0, /* tp_print */
3798 0, /* tp_getattr */
3799 0, /* tp_setattr */
3800 0, /* tp_reserved */
3801 0, /* tp_repr */
3802 0, /* tp_as_number */
3803 0, /* tp_as_sequence */
3804 0, /* tp_as_mapping */
3805 0, /* tp_hash */
3806 0, /* tp_call */
3807 0, /* tp_str */
3808 PyObject_GenericGetAttr, /* tp_getattro */
3809 0, /* tp_setattro */
3810 0, /* tp_as_buffer */
3811 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3812 0, /* tp_doc */
3813 (traverseproc)striter_traverse, /* tp_traverse */
3814 0, /* tp_clear */
3815 0, /* tp_richcompare */
3816 0, /* tp_weaklistoffset */
3817 PyObject_SelfIter, /* tp_iter */
3818 (iternextfunc)striter_next, /* tp_iternext */
3819 striter_methods, /* tp_methods */
3820 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003821};
3822
3823static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003824bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003826 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003828 if (!PyBytes_Check(seq)) {
3829 PyErr_BadInternalCall();
3830 return NULL;
3831 }
3832 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3833 if (it == NULL)
3834 return NULL;
3835 it->it_index = 0;
3836 Py_INCREF(seq);
3837 it->it_seq = (PyBytesObject *)seq;
3838 _PyObject_GC_TRACK(it);
3839 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003840}
Victor Stinner00165072015-10-09 01:53:21 +02003841
3842
3843/* _PyBytesWriter API */
3844
3845#ifdef MS_WINDOWS
3846 /* On Windows, overallocate by 50% is the best factor */
3847# define OVERALLOCATE_FACTOR 2
3848#else
3849 /* On Linux, overallocate by 25% is the best factor */
3850# define OVERALLOCATE_FACTOR 4
3851#endif
3852
3853void
3854_PyBytesWriter_Init(_PyBytesWriter *writer)
3855{
3856 writer->buffer = NULL;
3857 writer->allocated = 0;
Victor Stinner53926a12015-10-09 12:37:03 +02003858 writer->min_size = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003859 writer->overallocate = 0;
Victor Stinnerb3653a32015-10-09 03:38:24 +02003860 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003861#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003862 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003863#endif
3864}
3865
3866void
3867_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3868{
3869 Py_CLEAR(writer->buffer);
3870}
3871
3872Py_LOCAL_INLINE(char*)
3873_PyBytesWriter_AsString(_PyBytesWriter *writer)
3874{
Victor Stinnerb3653a32015-10-09 03:38:24 +02003875 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003876 assert(writer->buffer != NULL);
3877 return PyBytes_AS_STRING(writer->buffer);
3878 }
3879 else {
3880 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003881 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003882 }
3883}
3884
3885Py_LOCAL_INLINE(Py_ssize_t)
3886_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
3887{
3888 char *start = _PyBytesWriter_AsString(writer);
3889 assert(str != NULL);
3890 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003891 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003892 return str - start;
3893}
3894
3895Py_LOCAL_INLINE(void)
3896_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3897{
3898#ifdef Py_DEBUG
3899 char *start, *end;
3900
Victor Stinnerb3653a32015-10-09 03:38:24 +02003901 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003902 assert(writer->buffer != NULL);
3903 assert(PyBytes_CheckExact(writer->buffer));
3904 assert(Py_REFCNT(writer->buffer) == 1);
3905 }
3906 else {
3907 assert(writer->buffer == NULL);
3908 }
3909
3910 start = _PyBytesWriter_AsString(writer);
Victor Stinner53926a12015-10-09 12:37:03 +02003911 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003912 /* the last byte must always be null */
3913 assert(start[writer->allocated] == 0);
3914
3915 end = start + writer->allocated;
3916 assert(str != NULL);
3917 assert(start <= str && str <= end);
3918#endif
3919}
3920
3921char*
3922_PyBytesWriter_Prepare(_PyBytesWriter *writer, char *str, Py_ssize_t size)
3923{
3924 Py_ssize_t allocated, pos;
3925
3926 _PyBytesWriter_CheckConsistency(writer, str);
3927 assert(size >= 0);
3928
3929 if (size == 0) {
3930 /* nothing to do */
3931 return str;
3932 }
3933
Victor Stinner53926a12015-10-09 12:37:03 +02003934 if (writer->min_size > PY_SSIZE_T_MAX - size) {
Victor Stinner00165072015-10-09 01:53:21 +02003935 PyErr_NoMemory();
3936 _PyBytesWriter_Dealloc(writer);
3937 return NULL;
3938 }
Victor Stinner53926a12015-10-09 12:37:03 +02003939 writer->min_size += size;
Victor Stinner00165072015-10-09 01:53:21 +02003940
3941 allocated = writer->allocated;
Victor Stinner53926a12015-10-09 12:37:03 +02003942 if (writer->min_size <= allocated)
Victor Stinner00165072015-10-09 01:53:21 +02003943 return str;
3944
Victor Stinner53926a12015-10-09 12:37:03 +02003945 allocated = writer->min_size;
Victor Stinner00165072015-10-09 01:53:21 +02003946 if (writer->overallocate
3947 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3948 /* overallocate to limit the number of realloc() */
3949 allocated += allocated / OVERALLOCATE_FACTOR;
3950 }
3951
3952 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003953 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003954 /* Note: Don't use a bytearray object because the conversion from
3955 byterray to bytes requires to copy all bytes. */
3956 if (_PyBytes_Resize(&writer->buffer, allocated)) {
3957 assert(writer->buffer == NULL);
3958 return NULL;
3959 }
3960 }
3961 else {
3962 /* convert from stack buffer to bytes object buffer */
3963 assert(writer->buffer == NULL);
3964
3965 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3966 if (writer->buffer == NULL)
3967 return NULL;
3968
3969 if (pos != 0) {
3970 Py_MEMCPY(PyBytes_AS_STRING(writer->buffer),
Victor Stinnerb3653a32015-10-09 03:38:24 +02003971 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02003972 pos);
3973 }
3974
Victor Stinnerb3653a32015-10-09 03:38:24 +02003975 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02003976#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003977 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003978#endif
Victor Stinner00165072015-10-09 01:53:21 +02003979 }
3980 writer->allocated = allocated;
3981
3982 str = _PyBytesWriter_AsString(writer) + pos;
3983 _PyBytesWriter_CheckConsistency(writer, str);
3984 return str;
3985}
3986
3987/* Allocate the buffer to write size bytes.
3988 Return the pointer to the beginning of buffer data.
3989 Raise an exception and return NULL on error. */
3990char*
3991_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3992{
3993 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02003994 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02003995 assert(size >= 0);
3996
Victor Stinnerb3653a32015-10-09 03:38:24 +02003997 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02003998#ifdef Py_DEBUG
Victor Stinner00165072015-10-09 01:53:21 +02003999 /* the last byte is reserved, it must be '\0' */
Victor Stinnerb3653a32015-10-09 03:38:24 +02004000 writer->allocated = sizeof(writer->small_buffer) - 1;
4001 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004002#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02004003 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02004004#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02004005 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02004006}
4007
4008PyObject *
4009_PyBytesWriter_Finish(_PyBytesWriter *writer, char *str)
4010{
4011 Py_ssize_t pos;
4012 PyObject *result;
4013
4014 _PyBytesWriter_CheckConsistency(writer, str);
4015
4016 pos = _PyBytesWriter_GetPos(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02004017 if (!writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02004018 if (pos != writer->allocated) {
4019 if (_PyBytes_Resize(&writer->buffer, pos)) {
4020 assert(writer->buffer == NULL);
4021 return NULL;
4022 }
4023 }
4024
4025 result = writer->buffer;
4026 writer->buffer = NULL;
4027 }
4028 else {
Victor Stinnerb3653a32015-10-09 03:38:24 +02004029 result = PyBytes_FromStringAndSize(writer->small_buffer, pos);
Victor Stinner00165072015-10-09 01:53:21 +02004030 }
4031
4032 return result;
4033}
Victor Stinnerce179bf2015-10-09 12:57:22 +02004034
4035char*
4036_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, char *str,
4037 char *bytes, Py_ssize_t size)
4038{
4039 str = _PyBytesWriter_Prepare(writer, str, size);
4040 if (str == NULL)
4041 return NULL;
4042
4043 Py_MEMCPY(str, bytes, size);
4044 str += size;
4045
4046 return str;
4047}