blob: ae7b1ea35021b40df7ed738fb3b9e231604c3e13 [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Mark Dickinsonfd24b322008-12-06 15:33:31 +000025/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26 for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038 For PyBytes_FromString(), the parameter `str' points to a null-terminated
39 string containing exactly `size' bytes.
40
41 For PyBytes_FromStringAndSize(), the parameter `str' is
42 either NULL or else points to a string containing at least `size' bytes.
43 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44 not have to be null-terminated. (Therefore it is safe to construct a
45 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47 bytes (setting the last byte to the null terminating character) and you can
48 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000049 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000050 alter the data yourself, since the strings may be shared.
51
52 The PyObject member `op->ob_size', which denotes the number of "extra
53 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020054 allocated for string data, not counting the null terminating character.
55 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 PyBytes_FromStringAndSize()) or the length of the string in the `str'
57 parameter (for PyBytes_FromString()).
58*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
123 Py_MEMCPY(op->ob_sval, str, size);
124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
166 Py_MEMCPY(op->ob_sval, str, size+1);
167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
250 /* substract bytes preallocated for the format string
251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
311 p = va_arg(vargs, char*);
312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Format flags: one bit per flag character that may follow '%' in a
   format specifier. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200440 Py_MEMCPY(str, p, len);
441 str += len;
442 return str;
443 }
444
445 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200447 *p_result = result;
448 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449}
450
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300451static PyObject *
452formatlong(PyObject *v, int flags, int prec, int type)
453{
454 PyObject *result, *iobj;
455 if (type == 'i')
456 type = 'd';
457 if (PyLong_Check(v))
458 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
459 if (PyNumber_Check(v)) {
460 /* make sure number is a type of integer for o, x, and X */
461 if (type == 'o' || type == 'x' || type == 'X')
462 iobj = PyNumber_Index(v);
463 else
464 iobj = PyNumber_Long(v);
465 if (iobj == NULL) {
466 if (!PyErr_ExceptionMatches(PyExc_TypeError))
467 return NULL;
468 }
469 else if (!PyLong_Check(iobj))
470 Py_CLEAR(iobj);
471 if (iobj != NULL) {
472 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
473 Py_DECREF(iobj);
474 return result;
475 }
476 }
477 PyErr_Format(PyExc_TypeError,
478 "%%%c format: %s is required, not %.200s", type,
479 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
480 : "a number",
481 Py_TYPE(v)->tp_name);
482 return NULL;
483}
484
485static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
489 *p = PyBytes_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200492 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
493 *p = PyByteArray_AS_STRING(arg)[0];
494 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800495 }
496 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300497 PyObject *iobj;
498 long ival;
499 int overflow;
500 /* make sure number is a type of integer */
501 if (PyLong_Check(arg)) {
502 ival = PyLong_AsLongAndOverflow(arg, &overflow);
503 }
504 else {
505 iobj = PyNumber_Index(arg);
506 if (iobj == NULL) {
507 if (!PyErr_ExceptionMatches(PyExc_TypeError))
508 return 0;
509 goto onError;
510 }
511 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
512 Py_DECREF(iobj);
513 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300514 if (!overflow && ival == -1 && PyErr_Occurred())
515 goto onError;
516 if (overflow || !(0 <= ival && ival <= 255)) {
517 PyErr_SetString(PyExc_OverflowError,
518 "%c arg not in range(256)");
519 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300521 *p = (char)ival;
522 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300524 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 PyErr_SetString(PyExc_TypeError,
526 "%c requires an integer in range(256) or a single byte");
527 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528}
529
530static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 /* is it a bytes object? */
536 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 *pbuf = PyBytes_AS_STRING(v);
538 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200540 return v;
541 }
542 if (PyByteArray_Check(v)) {
543 *pbuf = PyByteArray_AS_STRING(v);
544 *plen = PyByteArray_GET_SIZE(v);
545 Py_INCREF(v);
546 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 }
548 /* does it support __bytes__? */
549 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
550 if (func != NULL) {
551 result = PyObject_CallFunctionObjArgs(func, NULL);
552 Py_DECREF(func);
553 if (result == NULL)
554 return NULL;
555 if (!PyBytes_Check(result)) {
556 PyErr_Format(PyExc_TypeError,
557 "__bytes__ returned non-bytes (type %.200s)",
558 Py_TYPE(result)->tp_name);
559 Py_DECREF(result);
560 return NULL;
561 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200562 *pbuf = PyBytes_AS_STRING(result);
563 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 return result;
565 }
566 PyErr_Format(PyExc_TypeError,
567 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
568 Py_TYPE(v)->tp_name);
569 return NULL;
570}
571
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200572/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573
574PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200575_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
576 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577{
Victor Stinner772b2b02015-10-14 09:56:53 +0200578 const char *fmt;
579 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800580 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 _PyBytesWriter writer;
585
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800587 PyErr_BadInternalCall();
588 return NULL;
589 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200590 fmt = format;
591 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592
593 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595
596 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
597 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200599 if (!use_bytearray)
600 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601
Ethan Furmanb95b5612015-01-23 20:05:18 -0800602 if (PyTuple_Check(args)) {
603 arglen = PyTuple_GET_SIZE(args);
604 argidx = 0;
605 }
606 else {
607 arglen = -1;
608 argidx = -2;
609 }
610 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
611 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
612 !PyByteArray_Check(args)) {
613 dict = args;
614 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
Ethan Furmanb95b5612015-01-23 20:05:18 -0800616 while (--fmtcnt >= 0) {
617 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618 Py_ssize_t len;
619 char *pos;
620
621 pos = strchr(fmt + 1, '%');
622 if (pos != NULL)
623 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200624 else
625 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 assert(len != 0);
627
628 Py_MEMCPY(res, fmt, len);
629 res += len;
630 fmt += len;
631 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 }
633 else {
634 /* Got a format specifier */
635 int flags = 0;
636 Py_ssize_t width = -1;
637 int prec = -1;
638 int c = '\0';
639 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 PyObject *v = NULL;
641 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200642 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200644 Py_ssize_t len = 0;
645 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 Py_ssize_t alloc;
647#ifdef Py_DEBUG
648 char *before;
649#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 fmt++;
652 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200653 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800654 Py_ssize_t keylen;
655 PyObject *key;
656 int pcount = 1;
657
658 if (dict == NULL) {
659 PyErr_SetString(PyExc_TypeError,
660 "format requires a mapping");
661 goto error;
662 }
663 ++fmt;
664 --fmtcnt;
665 keystart = fmt;
666 /* Skip over balanced parentheses */
667 while (pcount > 0 && --fmtcnt >= 0) {
668 if (*fmt == ')')
669 --pcount;
670 else if (*fmt == '(')
671 ++pcount;
672 fmt++;
673 }
674 keylen = fmt - keystart - 1;
675 if (fmtcnt < 0 || pcount > 0) {
676 PyErr_SetString(PyExc_ValueError,
677 "incomplete format key");
678 goto error;
679 }
680 key = PyBytes_FromStringAndSize(keystart,
681 keylen);
682 if (key == NULL)
683 goto error;
684 if (args_owned) {
685 Py_DECREF(args);
686 args_owned = 0;
687 }
688 args = PyObject_GetItem(dict, key);
689 Py_DECREF(key);
690 if (args == NULL) {
691 goto error;
692 }
693 args_owned = 1;
694 arglen = -1;
695 argidx = -2;
696 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200697
698 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800699 while (--fmtcnt >= 0) {
700 switch (c = *fmt++) {
701 case '-': flags |= F_LJUST; continue;
702 case '+': flags |= F_SIGN; continue;
703 case ' ': flags |= F_BLANK; continue;
704 case '#': flags |= F_ALT; continue;
705 case '0': flags |= F_ZERO; continue;
706 }
707 break;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 if (c == '*') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 if (!PyLong_Check(v)) {
716 PyErr_SetString(PyExc_TypeError,
717 "* wants int");
718 goto error;
719 }
720 width = PyLong_AsSsize_t(v);
721 if (width == -1 && PyErr_Occurred())
722 goto error;
723 if (width < 0) {
724 flags |= F_LJUST;
725 width = -width;
726 }
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 else if (c >= 0 && isdigit(c)) {
731 width = c - '0';
732 while (--fmtcnt >= 0) {
733 c = Py_CHARMASK(*fmt++);
734 if (!isdigit(c))
735 break;
736 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
737 PyErr_SetString(
738 PyExc_ValueError,
739 "width too big");
740 goto error;
741 }
742 width = width*10 + (c - '0');
743 }
744 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200745
746 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800747 if (c == '.') {
748 prec = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!PyLong_Check(v)) {
756 PyErr_SetString(
757 PyExc_TypeError,
758 "* wants int");
759 goto error;
760 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200761 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (prec == -1 && PyErr_Occurred())
763 goto error;
764 if (prec < 0)
765 prec = 0;
766 if (--fmtcnt >= 0)
767 c = *fmt++;
768 }
769 else if (c >= 0 && isdigit(c)) {
770 prec = c - '0';
771 while (--fmtcnt >= 0) {
772 c = Py_CHARMASK(*fmt++);
773 if (!isdigit(c))
774 break;
775 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
776 PyErr_SetString(
777 PyExc_ValueError,
778 "prec too big");
779 goto error;
780 }
781 prec = prec*10 + (c - '0');
782 }
783 }
784 } /* prec */
785 if (fmtcnt >= 0) {
786 if (c == 'h' || c == 'l' || c == 'L') {
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 }
790 }
791 if (fmtcnt < 0) {
792 PyErr_SetString(PyExc_ValueError,
793 "incomplete format");
794 goto error;
795 }
796 if (c != '%') {
797 v = getnextarg(args, arglen, &argidx);
798 if (v == NULL)
799 goto error;
800 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200801
802 if (fmtcnt < 0) {
803 /* last writer: disable writer overallocation */
804 writer.overallocate = 0;
805 }
806
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 sign = 0;
808 fill = ' ';
809 switch (c) {
810 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200811 *res++ = '%';
812 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813
Ethan Furman62e977f2015-03-11 08:17:00 -0700814 case 'r':
815 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200817 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800818 if (temp == NULL)
819 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200820 assert(PyUnicode_IS_ASCII(temp));
821 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
822 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (prec >= 0 && len > prec)
824 len = prec;
825 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 case 's':
828 // %s is only for 2/3 code; 3 only code should use %b
829 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200830 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 if (temp == NULL)
832 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 if (prec >= 0 && len > prec)
834 len = prec;
835 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200836
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 case 'i':
838 case 'd':
839 case 'u':
840 case 'o':
841 case 'x':
842 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200843 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200844 && width == -1 && prec == -1
845 && !(flags & (F_SIGN | F_BLANK))
846 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200847 {
848 /* Fast path */
849 int alternate = flags & F_ALT;
850 int base;
851
852 switch(c)
853 {
854 default:
855 assert(0 && "'type' not in [diuoxX]");
856 case 'd':
857 case 'i':
858 case 'u':
859 base = 10;
860 break;
861 case 'o':
862 base = 8;
863 break;
864 case 'x':
865 case 'X':
866 base = 16;
867 break;
868 }
869
870 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200871 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200872 res = _PyLong_FormatBytesWriter(&writer, res,
873 v, base, alternate);
874 if (res == NULL)
875 goto error;
876 continue;
877 }
878
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300879 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200880 if (!temp)
881 goto error;
882 assert(PyUnicode_IS_ASCII(temp));
883 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
884 len = PyUnicode_GET_LENGTH(temp);
885 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 if (flags & F_ZERO)
887 fill = '0';
888 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200889
Ethan Furmanb95b5612015-01-23 20:05:18 -0800890 case 'e':
891 case 'E':
892 case 'f':
893 case 'F':
894 case 'g':
895 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200896 if (width == -1 && prec == -1
897 && !(flags & (F_SIGN | F_BLANK)))
898 {
899 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200900 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200901 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (res == NULL)
903 goto error;
904 continue;
905 }
906
Victor Stinnerad771582015-10-09 12:38:53 +0200907 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800908 goto error;
909 pbuf = PyBytes_AS_STRING(temp);
910 len = PyBytes_GET_SIZE(temp);
911 sign = 1;
912 if (flags & F_ZERO)
913 fill = '0';
914 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200915
Ethan Furmanb95b5612015-01-23 20:05:18 -0800916 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200917 pbuf = &onechar;
918 len = byte_converter(v, &onechar);
919 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800920 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200921 if (width == -1) {
922 /* Fast path */
923 *res++ = onechar;
924 continue;
925 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200927
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 default:
929 PyErr_Format(PyExc_ValueError,
930 "unsupported format character '%c' (0x%x) "
931 "at index %zd",
932 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200933 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 goto error;
935 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200936
Ethan Furmanb95b5612015-01-23 20:05:18 -0800937 if (sign) {
938 if (*pbuf == '-' || *pbuf == '+') {
939 sign = *pbuf++;
940 len--;
941 }
942 else if (flags & F_SIGN)
943 sign = '+';
944 else if (flags & F_BLANK)
945 sign = ' ';
946 else
947 sign = 0;
948 }
949 if (width < len)
950 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200951
952 alloc = width;
953 if (sign != 0 && len == width)
954 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200955 /* 2: size preallocated for %s */
956 if (alloc > 2) {
957 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200958 if (res == NULL)
959 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200961#ifdef Py_DEBUG
962 before = res;
963#endif
964
965 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 if (sign) {
967 if (fill != ' ')
968 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800969 if (width > len)
970 width--;
971 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972
973 /* Write the numeric prefix for "x", "X" and "o" formats
974 if the alternate form is used.
975 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
977 assert(pbuf[0] == '0');
978 assert(pbuf[1] == c);
979 if (fill != ' ') {
980 *res++ = *pbuf++;
981 *res++ = *pbuf++;
982 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 width -= 2;
984 if (width < 0)
985 width = 0;
986 len -= 2;
987 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200988
989 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200991 memset(res, fill, width - len);
992 res += (width - len);
993 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* If padding with spaces: write sign if needed and/or numeric
997 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800998 if (fill == ' ') {
999 if (sign)
1000 *res++ = sign;
1001 if ((flags & F_ALT) &&
1002 (c == 'x' || c == 'X')) {
1003 assert(pbuf[0] == '0');
1004 assert(pbuf[1] == c);
1005 *res++ = *pbuf++;
1006 *res++ = *pbuf++;
1007 }
1008 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011 Py_MEMCPY(res, pbuf, len);
1012 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Pad right with the fill character if needed */
1015 if (width > len) {
1016 memset(res, ' ', width - len);
1017 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001019
Ethan Furmanb95b5612015-01-23 20:05:18 -08001020 if (dict && (argidx < arglen) && c != '%') {
1021 PyErr_SetString(PyExc_TypeError,
1022 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 Py_XDECREF(temp);
1024 goto error;
1025 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
1028#ifdef Py_DEBUG
1029 /* check that we computed the exact size for this write */
1030 assert((res - before) == alloc);
1031#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001032 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033
1034 /* If overallocation was disabled, ensure that it was the last
1035 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001036 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
Ethan Furmanb95b5612015-01-23 20:05:18 -08001039 if (argidx < arglen && !dict) {
1040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
1042 goto error;
1043 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 if (args_owned) {
1046 Py_DECREF(args);
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049
1050 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001051 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 if (args_owned) {
1053 Py_DECREF(args);
1054 }
1055 return NULL;
1056}
1057
1058/* =-= */
1059
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001060static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001064}
1065
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066/* Unescape a backslash-escaped string. If unicode is non-zero,
1067 the string is a u-literal. If recode_encoding is non-zero,
1068 the string is UTF-8 encoded and should be re-encoded in the
1069 specified encoding. */
1070
Victor Stinner2ec80632015-10-14 13:32:13 +02001071static char *
1072_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073 const char *errors, const char *recode_encoding,
1074 _PyBytesWriter *writer, char *p)
1075{
1076 PyObject *u, *w;
1077 const char* t;
1078
1079 t = *s;
1080 /* Decode non-ASCII bytes as UTF-8. */
1081 while (t < end && (*t & 0x80))
1082 t++;
1083 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084 if (u == NULL)
1085 return NULL;
1086
1087 /* Recode them in target encoding. */
1088 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089 Py_DECREF(u);
1090 if (w == NULL)
1091 return NULL;
1092 assert(PyBytes_Check(w));
1093
1094 /* Append bytes to output buffer. */
1095 writer->min_size--; /* substract 1 preallocated byte */
1096 p = _PyBytesWriter_WriteBytes(writer, p,
1097 PyBytes_AS_STRING(w),
1098 PyBytes_GET_SIZE(w));
1099 Py_DECREF(w);
1100 if (p == NULL)
1101 return NULL;
1102
1103 *s = t;
1104 return p;
1105}
1106
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 Py_ssize_t len,
1109 const char *errors,
1110 Py_ssize_t unicode,
1111 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001114 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001116 _PyBytesWriter writer;
1117
1118 _PyBytesWriter_Init(&writer);
1119
1120 p = _PyBytesWriter_Alloc(&writer, len);
1121 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001123 writer.overallocate = 1;
1124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 end = s + len;
1126 while (s < end) {
1127 if (*s != '\\') {
1128 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001129 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 *p++ = *s++;
1131 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 else {
1133 /* non-ASCII character and need to recode */
1134 p = _PyBytes_DecodeEscapeRecode(&s, end,
1135 errors, recode_encoding,
1136 &writer, p);
1137 if (p == NULL)
1138 goto failed;
1139 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 continue;
1141 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001144 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 PyErr_SetString(PyExc_ValueError,
1146 "Trailing \\ in string");
1147 goto failed;
1148 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 switch (*s++) {
1151 /* XXX This assumes ASCII! */
1152 case '\n': break;
1153 case '\\': *p++ = '\\'; break;
1154 case '\'': *p++ = '\''; break;
1155 case '\"': *p++ = '\"'; break;
1156 case 'b': *p++ = '\b'; break;
1157 case 'f': *p++ = '\014'; break; /* FF */
1158 case 't': *p++ = '\t'; break;
1159 case 'n': *p++ = '\n'; break;
1160 case 'r': *p++ = '\r'; break;
1161 case 'v': *p++ = '\013'; break; /* VT */
1162 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1163 case '0': case '1': case '2': case '3':
1164 case '4': case '5': case '6': case '7':
1165 c = s[-1] - '0';
1166 if (s < end && '0' <= *s && *s <= '7') {
1167 c = (c<<3) + *s++ - '0';
1168 if (s < end && '0' <= *s && *s <= '7')
1169 c = (c<<3) + *s++ - '0';
1170 }
1171 *p++ = c;
1172 break;
1173 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001174 if (s+1 < end) {
1175 int digit1, digit2;
1176 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1177 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1178 if (digit1 < 16 && digit2 < 16) {
1179 *p++ = (unsigned char)((digit1 << 4) + digit2);
1180 s += 2;
1181 break;
1182 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001184 /* invalid hexadecimal digits */
1185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001187 PyErr_Format(PyExc_ValueError,
1188 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001189 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 goto failed;
1191 }
1192 if (strcmp(errors, "replace") == 0) {
1193 *p++ = '?';
1194 } else if (strcmp(errors, "ignore") == 0)
1195 /* do nothing */;
1196 else {
1197 PyErr_Format(PyExc_ValueError,
1198 "decoding error; unknown "
1199 "error handling code: %.400s",
1200 errors);
1201 goto failed;
1202 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001203 /* skip \x */
1204 if (s < end && Py_ISXDIGIT(s[0]))
1205 s++; /* and a hexdigit */
1206 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 default:
1209 *p++ = '\\';
1210 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001211 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 UTF-8 bytes may follow. */
1213 }
1214 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001215
1216 return _PyBytesWriter_Finish(&writer, p);
1217
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001219 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221}
1222
1223/* -------------------------------------------------------------------- */
1224/* object api */
1225
1226Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001227PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (!PyBytes_Check(op)) {
1230 PyErr_Format(PyExc_TypeError,
1231 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1232 return -1;
1233 }
1234 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235}
1236
1237char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001238PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 if (!PyBytes_Check(op)) {
1241 PyErr_Format(PyExc_TypeError,
1242 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1243 return NULL;
1244 }
1245 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246}
1247
1248int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001249PyBytes_AsStringAndSize(PyObject *obj,
1250 char **s,
1251 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 if (s == NULL) {
1254 PyErr_BadInternalCall();
1255 return -1;
1256 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 if (!PyBytes_Check(obj)) {
1259 PyErr_Format(PyExc_TypeError,
1260 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1261 return -1;
1262 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 *s = PyBytes_AS_STRING(obj);
1265 if (len != NULL)
1266 *len = PyBytes_GET_SIZE(obj);
1267 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001268 PyErr_SetString(PyExc_ValueError,
1269 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 return -1;
1271 }
1272 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273}
Neal Norwitz6968b052007-02-27 19:02:19 +00001274
1275/* -------------------------------------------------------------------- */
1276/* Methods */
1277
Eric Smith0923d1d2009-04-16 20:16:10 +00001278#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001279
1280#include "stringlib/fastsearch.h"
1281#include "stringlib/count.h"
1282#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001283#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001284#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001285#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001286#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001287
Eric Smith0f78bff2009-11-30 01:01:42 +00001288#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290PyObject *
1291PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001292{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001293 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001295 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 unsigned char quote, *s, *p;
1298
1299 /* Compute size of output string */
1300 squotes = dquotes = 0;
1301 newsize = 3; /* b'' */
1302 s = (unsigned char*)op->ob_sval;
1303 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001304 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001306 case '\'': squotes++; break;
1307 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001309 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 default:
1311 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001312 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001314 if (newsize > PY_SSIZE_T_MAX - incr)
1315 goto overflow;
1316 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 }
1318 quote = '\'';
1319 if (smartquotes && squotes && !dquotes)
1320 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001321 if (squotes && quote == '\'') {
1322 if (newsize > PY_SSIZE_T_MAX - squotes)
1323 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326
1327 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 if (v == NULL) {
1329 return NULL;
1330 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001333 *p++ = 'b', *p++ = quote;
1334 for (i = 0; i < length; i++) {
1335 unsigned char c = op->ob_sval[i];
1336 if (c == quote || c == '\\')
1337 *p++ = '\\', *p++ = c;
1338 else if (c == '\t')
1339 *p++ = '\\', *p++ = 't';
1340 else if (c == '\n')
1341 *p++ = '\\', *p++ = 'n';
1342 else if (c == '\r')
1343 *p++ = '\\', *p++ = 'r';
1344 else if (c < ' ' || c >= 0x7f) {
1345 *p++ = '\\';
1346 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001347 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1348 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 else
1351 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001354 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001356
1357 overflow:
1358 PyErr_SetString(PyExc_OverflowError,
1359 "bytes object is too large to make repr");
1360 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001361}
1362
Neal Norwitz6968b052007-02-27 19:02:19 +00001363static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001364bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001367}
1368
Neal Norwitz6968b052007-02-27 19:02:19 +00001369static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001370bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (Py_BytesWarningFlag) {
1373 if (PyErr_WarnEx(PyExc_BytesWarning,
1374 "str() on a bytes instance", 1))
1375 return NULL;
1376 }
1377 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001378}
1379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001381bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384}
Neal Norwitz6968b052007-02-27 19:02:19 +00001385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386/* This is also used by PyBytes_Concat() */
1387static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001388bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 Py_ssize_t size;
1391 Py_buffer va, vb;
1392 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 va.len = -1;
1395 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001396 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1397 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1399 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1400 goto done;
1401 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 /* Optimize end cases */
1404 if (va.len == 0 && PyBytes_CheckExact(b)) {
1405 result = b;
1406 Py_INCREF(result);
1407 goto done;
1408 }
1409 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1410 result = a;
1411 Py_INCREF(result);
1412 goto done;
1413 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 size = va.len + vb.len;
1416 if (size < 0) {
1417 PyErr_NoMemory();
1418 goto done;
1419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 result = PyBytes_FromStringAndSize(NULL, size);
1422 if (result != NULL) {
1423 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1424 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1425 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
1427 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (va.len != -1)
1429 PyBuffer_Release(&va);
1430 if (vb.len != -1)
1431 PyBuffer_Release(&vb);
1432 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433}
Neal Norwitz6968b052007-02-27 19:02:19 +00001434
1435static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001436bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001437{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001438 Py_ssize_t i;
1439 Py_ssize_t j;
1440 Py_ssize_t size;
1441 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 size_t nbytes;
1443 if (n < 0)
1444 n = 0;
1445 /* watch out for overflows: the size can overflow int,
1446 * and the # of bytes needed can overflow size_t
1447 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001448 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 PyErr_SetString(PyExc_OverflowError,
1450 "repeated bytes are too long");
1451 return NULL;
1452 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001453 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1455 Py_INCREF(a);
1456 return (PyObject *)a;
1457 }
1458 nbytes = (size_t)size;
1459 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1460 PyErr_SetString(PyExc_OverflowError,
1461 "repeated bytes are too long");
1462 return NULL;
1463 }
1464 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1465 if (op == NULL)
1466 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001467 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 op->ob_shash = -1;
1469 op->ob_sval[size] = '\0';
1470 if (Py_SIZE(a) == 1 && n > 0) {
1471 memset(op->ob_sval, a->ob_sval[0] , n);
1472 return (PyObject *) op;
1473 }
1474 i = 0;
1475 if (i < size) {
1476 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1477 i = Py_SIZE(a);
1478 }
1479 while (i < size) {
1480 j = (i <= size-i) ? i : size-i;
1481 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1482 i += j;
1483 }
1484 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001485}
1486
Guido van Rossum98297ee2007-11-06 21:34:58 +00001487static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001488bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001489{
1490 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1491 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001492 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001493 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001494 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001495 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001496 return -1;
1497 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1498 varg.buf, varg.len, 0);
1499 PyBuffer_Release(&varg);
1500 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001501 }
1502 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001503 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1504 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001505 }
1506
Antoine Pitrou0010d372010-08-15 17:12:55 +00001507 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001508}
1509
Neal Norwitz6968b052007-02-27 19:02:19 +00001510static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001511bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001512{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 if (i < 0 || i >= Py_SIZE(a)) {
1514 PyErr_SetString(PyExc_IndexError, "index out of range");
1515 return NULL;
1516 }
1517 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001518}
1519
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001520Py_LOCAL(int)
1521bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1522{
1523 int cmp;
1524 Py_ssize_t len;
1525
1526 len = Py_SIZE(a);
1527 if (Py_SIZE(b) != len)
1528 return 0;
1529
1530 if (a->ob_sval[0] != b->ob_sval[0])
1531 return 0;
1532
1533 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1534 return (cmp == 0);
1535}
1536
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001537static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001538bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 int c;
1541 Py_ssize_t len_a, len_b;
1542 Py_ssize_t min_len;
1543 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001544 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 /* Make sure both arguments are strings. */
1547 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001548 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001549 rc = PyObject_IsInstance((PyObject*)a,
1550 (PyObject*)&PyUnicode_Type);
1551 if (!rc)
1552 rc = PyObject_IsInstance((PyObject*)b,
1553 (PyObject*)&PyUnicode_Type);
1554 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001556 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001557 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001558 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001559 return NULL;
1560 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001561 else {
1562 rc = PyObject_IsInstance((PyObject*)a,
1563 (PyObject*)&PyLong_Type);
1564 if (!rc)
1565 rc = PyObject_IsInstance((PyObject*)b,
1566 (PyObject*)&PyLong_Type);
1567 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001568 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001569 if (rc) {
1570 if (PyErr_WarnEx(PyExc_BytesWarning,
1571 "Comparison between bytes and int", 1))
1572 return NULL;
1573 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001574 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
1576 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001578 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001580 case Py_EQ:
1581 case Py_LE:
1582 case Py_GE:
1583 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001585 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001586 case Py_NE:
1587 case Py_LT:
1588 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001590 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001591 default:
1592 PyErr_BadArgument();
1593 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 }
1595 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001596 else if (op == Py_EQ || op == Py_NE) {
1597 int eq = bytes_compare_eq(a, b);
1598 eq ^= (op == Py_NE);
1599 result = eq ? Py_True : Py_False;
1600 }
1601 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001602 len_a = Py_SIZE(a);
1603 len_b = Py_SIZE(b);
1604 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 if (min_len > 0) {
1606 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001607 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001608 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001610 else
1611 c = 0;
1612 if (c == 0)
1613 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1614 switch (op) {
1615 case Py_LT: c = c < 0; break;
1616 case Py_LE: c = c <= 0; break;
1617 case Py_GT: c = c > 0; break;
1618 case Py_GE: c = c >= 0; break;
1619 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001620 PyErr_BadArgument();
1621 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001622 }
1623 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 Py_INCREF(result);
1627 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001628}
1629
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001630static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001631bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001632{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001633 if (a->ob_shash == -1) {
1634 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001635 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001636 }
1637 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001638}
1639
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001641bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 if (PyIndex_Check(item)) {
1644 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1645 if (i == -1 && PyErr_Occurred())
1646 return NULL;
1647 if (i < 0)
1648 i += PyBytes_GET_SIZE(self);
1649 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1650 PyErr_SetString(PyExc_IndexError,
1651 "index out of range");
1652 return NULL;
1653 }
1654 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1655 }
1656 else if (PySlice_Check(item)) {
1657 Py_ssize_t start, stop, step, slicelength, cur, i;
1658 char* source_buf;
1659 char* result_buf;
1660 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001661
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001662 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 PyBytes_GET_SIZE(self),
1664 &start, &stop, &step, &slicelength) < 0) {
1665 return NULL;
1666 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 if (slicelength <= 0) {
1669 return PyBytes_FromStringAndSize("", 0);
1670 }
1671 else if (start == 0 && step == 1 &&
1672 slicelength == PyBytes_GET_SIZE(self) &&
1673 PyBytes_CheckExact(self)) {
1674 Py_INCREF(self);
1675 return (PyObject *)self;
1676 }
1677 else if (step == 1) {
1678 return PyBytes_FromStringAndSize(
1679 PyBytes_AS_STRING(self) + start,
1680 slicelength);
1681 }
1682 else {
1683 source_buf = PyBytes_AS_STRING(self);
1684 result = PyBytes_FromStringAndSize(NULL, slicelength);
1685 if (result == NULL)
1686 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 result_buf = PyBytes_AS_STRING(result);
1689 for (cur = start, i = 0; i < slicelength;
1690 cur += step, i++) {
1691 result_buf[i] = source_buf[cur];
1692 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 return result;
1695 }
1696 }
1697 else {
1698 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001699 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 Py_TYPE(item)->tp_name);
1701 return NULL;
1702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703}
1704
1705static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001706bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1709 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710}
1711
/* Sequence-protocol slot table for bytes.  Length, concatenation,
   repetition, item access and containment are provided; the slice and
   assignment slots are left as 0 (unset). */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1722
/* Mapping-protocol slot table for bytes: length and subscript only.
   The third slot (item assignment) is left as 0 (unset). */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,          /* mp_length */
    (binaryfunc)bytes_subscript,    /* mp_subscript */
    0,                              /* mp_ass_subscript: unset */
};
1728
/* Buffer-protocol slot table for bytes: only the getbuffer hook is
   provided; the release hook is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer,  /* bf_getbuffer */
    NULL,                                   /* bf_releasebuffer: unset */
};
1733
1734
/* Strip-mode selectors passed as `striptype` to do_strip(), do_xstrip()
   and do_argstrip(): strip the left end, the right end, or both. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1738
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739/*[clinic input]
1740bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742 sep: object = None
        The delimiter according to which to split the bytes.
1744 None (the default value) means split on ASCII whitespace characters
1745 (space, tab, return, newline, formfeed, vertical tab).
1746 maxsplit: Py_ssize_t = -1
1747 Maximum number of splits to do.
1748 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750Return a list of the sections in the bytes, using sep as the delimiter.
1751[clinic start generated code]*/
1752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001754bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001755/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001756{
1757 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 const char *s = PyBytes_AS_STRING(self), *sub;
1759 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001760 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 if (maxsplit < 0)
1763 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 return NULL;
1768 sub = vsub.buf;
1769 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1772 PyBuffer_Release(&vsub);
1773 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001774}
1775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001776/*[clinic input]
1777bytes.partition
1778
1779 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781 /
1782
1783Partition the bytes into three parts using the given separator.
1784
1785This will search for the separator sep in the bytes. If the separator is found,
1786returns a 3-tuple containing the part before the separator, the separator
1787itself, and the part after it.
1788
1789If the separator is not found, returns a 3-tuple containing the original bytes
1790object and two empty bytes objects.
1791[clinic start generated code]*/
1792
Neal Norwitz6968b052007-02-27 19:02:19 +00001793static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001795/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001796{
Neal Norwitz6968b052007-02-27 19:02:19 +00001797 return stringlib_partition(
1798 (PyObject*) self,
1799 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001800 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001801 );
1802}
1803
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804/*[clinic input]
1805bytes.rpartition
1806
1807 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001808 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001809 /
1810
1811Partition the bytes into three parts using the given separator.
1812
This will search for the separator sep in the bytes, starting at the end. If
1814the separator is found, returns a 3-tuple containing the part before the
1815separator, the separator itself, and the part after it.
1816
1817If the separator is not found, returns a 3-tuple containing two empty bytes
1818objects and the original bytes object.
1819[clinic start generated code]*/
1820
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001821static PyObject *
1822bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001823/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001824{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 return stringlib_rpartition(
1826 (PyObject*) self,
1827 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001828 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001830}
1831
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832/*[clinic input]
1833bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001834
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001835Return a list of the sections in the bytes, using sep as the delimiter.
1836
1837Splitting is done starting at the end of the bytes and working to the front.
1838[clinic start generated code]*/
1839
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001840static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001841bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001842/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001843{
1844 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 const char *s = PyBytes_AS_STRING(self), *sub;
1846 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001847 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 if (maxsplit < 0)
1850 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001851 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001853 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 return NULL;
1855 sub = vsub.buf;
1856 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1859 PyBuffer_Release(&vsub);
1860 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001861}
1862
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001864/*[clinic input]
1865bytes.join
1866
1867 iterable_of_bytes: object
1868 /
1869
1870Concatenate any number of bytes objects.
1871
1872The bytes whose method is called is inserted in between each pair.
1873
1874The result is returned as a new bytes object.
1875
1876Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1877[clinic start generated code]*/
1878
Neal Norwitz6968b052007-02-27 19:02:19 +00001879static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001880bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001881/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001882{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001883 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001884}
1885
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886PyObject *
1887_PyBytes_Join(PyObject *sep, PyObject *x)
1888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 assert(sep != NULL && PyBytes_Check(sep));
1890 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001891 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892}
1893
/* helper macro to fixup start/end slice values

   `end` is clamped to [0, len] (negative values are first taken relative
   to len).  A negative `start` is taken relative to len and clamped to 0;
   a `start` greater than len is left untouched, so callers must still
   check for start > end.  `start` and `end` must be modifiable lvalues;
   every argument is evaluated more than once.

   The body is wrapped in do { } while (0) so the macro expands to a single
   statement and is safe inside an unbraced if/else.  Invoke it with a
   trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
1909Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001910bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001913 char byte;
1914 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001916 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001918 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouac65d962011-10-20 23:54:17 +02001920 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1921 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouac65d962011-10-20 23:54:17 +02001924 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001925 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001926 return -2;
1927
1928 sub = subbuf.buf;
1929 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001931 else {
1932 sub = &byte;
1933 sub_len = 1;
1934 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001935 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001937 ADJUST_INDICES(start, end, len);
1938 if (end - start < sub_len)
1939 res = -1;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001940 else if (sub_len == 1
1941#ifndef HAVE_MEMRCHR
1942 && dir > 0
1943#endif
1944 ) {
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001945 unsigned char needle = *sub;
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001946 int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001947 res = stringlib_fastsearch_memchr_1char(
1948 PyBytes_AS_STRING(self) + start, end - start,
Serhiy Storchakad92d4ef2015-07-20 22:58:02 +03001949 needle, needle, mode);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001950 if (res >= 0)
1951 res += start;
1952 }
1953 else {
1954 if (dir > 0)
1955 res = stringlib_find_slice(
1956 PyBytes_AS_STRING(self), len,
1957 sub, sub_len, start, end);
1958 else
1959 res = stringlib_rfind_slice(
1960 PyBytes_AS_STRING(self), len,
1961 sub, sub_len, start, end);
1962 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001963
1964 if (subobj)
1965 PyBuffer_Release(&subbuf);
1966
1967 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001968}
1969
1970
1971PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001972"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001973\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001974Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001975such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001976arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001977\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001978Return -1 on failure.");
1979
Neal Norwitz6968b052007-02-27 19:02:19 +00001980static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001981bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001982{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 Py_ssize_t result = bytes_find_internal(self, args, +1);
1984 if (result == -2)
1985 return NULL;
1986 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001987}
1988
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001989
1990PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001991"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001992\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001993Like B.find() but raise ValueError when the substring is not found.");
1994
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001995static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001996bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001997{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001998 Py_ssize_t result = bytes_find_internal(self, args, +1);
1999 if (result == -2)
2000 return NULL;
2001 if (result == -1) {
2002 PyErr_SetString(PyExc_ValueError,
2003 "substring not found");
2004 return NULL;
2005 }
2006 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002007}
2008
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002009
2010PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002011"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002012\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002013Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002014such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002015arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002016\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002017Return -1 on failure.");
2018
Neal Norwitz6968b052007-02-27 19:02:19 +00002019static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002020bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002021{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 Py_ssize_t result = bytes_find_internal(self, args, -1);
2023 if (result == -2)
2024 return NULL;
2025 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002026}
2027
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002028
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002029PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002030"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002031\n\
2032Like B.rfind() but raise ValueError when the substring is not found.");
2033
2034static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002035bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002036{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 Py_ssize_t result = bytes_find_internal(self, args, -1);
2038 if (result == -2)
2039 return NULL;
2040 if (result == -1) {
2041 PyErr_SetString(PyExc_ValueError,
2042 "substring not found");
2043 return NULL;
2044 }
2045 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002046}
2047
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002048
2049Py_LOCAL_INLINE(PyObject *)
2050do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002051{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 Py_buffer vsep;
2053 char *s = PyBytes_AS_STRING(self);
2054 Py_ssize_t len = PyBytes_GET_SIZE(self);
2055 char *sep;
2056 Py_ssize_t seplen;
2057 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002058
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002059 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 return NULL;
2061 sep = vsep.buf;
2062 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002064 i = 0;
2065 if (striptype != RIGHTSTRIP) {
2066 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2067 i++;
2068 }
2069 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 j = len;
2072 if (striptype != LEFTSTRIP) {
2073 do {
2074 j--;
2075 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2076 j++;
2077 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002079 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002081 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2082 Py_INCREF(self);
2083 return (PyObject*)self;
2084 }
2085 else
2086 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002087}
2088
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002089
2090Py_LOCAL_INLINE(PyObject *)
2091do_strip(PyBytesObject *self, int striptype)
2092{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002093 char *s = PyBytes_AS_STRING(self);
2094 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002096 i = 0;
2097 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002098 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002099 i++;
2100 }
2101 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002103 j = len;
2104 if (striptype != LEFTSTRIP) {
2105 do {
2106 j--;
David Malcolm96960882010-11-05 17:23:41 +00002107 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 j++;
2109 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2112 Py_INCREF(self);
2113 return (PyObject*)self;
2114 }
2115 else
2116 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002117}
2118
2119
2120Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002121do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002122{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002123 if (bytes != NULL && bytes != Py_None) {
2124 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002125 }
2126 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002127}
2128
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002129/*[clinic input]
2130bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002132 self: self(type="PyBytesObject *")
2133 bytes: object = None
2134 /
2135
2136Strip leading and trailing bytes contained in the argument.
2137
2138If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2139[clinic start generated code]*/
2140
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002141static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002142bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002143/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002144{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002145 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002146}
2147
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002148/*[clinic input]
2149bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002150
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002151 self: self(type="PyBytesObject *")
2152 bytes: object = None
2153 /
2154
2155Strip leading bytes contained in the argument.
2156
2157If the argument is omitted or None, strip leading ASCII whitespace.
2158[clinic start generated code]*/
2159
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002160static PyObject *
2161bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002162/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002163{
2164 return do_argstrip(self, LEFTSTRIP, bytes);
2165}
2166
2167/*[clinic input]
2168bytes.rstrip
2169
2170 self: self(type="PyBytesObject *")
2171 bytes: object = None
2172 /
2173
2174Strip trailing bytes contained in the argument.
2175
2176If the argument is omitted or None, strip trailing ASCII whitespace.
2177[clinic start generated code]*/
2178
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002179static PyObject *
2180bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002181/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002182{
2183 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002184}
Neal Norwitz6968b052007-02-27 19:02:19 +00002185
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002186
2187PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002188"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002189\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002190Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002191string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002192as in slice notation.");
2193
2194static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002195bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002196{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002197 PyObject *sub_obj;
2198 const char *str = PyBytes_AS_STRING(self), *sub;
2199 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002200 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002201 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002202
Antoine Pitrouac65d962011-10-20 23:54:17 +02002203 Py_buffer vsub;
2204 PyObject *count_obj;
2205
2206 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2207 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002208 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002209
Antoine Pitrouac65d962011-10-20 23:54:17 +02002210 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002211 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002212 return NULL;
2213
2214 sub = vsub.buf;
2215 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002216 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002217 else {
2218 sub = &byte;
2219 sub_len = 1;
2220 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002222 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002223
Antoine Pitrouac65d962011-10-20 23:54:17 +02002224 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002225 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2226 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002227
2228 if (sub_obj)
2229 PyBuffer_Release(&vsub);
2230
2231 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002232}
2233
2234
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002235/*[clinic input]
2236bytes.translate
2237
2238 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002239 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002240 Translation table, which must be a bytes object of length 256.
2241 [
2242 deletechars: object
2243 ]
2244 /
2245
2246Return a copy with each character mapped by the given translation table.
2247
2248All characters occurring in the optional argument deletechars are removed.
2249The remaining characters are mapped through the given translation table.
2250[clinic start generated code]*/
2251
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002252static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002253bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2254 PyObject *deletechars)
2255/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002256{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002257 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002258 Py_buffer table_view = {NULL, NULL};
2259 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002260 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002261 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002262 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002263 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002264 Py_ssize_t inlen, tablen, dellen = 0;
2265 PyObject *result;
2266 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002267
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002268 if (PyBytes_Check(table)) {
2269 table_chars = PyBytes_AS_STRING(table);
2270 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002271 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002272 else if (table == Py_None) {
2273 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002274 tablen = 256;
2275 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002276 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002277 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002278 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002279 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002280 tablen = table_view.len;
2281 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002283 if (tablen != 256) {
2284 PyErr_SetString(PyExc_ValueError,
2285 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002286 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002287 return NULL;
2288 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002289
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002290 if (deletechars != NULL) {
2291 if (PyBytes_Check(deletechars)) {
2292 del_table_chars = PyBytes_AS_STRING(deletechars);
2293 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002294 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002295 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002296 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002297 PyBuffer_Release(&table_view);
2298 return NULL;
2299 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002300 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002301 dellen = del_table_view.len;
2302 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002303 }
2304 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002305 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002306 dellen = 0;
2307 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002309 inlen = PyBytes_GET_SIZE(input_obj);
2310 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002311 if (result == NULL) {
2312 PyBuffer_Release(&del_table_view);
2313 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002314 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002315 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002316 output_start = output = PyBytes_AsString(result);
2317 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002318
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002319 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002320 /* If no deletions are required, use faster code */
2321 for (i = inlen; --i >= 0; ) {
2322 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002323 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002324 changed = 1;
2325 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002326 if (!changed && PyBytes_CheckExact(input_obj)) {
2327 Py_INCREF(input_obj);
2328 Py_DECREF(result);
2329 result = input_obj;
2330 }
2331 PyBuffer_Release(&del_table_view);
2332 PyBuffer_Release(&table_view);
2333 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002334 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002335
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002336 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002337 for (i = 0; i < 256; i++)
2338 trans_table[i] = Py_CHARMASK(i);
2339 } else {
2340 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002341 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002342 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002343 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002345 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002346 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002347 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002349 for (i = inlen; --i >= 0; ) {
2350 c = Py_CHARMASK(*input++);
2351 if (trans_table[c] != -1)
2352 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2353 continue;
2354 changed = 1;
2355 }
2356 if (!changed && PyBytes_CheckExact(input_obj)) {
2357 Py_DECREF(result);
2358 Py_INCREF(input_obj);
2359 return input_obj;
2360 }
2361 /* Fix the size of the resulting string */
2362 if (inlen > 0)
2363 _PyBytes_Resize(&result, output - output_start);
2364 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002365}
2366
2367
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002368/*[clinic input]
2369
2370@staticmethod
2371bytes.maketrans
2372
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002373 frm: Py_buffer
2374 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002375 /
2376
2377Return a translation table useable for the bytes or bytearray translate method.
2378
2379The returned table will be one where each byte in frm is mapped to the byte at
2380the same position in to.
2381
2382The bytes objects frm and to must be of the same length.
2383[clinic start generated code]*/
2384
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002385static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002386bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002387/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002388{
2389 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002390}
2391
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002392/* find and count characters and substrings */
2393
/* Locate the first byte equal to c in the target_len bytes starting at
   target.  Expands to a memchr() call whose result is cast to char *,
   so it yields NULL when the byte is absent. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2396
2397/* String ops must return a string. */
2398/* If the object is subclass of string, create a copy */
2399Py_LOCAL(PyBytesObject *)
2400return_self(PyBytesObject *self)
2401{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002402 if (PyBytes_CheckExact(self)) {
2403 Py_INCREF(self);
2404 return self;
2405 }
2406 return (PyBytesObject *)PyBytes_FromStringAndSize(
2407 PyBytes_AS_STRING(self),
2408 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002409}
2410
2411Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002412countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002413{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002414 Py_ssize_t count=0;
2415 const char *start=target;
2416 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002418 while ( (start=findchar(start, end-start, c)) != NULL ) {
2419 count++;
2420 if (count >= maxcount)
2421 break;
2422 start += 1;
2423 }
2424 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002425}
2426
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002427
2428/* Algorithms for different cases of string replacement */
2429
2430/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2431Py_LOCAL(PyBytesObject *)
2432replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002433 const char *to_s, Py_ssize_t to_len,
2434 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002435{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002436 char *self_s, *result_s;
2437 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002438 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002439 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002441 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002442
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002443 /* 1 at the end plus 1 after every character;
2444 count = min(maxcount, self_len + 1) */
2445 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002446 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002447 else
2448 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2449 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002451 /* Check for overflow */
2452 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002453 assert(count > 0);
2454 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002455 PyErr_SetString(PyExc_OverflowError,
2456 "replacement bytes are too long");
2457 return NULL;
2458 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002459 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002461 if (! (result = (PyBytesObject *)
2462 PyBytes_FromStringAndSize(NULL, result_len)) )
2463 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002465 self_s = PyBytes_AS_STRING(self);
2466 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002468 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002470 /* Lay the first one down (guaranteed this will occur) */
2471 Py_MEMCPY(result_s, to_s, to_len);
2472 result_s += to_len;
2473 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002475 for (i=0; i<count; i++) {
2476 *result_s++ = *self_s++;
2477 Py_MEMCPY(result_s, to_s, to_len);
2478 result_s += to_len;
2479 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002480
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002481 /* Copy the rest of the original string */
2482 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002483
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002484 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002485}
2486
2487/* Special case for deleting a single character */
2488/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2489Py_LOCAL(PyBytesObject *)
2490replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002491 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002492{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002493 char *self_s, *result_s;
2494 char *start, *next, *end;
2495 Py_ssize_t self_len, result_len;
2496 Py_ssize_t count;
2497 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002499 self_len = PyBytes_GET_SIZE(self);
2500 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002502 count = countchar(self_s, self_len, from_c, maxcount);
2503 if (count == 0) {
2504 return return_self(self);
2505 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002507 result_len = self_len - count; /* from_len == 1 */
2508 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002510 if ( (result = (PyBytesObject *)
2511 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2512 return NULL;
2513 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002515 start = self_s;
2516 end = self_s + self_len;
2517 while (count-- > 0) {
2518 next = findchar(start, end-start, from_c);
2519 if (next == NULL)
2520 break;
2521 Py_MEMCPY(result_s, start, next-start);
2522 result_s += (next-start);
2523 start = next+1;
2524 }
2525 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002527 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002528}
2529
2530/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2531
2532Py_LOCAL(PyBytesObject *)
2533replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002534 const char *from_s, Py_ssize_t from_len,
2535 Py_ssize_t maxcount) {
2536 char *self_s, *result_s;
2537 char *start, *next, *end;
2538 Py_ssize_t self_len, result_len;
2539 Py_ssize_t count, offset;
2540 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002542 self_len = PyBytes_GET_SIZE(self);
2543 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002545 count = stringlib_count(self_s, self_len,
2546 from_s, from_len,
2547 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002549 if (count == 0) {
2550 /* no matches */
2551 return return_self(self);
2552 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002554 result_len = self_len - (count * from_len);
2555 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002556
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002557 if ( (result = (PyBytesObject *)
2558 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2559 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002560
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002561 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002563 start = self_s;
2564 end = self_s + self_len;
2565 while (count-- > 0) {
2566 offset = stringlib_find(start, end-start,
2567 from_s, from_len,
2568 0);
2569 if (offset == -1)
2570 break;
2571 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002572
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002573 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002575 result_s += (next-start);
2576 start = next+from_len;
2577 }
2578 Py_MEMCPY(result_s, start, end-start);
2579 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002580}
2581
2582/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2583Py_LOCAL(PyBytesObject *)
2584replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002585 char from_c, char to_c,
2586 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002587{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002588 char *self_s, *result_s, *start, *end, *next;
2589 Py_ssize_t self_len;
2590 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002592 /* The result string will be the same size */
2593 self_s = PyBytes_AS_STRING(self);
2594 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002596 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002597
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002598 if (next == NULL) {
2599 /* No matches; return the original string */
2600 return return_self(self);
2601 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002603 /* Need to make a new string */
2604 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2605 if (result == NULL)
2606 return NULL;
2607 result_s = PyBytes_AS_STRING(result);
2608 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002609
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002610 /* change everything in-place, starting with this one */
2611 start = result_s + (next-self_s);
2612 *start = to_c;
2613 start++;
2614 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002616 while (--maxcount > 0) {
2617 next = findchar(start, end-start, from_c);
2618 if (next == NULL)
2619 break;
2620 *next = to_c;
2621 start = next+1;
2622 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002624 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002625}
2626
2627/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2628Py_LOCAL(PyBytesObject *)
2629replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002630 const char *from_s, Py_ssize_t from_len,
2631 const char *to_s, Py_ssize_t to_len,
2632 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002633{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002634 char *result_s, *start, *end;
2635 char *self_s;
2636 Py_ssize_t self_len, offset;
2637 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002639 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002641 self_s = PyBytes_AS_STRING(self);
2642 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002644 offset = stringlib_find(self_s, self_len,
2645 from_s, from_len,
2646 0);
2647 if (offset == -1) {
2648 /* No matches; return the original string */
2649 return return_self(self);
2650 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002652 /* Need to make a new string */
2653 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2654 if (result == NULL)
2655 return NULL;
2656 result_s = PyBytes_AS_STRING(result);
2657 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002659 /* change everything in-place, starting with this one */
2660 start = result_s + offset;
2661 Py_MEMCPY(start, to_s, from_len);
2662 start += from_len;
2663 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002665 while ( --maxcount > 0) {
2666 offset = stringlib_find(start, end-start,
2667 from_s, from_len,
2668 0);
2669 if (offset==-1)
2670 break;
2671 Py_MEMCPY(start+offset, to_s, from_len);
2672 start += offset+from_len;
2673 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002675 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002676}
2677
2678/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2679Py_LOCAL(PyBytesObject *)
2680replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002681 char from_c,
2682 const char *to_s, Py_ssize_t to_len,
2683 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002684{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002685 char *self_s, *result_s;
2686 char *start, *next, *end;
2687 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002688 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002689 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002691 self_s = PyBytes_AS_STRING(self);
2692 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002694 count = countchar(self_s, self_len, from_c, maxcount);
2695 if (count == 0) {
2696 /* no matches, return unchanged */
2697 return return_self(self);
2698 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002700 /* use the difference between current and new, hence the "-1" */
2701 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002702 assert(count > 0);
2703 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002704 PyErr_SetString(PyExc_OverflowError,
2705 "replacement bytes are too long");
2706 return NULL;
2707 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002708 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002710 if ( (result = (PyBytesObject *)
2711 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2712 return NULL;
2713 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002715 start = self_s;
2716 end = self_s + self_len;
2717 while (count-- > 0) {
2718 next = findchar(start, end-start, from_c);
2719 if (next == NULL)
2720 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002722 if (next == start) {
2723 /* replace with the 'to' */
2724 Py_MEMCPY(result_s, to_s, to_len);
2725 result_s += to_len;
2726 start += 1;
2727 } else {
2728 /* copy the unchanged old then the 'to' */
2729 Py_MEMCPY(result_s, start, next-start);
2730 result_s += (next-start);
2731 Py_MEMCPY(result_s, to_s, to_len);
2732 result_s += to_len;
2733 start = next+1;
2734 }
2735 }
2736 /* Copy the remainder of the remaining string */
2737 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002738
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002739 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002740}
2741
2742/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2743Py_LOCAL(PyBytesObject *)
2744replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002745 const char *from_s, Py_ssize_t from_len,
2746 const char *to_s, Py_ssize_t to_len,
2747 Py_ssize_t maxcount) {
2748 char *self_s, *result_s;
2749 char *start, *next, *end;
2750 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002751 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002752 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002754 self_s = PyBytes_AS_STRING(self);
2755 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002757 count = stringlib_count(self_s, self_len,
2758 from_s, from_len,
2759 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002761 if (count == 0) {
2762 /* no matches, return unchanged */
2763 return return_self(self);
2764 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002766 /* Check for overflow */
2767 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002768 assert(count > 0);
2769 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002770 PyErr_SetString(PyExc_OverflowError,
2771 "replacement bytes are too long");
2772 return NULL;
2773 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002774 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002776 if ( (result = (PyBytesObject *)
2777 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2778 return NULL;
2779 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002781 start = self_s;
2782 end = self_s + self_len;
2783 while (count-- > 0) {
2784 offset = stringlib_find(start, end-start,
2785 from_s, from_len,
2786 0);
2787 if (offset == -1)
2788 break;
2789 next = start+offset;
2790 if (next == start) {
2791 /* replace with the 'to' */
2792 Py_MEMCPY(result_s, to_s, to_len);
2793 result_s += to_len;
2794 start += from_len;
2795 } else {
2796 /* copy the unchanged old then the 'to' */
2797 Py_MEMCPY(result_s, start, next-start);
2798 result_s += (next-start);
2799 Py_MEMCPY(result_s, to_s, to_len);
2800 result_s += to_len;
2801 start = next+from_len;
2802 }
2803 }
2804 /* Copy the remainder of the remaining string */
2805 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002807 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002808}
2809
2810
2811Py_LOCAL(PyBytesObject *)
2812replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002813 const char *from_s, Py_ssize_t from_len,
2814 const char *to_s, Py_ssize_t to_len,
2815 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002816{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002817 if (maxcount < 0) {
2818 maxcount = PY_SSIZE_T_MAX;
2819 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2820 /* nothing to do; return the original string */
2821 return return_self(self);
2822 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002824 if (maxcount == 0 ||
2825 (from_len == 0 && to_len == 0)) {
2826 /* nothing to do; return the original string */
2827 return return_self(self);
2828 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002829
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002830 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002832 if (from_len == 0) {
2833 /* insert the 'to' string everywhere. */
2834 /* >>> "Python".replace("", ".") */
2835 /* '.P.y.t.h.o.n.' */
2836 return replace_interleave(self, to_s, to_len, maxcount);
2837 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002839 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2840 /* point for an empty self string to generate a non-empty string */
2841 /* Special case so the remaining code always gets a non-empty string */
2842 if (PyBytes_GET_SIZE(self) == 0) {
2843 return return_self(self);
2844 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002846 if (to_len == 0) {
2847 /* delete all occurrences of 'from' string */
2848 if (from_len == 1) {
2849 return replace_delete_single_character(
2850 self, from_s[0], maxcount);
2851 } else {
2852 return replace_delete_substring(self, from_s,
2853 from_len, maxcount);
2854 }
2855 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002857 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002859 if (from_len == to_len) {
2860 if (from_len == 1) {
2861 return replace_single_character_in_place(
2862 self,
2863 from_s[0],
2864 to_s[0],
2865 maxcount);
2866 } else {
2867 return replace_substring_in_place(
2868 self, from_s, from_len, to_s, to_len,
2869 maxcount);
2870 }
2871 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002873 /* Otherwise use the more generic algorithms */
2874 if (from_len == 1) {
2875 return replace_single_character(self, from_s[0],
2876 to_s, to_len, maxcount);
2877 } else {
2878 /* len('from')>=2, len('to')>=1 */
2879 return replace_substring(self, from_s, from_len, to_s, to_len,
2880 maxcount);
2881 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002882}
2883
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002884
2885/*[clinic input]
2886bytes.replace
2887
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002888 old: Py_buffer
2889 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002890 count: Py_ssize_t = -1
2891 Maximum number of occurrences to replace.
2892 -1 (the default value) means replace all occurrences.
2893 /
2894
2895Return a copy with all occurrences of substring old replaced by new.
2896
2897If the optional argument count is given, only the first count occurrences are
2898replaced.
2899[clinic start generated code]*/
2900
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002901static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002902bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2903 Py_ssize_t count)
2904/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002905{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002906 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002907 (const char *)old->buf, old->len,
2908 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002909}
2910
2911/** End DALKE **/
2912
2913/* Matches the end (direction >= 0) or start (direction < 0) of self
2914 * against substr, using the start and end arguments. Returns
2915 * -1 on error, 0 if not found and 1 if found.
2916 */
2917Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002918_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002919 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002920{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002921 Py_ssize_t len = PyBytes_GET_SIZE(self);
2922 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002923 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002924 const char* sub;
2925 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002927 if (PyBytes_Check(substr)) {
2928 sub = PyBytes_AS_STRING(substr);
2929 slen = PyBytes_GET_SIZE(substr);
2930 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002931 else {
2932 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2933 return -1;
2934 sub = sub_view.buf;
2935 slen = sub_view.len;
2936 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002937 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002939 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002941 if (direction < 0) {
2942 /* startswith */
2943 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002944 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002945 } else {
2946 /* endswith */
2947 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002948 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002950 if (end-slen > start)
2951 start = end - slen;
2952 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002953 if (end-start < slen)
2954 goto notfound;
2955 if (memcmp(str+start, sub, slen) != 0)
2956 goto notfound;
2957
2958 PyBuffer_Release(&sub_view);
2959 return 1;
2960
2961notfound:
2962 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002963 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002964}
2965
2966
2967PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002968"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002969\n\
2970Return True if B starts with the specified prefix, False otherwise.\n\
2971With optional start, test B beginning at that position.\n\
2972With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002973prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002974
2975static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002976bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002977{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002978 Py_ssize_t start = 0;
2979 Py_ssize_t end = PY_SSIZE_T_MAX;
2980 PyObject *subobj;
2981 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002982
Jesus Ceaac451502011-04-20 17:09:23 +02002983 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002984 return NULL;
2985 if (PyTuple_Check(subobj)) {
2986 Py_ssize_t i;
2987 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2988 result = _bytes_tailmatch(self,
2989 PyTuple_GET_ITEM(subobj, i),
2990 start, end, -1);
2991 if (result == -1)
2992 return NULL;
2993 else if (result) {
2994 Py_RETURN_TRUE;
2995 }
2996 }
2997 Py_RETURN_FALSE;
2998 }
2999 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003000 if (result == -1) {
3001 if (PyErr_ExceptionMatches(PyExc_TypeError))
3002 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3003 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003004 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003005 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003006 else
3007 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003008}
3009
3010
3011PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003012"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003013\n\
3014Return True if B ends with the specified suffix, False otherwise.\n\
3015With optional start, test B beginning at that position.\n\
3016With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003017suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003018
3019static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003020bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003021{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003022 Py_ssize_t start = 0;
3023 Py_ssize_t end = PY_SSIZE_T_MAX;
3024 PyObject *subobj;
3025 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003026
Jesus Ceaac451502011-04-20 17:09:23 +02003027 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003028 return NULL;
3029 if (PyTuple_Check(subobj)) {
3030 Py_ssize_t i;
3031 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3032 result = _bytes_tailmatch(self,
3033 PyTuple_GET_ITEM(subobj, i),
3034 start, end, +1);
3035 if (result == -1)
3036 return NULL;
3037 else if (result) {
3038 Py_RETURN_TRUE;
3039 }
3040 }
3041 Py_RETURN_FALSE;
3042 }
3043 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003044 if (result == -1) {
3045 if (PyErr_ExceptionMatches(PyExc_TypeError))
3046 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3047 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003048 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003049 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003050 else
3051 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003052}
3053
3054
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003055/*[clinic input]
3056bytes.decode
3057
3058 encoding: str(c_default="NULL") = 'utf-8'
3059 The encoding with which to decode the bytes.
3060 errors: str(c_default="NULL") = 'strict'
3061 The error handling scheme to use for the handling of decoding errors.
3062 The default is 'strict' meaning that decoding errors raise a
3063 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3064 as well as any other name registered with codecs.register_error that
3065 can handle UnicodeDecodeErrors.
3066
3067Decode the bytes using the codec registered for encoding.
3068[clinic start generated code]*/
3069
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003070static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003071bytes_decode_impl(PyBytesObject*self, const char *encoding,
3072 const char *errors)
3073/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003074{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003075 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003076}
3077
Guido van Rossum20188312006-05-05 15:15:40 +00003078
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003079/*[clinic input]
3080bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003081
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003082 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003083
3084Return a list of the lines in the bytes, breaking at line boundaries.
3085
3086Line breaks are not included in the resulting list unless keepends is given and
3087true.
3088[clinic start generated code]*/
3089
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003090static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003091bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003092/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003093{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003094 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003095 (PyObject*) self, PyBytes_AS_STRING(self),
3096 PyBytes_GET_SIZE(self), keepends
3097 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003098}
3099
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003100/*[clinic input]
3101@classmethod
3102bytes.fromhex
3103
3104 string: unicode
3105 /
3106
3107Create a bytes object from a string of hexadecimal numbers.
3108
3109Spaces between two numbers are accepted.
3110Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3111[clinic start generated code]*/
3112
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003113static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003114bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003115/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003116{
Victor Stinner2bf89932015-10-14 11:25:33 +02003117 return _PyBytes_FromHex(string, 0);
3118}
3119
3120PyObject*
3121_PyBytes_FromHex(PyObject *string, int use_bytearray)
3122{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003123 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02003124 Py_ssize_t hexlen, invalid_char;
3125 unsigned int top, bot;
3126 Py_UCS1 *str, *end;
3127 _PyBytesWriter writer;
3128
3129 _PyBytesWriter_Init(&writer);
3130 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003131
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003132 assert(PyUnicode_Check(string));
3133 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003134 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003135 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003136
Victor Stinner2bf89932015-10-14 11:25:33 +02003137 if (!PyUnicode_IS_ASCII(string)) {
3138 void *data = PyUnicode_DATA(string);
3139 unsigned int kind = PyUnicode_KIND(string);
3140 Py_ssize_t i;
3141
3142 /* search for the first non-ASCII character */
3143 for (i = 0; i < hexlen; i++) {
3144 if (PyUnicode_READ(kind, data, i) >= 128)
3145 break;
3146 }
3147 invalid_char = i;
3148 goto error;
3149 }
3150
3151 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
3152 str = PyUnicode_1BYTE_DATA(string);
3153
3154 /* This overestimates if there are spaces */
3155 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
3156 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003157 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02003158
3159 end = str + hexlen;
3160 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003161 /* skip over spaces in the input */
Victor Stinner2bf89932015-10-14 11:25:33 +02003162 if (*str == ' ') {
3163 do {
3164 str++;
3165 } while (*str == ' ');
3166 if (str >= end)
3167 break;
3168 }
3169
3170 top = _PyLong_DigitValue[*str];
3171 if (top >= 16) {
3172 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003173 goto error;
3174 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003175 str++;
3176
3177 bot = _PyLong_DigitValue[*str];
3178 if (bot >= 16) {
3179 invalid_char = str - PyUnicode_1BYTE_DATA(string);
3180 goto error;
3181 }
3182 str++;
3183
3184 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003185 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003186
3187 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003188
3189 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02003190 PyErr_Format(PyExc_ValueError,
3191 "non-hexadecimal number found in "
3192 "fromhex() arg at position %zd", invalid_char);
3193 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003194 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003195}
3196
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003197PyDoc_STRVAR(hex__doc__,
3198"B.hex() -> string\n\
3199\n\
3200Create a string of hexadecimal numbers from a bytes object.\n\
3201Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3202
3203static PyObject *
3204bytes_hex(PyBytesObject *self)
3205{
3206 char* argbuf = PyBytes_AS_STRING(self);
3207 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3208 return _Py_strhex(argbuf, arglen);
3209}
3210
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003211static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003212bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003213{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003214 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003215}
3216
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003217
3218static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003219bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003220 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3221 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3222 _Py_capitalize__doc__},
3223 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3224 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003225 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003226 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3227 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003228 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003229 expandtabs__doc__},
3230 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003231 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003232 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003233 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3234 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3235 _Py_isalnum__doc__},
3236 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3237 _Py_isalpha__doc__},
3238 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3239 _Py_isdigit__doc__},
3240 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3241 _Py_islower__doc__},
3242 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3243 _Py_isspace__doc__},
3244 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3245 _Py_istitle__doc__},
3246 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3247 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003248 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003249 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3250 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003251 BYTES_LSTRIP_METHODDEF
3252 BYTES_MAKETRANS_METHODDEF
3253 BYTES_PARTITION_METHODDEF
3254 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003255 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3256 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3257 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003258 BYTES_RPARTITION_METHODDEF
3259 BYTES_RSPLIT_METHODDEF
3260 BYTES_RSTRIP_METHODDEF
3261 BYTES_SPLIT_METHODDEF
3262 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003263 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3264 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003265 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003266 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3267 _Py_swapcase__doc__},
3268 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003269 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003270 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3271 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003272 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003273};
3274
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003275static PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +02003276bytes_mod(PyObject *self, PyObject *args)
Ethan Furmanb95b5612015-01-23 20:05:18 -08003277{
Victor Stinner772b2b02015-10-14 09:56:53 +02003278 if (self == NULL || !PyBytes_Check(self)) {
3279 PyErr_BadInternalCall();
3280 return NULL;
3281 }
3282
3283 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
3284 args, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08003285}
3286
3287static PyNumberMethods bytes_as_number = {
3288 0, /*nb_add*/
3289 0, /*nb_subtract*/
3290 0, /*nb_multiply*/
3291 bytes_mod, /*nb_remainder*/
3292};
3293
3294static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003295str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3296
3297static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003298bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003300 PyObject *x = NULL;
3301 const char *encoding = NULL;
3302 const char *errors = NULL;
3303 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003304 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003305 Py_ssize_t size;
3306 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003307 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003309 if (type != &PyBytes_Type)
3310 return str_subtype_new(type, args, kwds);
3311 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3312 &encoding, &errors))
3313 return NULL;
3314 if (x == NULL) {
3315 if (encoding != NULL || errors != NULL) {
3316 PyErr_SetString(PyExc_TypeError,
3317 "encoding or errors without sequence "
3318 "argument");
3319 return NULL;
3320 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003321 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003322 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 if (PyUnicode_Check(x)) {
3325 /* Encode via the codec registry */
3326 if (encoding == NULL) {
3327 PyErr_SetString(PyExc_TypeError,
3328 "string argument without an encoding");
3329 return NULL;
3330 }
3331 new = PyUnicode_AsEncodedString(x, encoding, errors);
3332 if (new == NULL)
3333 return NULL;
3334 assert(PyBytes_Check(new));
3335 return new;
3336 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003337
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003338 /* If it's not unicode, there can't be encoding or errors */
3339 if (encoding != NULL || errors != NULL) {
3340 PyErr_SetString(PyExc_TypeError,
3341 "encoding or errors without a string argument");
3342 return NULL;
3343 }
3344
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003345 /* We'd like to call PyObject_Bytes here, but we need to check for an
3346 integer argument before deferring to PyBytes_FromObject, something
3347 PyObject_Bytes doesn't do. */
3348 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3349 if (func != NULL) {
3350 new = PyObject_CallFunctionObjArgs(func, NULL);
3351 Py_DECREF(func);
3352 if (new == NULL)
3353 return NULL;
3354 if (!PyBytes_Check(new)) {
3355 PyErr_Format(PyExc_TypeError,
3356 "__bytes__ returned non-bytes (type %.200s)",
3357 Py_TYPE(new)->tp_name);
3358 Py_DECREF(new);
3359 return NULL;
3360 }
3361 return new;
3362 }
3363 else if (PyErr_Occurred())
3364 return NULL;
3365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003366 /* Is it an integer? */
3367 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3368 if (size == -1 && PyErr_Occurred()) {
3369 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3370 return NULL;
3371 PyErr_Clear();
3372 }
3373 else if (size < 0) {
3374 PyErr_SetString(PyExc_ValueError, "negative count");
3375 return NULL;
3376 }
3377 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003378 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003379 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003380 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003381 return new;
3382 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003383
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003384 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003385}
3386
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003387static PyObject*
3388_PyBytes_FromBuffer(PyObject *x)
3389{
3390 PyObject *new;
3391 Py_buffer view;
3392
3393 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3394 return NULL;
3395
3396 new = PyBytes_FromStringAndSize(NULL, view.len);
3397 if (!new)
3398 goto fail;
3399 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3400 &view, view.len, 'C') < 0)
3401 goto fail;
3402 PyBuffer_Release(&view);
3403 return new;
3404
3405fail:
3406 Py_XDECREF(new);
3407 PyBuffer_Release(&view);
3408 return NULL;
3409}
3410
Victor Stinner3c50ce32015-10-14 13:50:40 +02003411#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
3412 do { \
3413 PyObject *bytes; \
3414 Py_ssize_t i; \
3415 Py_ssize_t value; \
3416 char *str; \
3417 PyObject *item; \
3418 \
3419 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
3420 if (bytes == NULL) \
3421 return NULL; \
3422 str = ((PyBytesObject *)bytes)->ob_sval; \
3423 \
3424 for (i = 0; i < Py_SIZE(x); i++) { \
3425 item = GET_ITEM((x), i); \
3426 value = PyNumber_AsSsize_t(item, PyExc_ValueError); \
3427 if (value == -1 && PyErr_Occurred()) \
3428 goto error; \
3429 \
3430 if (value < 0 || value >= 256) { \
3431 PyErr_SetString(PyExc_ValueError, \
3432 "bytes must be in range(0, 256)"); \
3433 goto error; \
3434 } \
3435 *str++ = (char) value; \
3436 } \
3437 return bytes; \
3438 \
3439 error: \
3440 Py_DECREF(bytes); \
3441 return NULL; \
3442 } while (0)
3443
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003444static PyObject*
3445_PyBytes_FromList(PyObject *x)
3446{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003447 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003448}
3449
3450static PyObject*
3451_PyBytes_FromTuple(PyObject *x)
3452{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003453 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003454}
3455
3456static PyObject *
3457_PyBytes_FromIterator(PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003459 PyObject *new, *it;
3460 Py_ssize_t i, size;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003462 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003463 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003464 if (size == -1 && PyErr_Occurred())
3465 return NULL;
3466 /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
3467 returning a shared empty bytes string. This required because we
3468 want to call _PyBytes_Resize() the returned object, which we can
3469 only do on bytes objects with refcount == 1. */
Victor Stinner88d146b2014-08-17 21:12:18 +02003470 if (size == 0)
3471 size = 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003472 new = PyBytes_FromStringAndSize(NULL, size);
3473 if (new == NULL)
3474 return NULL;
Victor Stinner88d146b2014-08-17 21:12:18 +02003475 assert(Py_REFCNT(new) == 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003477 /* Get the iterator */
3478 it = PyObject_GetIter(x);
3479 if (it == NULL)
3480 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003481
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003482 /* Run the iterator to exhaustion */
3483 for (i = 0; ; i++) {
3484 PyObject *item;
3485 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003487 /* Get the next item */
3488 item = PyIter_Next(it);
3489 if (item == NULL) {
3490 if (PyErr_Occurred())
3491 goto error;
3492 break;
3493 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003495 /* Interpret it as an int (__index__) */
3496 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3497 Py_DECREF(item);
3498 if (value == -1 && PyErr_Occurred())
3499 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003501 /* Range check */
3502 if (value < 0 || value >= 256) {
3503 PyErr_SetString(PyExc_ValueError,
3504 "bytes must be in range(0, 256)");
3505 goto error;
3506 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003508 /* Append the byte */
3509 if (i >= size) {
3510 size = 2 * size + 1;
3511 if (_PyBytes_Resize(&new, size) < 0)
3512 goto error;
3513 }
Antoine Pitrou0010d372010-08-15 17:12:55 +00003514 ((PyBytesObject *)new)->ob_sval[i] = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003515 }
3516 _PyBytes_Resize(&new, i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003518 /* Clean up and return success */
3519 Py_DECREF(it);
3520 return new;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003521
3522 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003523 Py_XDECREF(it);
Victor Stinner986e2242013-10-29 03:14:22 +01003524 Py_XDECREF(new);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003525 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003526}
3527
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003528PyObject *
3529PyBytes_FromObject(PyObject *x)
3530{
3531 if (x == NULL) {
3532 PyErr_BadInternalCall();
3533 return NULL;
3534 }
3535
3536 if (PyBytes_CheckExact(x)) {
3537 Py_INCREF(x);
3538 return x;
3539 }
3540
3541 /* Use the modern buffer interface */
3542 if (PyObject_CheckBuffer(x))
3543 return _PyBytes_FromBuffer(x);
3544
3545 if (PyList_CheckExact(x))
3546 return _PyBytes_FromList(x);
3547
3548 if (PyTuple_CheckExact(x))
3549 return _PyBytes_FromTuple(x);
3550
3551 if (PyUnicode_Check(x)) {
3552 PyErr_SetString(PyExc_TypeError,
3553 "cannot convert unicode object to bytes");
3554 return NULL;
3555 }
3556
3557 return _PyBytes_FromIterator(x);
3558}
3559
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003560static PyObject *
3561str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3562{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003563 PyObject *tmp, *pnew;
3564 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003566 assert(PyType_IsSubtype(type, &PyBytes_Type));
3567 tmp = bytes_new(&PyBytes_Type, args, kwds);
3568 if (tmp == NULL)
3569 return NULL;
3570 assert(PyBytes_CheckExact(tmp));
3571 n = PyBytes_GET_SIZE(tmp);
3572 pnew = type->tp_alloc(type, n);
3573 if (pnew != NULL) {
3574 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3575 PyBytes_AS_STRING(tmp), n+1);
3576 ((PyBytesObject *)pnew)->ob_shash =
3577 ((PyBytesObject *)tmp)->ob_shash;
3578 }
3579 Py_DECREF(tmp);
3580 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003581}
3582
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003583PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003584"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003585bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003586bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003587bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3588bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003589\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003590Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003591 - an iterable yielding integers in range(256)\n\
3592 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003593 - any object implementing the buffer API.\n\
3594 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003595
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003596static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003597
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003598PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003599 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3600 "bytes",
3601 PyBytesObject_SIZE,
3602 sizeof(char),
3603 bytes_dealloc, /* tp_dealloc */
3604 0, /* tp_print */
3605 0, /* tp_getattr */
3606 0, /* tp_setattr */
3607 0, /* tp_reserved */
3608 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003609 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003610 &bytes_as_sequence, /* tp_as_sequence */
3611 &bytes_as_mapping, /* tp_as_mapping */
3612 (hashfunc)bytes_hash, /* tp_hash */
3613 0, /* tp_call */
3614 bytes_str, /* tp_str */
3615 PyObject_GenericGetAttr, /* tp_getattro */
3616 0, /* tp_setattro */
3617 &bytes_as_buffer, /* tp_as_buffer */
3618 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3619 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3620 bytes_doc, /* tp_doc */
3621 0, /* tp_traverse */
3622 0, /* tp_clear */
3623 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3624 0, /* tp_weaklistoffset */
3625 bytes_iter, /* tp_iter */
3626 0, /* tp_iternext */
3627 bytes_methods, /* tp_methods */
3628 0, /* tp_members */
3629 0, /* tp_getset */
3630 &PyBaseObject_Type, /* tp_base */
3631 0, /* tp_dict */
3632 0, /* tp_descr_get */
3633 0, /* tp_descr_set */
3634 0, /* tp_dictoffset */
3635 0, /* tp_init */
3636 0, /* tp_alloc */
3637 bytes_new, /* tp_new */
3638 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003639};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003640
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003641void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003642PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003643{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003644 assert(pv != NULL);
3645 if (*pv == NULL)
3646 return;
3647 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003648 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003649 return;
3650 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003651
3652 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3653 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003654 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003655 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003656
Antoine Pitrou161d6952014-05-01 14:36:20 +02003657 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003658 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003659 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3660 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3661 Py_CLEAR(*pv);
3662 return;
3663 }
3664
3665 oldsize = PyBytes_GET_SIZE(*pv);
3666 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3667 PyErr_NoMemory();
3668 goto error;
3669 }
3670 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3671 goto error;
3672
3673 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3674 PyBuffer_Release(&wb);
3675 return;
3676
3677 error:
3678 PyBuffer_Release(&wb);
3679 Py_CLEAR(*pv);
3680 return;
3681 }
3682
3683 else {
3684 /* Multiple references, need to create new object */
3685 PyObject *v;
3686 v = bytes_concat(*pv, w);
3687 Py_DECREF(*pv);
3688 *pv = v;
3689 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003690}
3691
3692void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003693PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003694{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003695 PyBytes_Concat(pv, w);
3696 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003697}
3698
3699
Ethan Furmanb95b5612015-01-23 20:05:18 -08003700/* The following function breaks the notion that bytes are immutable:
3701 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003702 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003703 as creating a new bytes object and destroying the old one, only
3704 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003705 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003706 Note that if there's not enough memory to resize the bytes object, the
3707 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003708 memory" exception is set, and -1 is returned. Else (on success) 0 is
3709 returned, and the value in *pv may or may not be the same as on input.
3710 As always, an extra byte is allocated for a trailing \0 byte (newsize
3711 does *not* include that), and a trailing \0 byte is stored.
3712*/
3713
3714int
3715_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3716{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003717 PyObject *v;
3718 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003719 v = *pv;
3720 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3721 *pv = 0;
3722 Py_DECREF(v);
3723 PyErr_BadInternalCall();
3724 return -1;
3725 }
3726 /* XXX UNREF/NEWREF interface should be more symmetrical */
3727 _Py_DEC_REFTOTAL;
3728 _Py_ForgetReference(v);
3729 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003730 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003731 if (*pv == NULL) {
3732 PyObject_Del(v);
3733 PyErr_NoMemory();
3734 return -1;
3735 }
3736 _Py_NewReference(*pv);
3737 sv = (PyBytesObject *) *pv;
3738 Py_SIZE(sv) = newsize;
3739 sv->ob_sval[newsize] = '\0';
3740 sv->ob_shash = -1; /* invalidate cached hash value */
3741 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003742}
3743
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003744void
3745PyBytes_Fini(void)
3746{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003747 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003748 for (i = 0; i < UCHAR_MAX + 1; i++)
3749 Py_CLEAR(characters[i]);
3750 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003751}
3752
Benjamin Peterson4116f362008-05-27 00:36:20 +00003753/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003754
3755typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003756 PyObject_HEAD
3757 Py_ssize_t it_index;
3758 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003759} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003760
3761static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003762striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003763{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003764 _PyObject_GC_UNTRACK(it);
3765 Py_XDECREF(it->it_seq);
3766 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003767}
3768
3769static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003770striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003771{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003772 Py_VISIT(it->it_seq);
3773 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003774}
3775
3776static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003777striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003778{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003779 PyBytesObject *seq;
3780 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003782 assert(it != NULL);
3783 seq = it->it_seq;
3784 if (seq == NULL)
3785 return NULL;
3786 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003788 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3789 item = PyLong_FromLong(
3790 (unsigned char)seq->ob_sval[it->it_index]);
3791 if (item != NULL)
3792 ++it->it_index;
3793 return item;
3794 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003796 Py_DECREF(seq);
3797 it->it_seq = NULL;
3798 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003799}
3800
3801static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003802striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003803{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003804 Py_ssize_t len = 0;
3805 if (it->it_seq)
3806 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3807 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003808}
3809
3810PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003811 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003812
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003813static PyObject *
3814striter_reduce(striterobject *it)
3815{
3816 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003817 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003818 it->it_seq, it->it_index);
3819 } else {
3820 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3821 if (u == NULL)
3822 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003823 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003824 }
3825}
3826
3827PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3828
3829static PyObject *
3830striter_setstate(striterobject *it, PyObject *state)
3831{
3832 Py_ssize_t index = PyLong_AsSsize_t(state);
3833 if (index == -1 && PyErr_Occurred())
3834 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003835 if (it->it_seq != NULL) {
3836 if (index < 0)
3837 index = 0;
3838 else if (index > PyBytes_GET_SIZE(it->it_seq))
3839 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3840 it->it_index = index;
3841 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003842 Py_RETURN_NONE;
3843}
3844
3845PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3846
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003847static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003848 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3849 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003850 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3851 reduce_doc},
3852 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3853 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003854 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003855};
3856
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003857PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003858 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3859 "bytes_iterator", /* tp_name */
3860 sizeof(striterobject), /* tp_basicsize */
3861 0, /* tp_itemsize */
3862 /* methods */
3863 (destructor)striter_dealloc, /* tp_dealloc */
3864 0, /* tp_print */
3865 0, /* tp_getattr */
3866 0, /* tp_setattr */
3867 0, /* tp_reserved */
3868 0, /* tp_repr */
3869 0, /* tp_as_number */
3870 0, /* tp_as_sequence */
3871 0, /* tp_as_mapping */
3872 0, /* tp_hash */
3873 0, /* tp_call */
3874 0, /* tp_str */
3875 PyObject_GenericGetAttr, /* tp_getattro */
3876 0, /* tp_setattro */
3877 0, /* tp_as_buffer */
3878 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3879 0, /* tp_doc */
3880 (traverseproc)striter_traverse, /* tp_traverse */
3881 0, /* tp_clear */
3882 0, /* tp_richcompare */
3883 0, /* tp_weaklistoffset */
3884 PyObject_SelfIter, /* tp_iter */
3885 (iternextfunc)striter_next, /* tp_iternext */
3886 striter_methods, /* tp_methods */
3887 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003888};
3889
3890static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003891bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003892{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003893 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003895 if (!PyBytes_Check(seq)) {
3896 PyErr_BadInternalCall();
3897 return NULL;
3898 }
3899 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3900 if (it == NULL)
3901 return NULL;
3902 it->it_index = 0;
3903 Py_INCREF(seq);
3904 it->it_seq = (PyBytesObject *)seq;
3905 _PyObject_GC_TRACK(it);
3906 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003907}
Victor Stinner00165072015-10-09 01:53:21 +02003908
3909
3910/* _PyBytesWriter API */
3911
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Resize() grows a request of `size` bytes by
   size / OVERALLOCATE_FACTOR additional bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% gives the best results */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% gives the best results */
#  define OVERALLOCATE_FACTOR 4
#endif
3919
3920void
3921_PyBytesWriter_Init(_PyBytesWriter *writer)
3922{
Victor Stinner661aacc2015-10-14 09:41:48 +02003923 /* Set all attributes before small_buffer to 0 */
3924 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003925#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003926 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003927#endif
3928}
3929
3930void
3931_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3932{
3933 Py_CLEAR(writer->buffer);
3934}
3935
3936Py_LOCAL_INLINE(char*)
3937_PyBytesWriter_AsString(_PyBytesWriter *writer)
3938{
Victor Stinner661aacc2015-10-14 09:41:48 +02003939 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003940 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003941 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003942 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003943 else if (writer->use_bytearray) {
3944 assert(writer->buffer != NULL);
3945 return PyByteArray_AS_STRING(writer->buffer);
3946 }
3947 else {
3948 assert(writer->buffer != NULL);
3949 return PyBytes_AS_STRING(writer->buffer);
3950 }
Victor Stinner00165072015-10-09 01:53:21 +02003951}
3952
3953Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003954_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003955{
3956 char *start = _PyBytesWriter_AsString(writer);
3957 assert(str != NULL);
3958 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003959 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003960 return str - start;
3961}
3962
3963Py_LOCAL_INLINE(void)
3964_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3965{
3966#ifdef Py_DEBUG
3967 char *start, *end;
3968
Victor Stinner661aacc2015-10-14 09:41:48 +02003969 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003970 assert(writer->buffer == NULL);
3971 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003972 else {
3973 assert(writer->buffer != NULL);
3974 if (writer->use_bytearray)
3975 assert(PyByteArray_CheckExact(writer->buffer));
3976 else
3977 assert(PyBytes_CheckExact(writer->buffer));
3978 assert(Py_REFCNT(writer->buffer) == 1);
3979 }
Victor Stinner00165072015-10-09 01:53:21 +02003980
Victor Stinner661aacc2015-10-14 09:41:48 +02003981 if (writer->use_bytearray) {
3982 /* bytearray has its own overallocation algorithm,
3983 writer overallocation must be disabled */
3984 assert(!writer->overallocate);
3985 }
3986
3987 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003988 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003989 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003990 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003991 assert(start[writer->allocated] == 0);
3992
3993 end = start + writer->allocated;
3994 assert(str != NULL);
3995 assert(start <= str && str <= end);
3996#endif
3997}
3998
Victor Stinnerc29e29b2015-10-12 13:12:54 +02003999void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004000_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02004001{
4002 Py_ssize_t allocated, pos;
4003
4004 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004005 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02004006
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004007 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02004008 if (writer->overallocate
4009 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
4010 /* overallocate to limit the number of realloc() */
4011 allocated += allocated / OVERALLOCATE_FACTOR;
4012 }
4013
Victor Stinner2bf89932015-10-14 11:25:33 +02004014 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02004015 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004016 if (writer->use_bytearray) {
4017 if (PyByteArray_Resize(writer->buffer, allocated))
4018 goto error;
4019 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
4020 but we cannot use ob_alloc because bytes may need to be moved
4021 to use the whole buffer. bytearray uses an internal optimization
4022 to avoid moving or copying bytes when bytes are removed at the
4023 beginning (ex: del bytearray[:1]). */
4024 }
4025 else {
4026 if (_PyBytes_Resize(&writer->buffer, allocated))
4027 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004028 }
4029 }
4030 else {
4031 /* convert from stack buffer to bytes object buffer */
4032 assert(writer->buffer == NULL);
4033
Victor Stinner661aacc2015-10-14 09:41:48 +02004034 if (writer->use_bytearray)
4035 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
4036 else
4037 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02004038 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02004039 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004040
4041 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004042 char *dest;
4043 if (writer->use_bytearray)
4044 dest = PyByteArray_AS_STRING(writer->buffer);
4045 else
4046 dest = PyBytes_AS_STRING(writer->buffer);
4047 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02004048 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02004049 pos);
4050 }
4051
Victor Stinnerb3653a32015-10-09 03:38:24 +02004052 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004053#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004054 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02004055#endif
Victor Stinner00165072015-10-09 01:53:21 +02004056 }
4057 writer->allocated = allocated;
4058
4059 str = _PyBytesWriter_AsString(writer) + pos;
4060 _PyBytesWriter_CheckConsistency(writer, str);
4061 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02004062
4063error:
4064 _PyBytesWriter_Dealloc(writer);
4065 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02004066}
4067
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004068void*
4069_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
4070{
4071 Py_ssize_t new_min_size;
4072
4073 _PyBytesWriter_CheckConsistency(writer, str);
4074 assert(size >= 0);
4075
4076 if (size == 0) {
4077 /* nothing to do */
4078 return str;
4079 }
4080
4081 if (writer->min_size > PY_SSIZE_T_MAX - size) {
4082 PyErr_NoMemory();
4083 _PyBytesWriter_Dealloc(writer);
4084 return NULL;
4085 }
4086 new_min_size = writer->min_size + size;
4087
4088 if (new_min_size > writer->allocated)
4089 str = _PyBytesWriter_Resize(writer, str, new_min_size);
4090
4091 writer->min_size = new_min_size;
4092 return str;
4093}
4094
Victor Stinner00165072015-10-09 01:53:21 +02004095/* Allocate the buffer to write size bytes.
4096 Return the pointer to the beginning of buffer data.
4097 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004098void*
Victor Stinner00165072015-10-09 01:53:21 +02004099_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
4100{
4101 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02004102 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02004103 assert(size >= 0);
4104
Victor Stinnerb3653a32015-10-09 03:38:24 +02004105 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02004106#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004107 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02004108 /* In debug mode, don't use the full small buffer because it is less
4109 efficient than bytes and bytearray objects to detect buffer underflow
4110 and buffer overflow. Use 10 bytes of the small buffer to test also
4111 code using the smaller buffer in debug mode.
4112
4113 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
4114 in debug mode to also be able to detect stack overflow when running
4115 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
4116 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
4117 stack overflow. */
4118 writer->allocated = Py_MIN(writer->allocated, 10);
4119 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
4120 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02004121 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004122#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02004123 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02004124#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02004125 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02004126}
4127
4128PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004129_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02004130{
Victor Stinner2bf89932015-10-14 11:25:33 +02004131 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02004132 PyObject *result;
4133
4134 _PyBytesWriter_CheckConsistency(writer, str);
4135
Victor Stinner2bf89932015-10-14 11:25:33 +02004136 size = _PyBytesWriter_GetSize(writer, str);
4137 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004138 Py_CLEAR(writer->buffer);
4139 /* Get the empty byte string singleton */
4140 result = PyBytes_FromStringAndSize(NULL, 0);
4141 }
4142 else if (writer->use_small_buffer) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004143 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004144 }
4145 else {
4146 result = writer->buffer;
4147 writer->buffer = NULL;
4148
Victor Stinner2bf89932015-10-14 11:25:33 +02004149 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004150 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004151 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004152 Py_DECREF(result);
4153 return NULL;
4154 }
4155 }
4156 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02004157 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004158 assert(result == NULL);
4159 return NULL;
4160 }
Victor Stinner00165072015-10-09 01:53:21 +02004161 }
4162 }
Victor Stinner00165072015-10-09 01:53:21 +02004163 }
Victor Stinner00165072015-10-09 01:53:21 +02004164 return result;
4165}
Victor Stinnerce179bf2015-10-09 12:57:22 +02004166
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004167void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02004168_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004169 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02004170{
Victor Stinnere9aa5952015-10-12 13:57:47 +02004171 char *str = (char *)ptr;
4172
Victor Stinnerce179bf2015-10-09 12:57:22 +02004173 str = _PyBytesWriter_Prepare(writer, str, size);
4174 if (str == NULL)
4175 return NULL;
4176
4177 Py_MEMCPY(str, bytes, size);
4178 str += size;
4179
4180 return str;
4181}