blob: 602dea681ce45d40d4b0aea3fb0101074a503b5e [file] [log] [blame]
Benjamin Peterson4116f362008-05-27 00:36:20 +00001/* bytes object implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
Christian Heimes2c9c7a52008-05-26 13:42:13 +00004
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00005#include "Python.h"
Christian Heimes2c9c7a52008-05-26 13:42:13 +00006
Gregory P. Smith60d241f2007-10-16 06:31:30 +00007#include "bytes_methods.h"
Gregory P. Smith8cb65692015-04-25 23:22:26 +00008#include "pystrhex.h"
Mark Dickinsonfd24b322008-12-06 15:33:31 +00009#include <stddef.h>
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000010
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020011/*[clinic input]
Martin v. Löwis0efea322014-07-27 17:29:17 +020012class bytes "PyBytesObject*" "&PyBytes_Type"
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020013[clinic start generated code]*/
Martin v. Löwis0efea322014-07-27 17:29:17 +020014/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1a1d9102afc1b00c]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +020015
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030016#include "clinic/bytesobject.c.h"
17
Christian Heimes2c9c7a52008-05-26 13:42:13 +000018#ifdef COUNT_ALLOCS
Benjamin Petersona4a37fe2009-01-11 17:13:55 +000019Py_ssize_t null_strings, one_strings;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000020#endif
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000021
Christian Heimes2c9c7a52008-05-26 13:42:13 +000022static PyBytesObject *characters[UCHAR_MAX + 1];
23static PyBytesObject *nullstring;
24
Mark Dickinsonfd24b322008-12-06 15:33:31 +000025/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26 for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28 Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29 3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
Victor Stinner2bf89932015-10-14 11:25:33 +020033/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35 char *str);
36
Christian Heimes2c9c7a52008-05-26 13:42:13 +000037/*
Christian Heimes2c9c7a52008-05-26 13:42:13 +000038 For PyBytes_FromString(), the parameter `str' points to a null-terminated
39 string containing exactly `size' bytes.
40
   For PyBytes_FromStringAndSize(), the parameter `str' is
42 either NULL or else points to a string containing at least `size' bytes.
43 For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44 not have to be null-terminated. (Therefore it is safe to construct a
45 substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46 If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47 bytes (setting the last byte to the null terminating character) and you can
48 fill in the data yourself. If `str' is non-NULL then the resulting
Antoine Pitrouf2c54842010-01-13 08:07:53 +000049 PyBytes object must be treated as immutable and you must not fill in nor
Christian Heimes2c9c7a52008-05-26 13:42:13 +000050 alter the data yourself, since the strings may be shared.
51
52 The PyObject member `op->ob_size', which denotes the number of "extra
53 items" in a variable-size object, will contain the number of bytes
Eli Bendersky1aef6b62011-03-24 22:32:56 +020054 allocated for string data, not counting the null terminating character.
55 It is therefore equal to the `size' parameter (for
Christian Heimes2c9c7a52008-05-26 13:42:13 +000056 PyBytes_FromStringAndSize()) or the length of the string in the `str'
57 parameter (for PyBytes_FromString()).
58*/
Victor Stinnerdb067af2014-05-02 22:31:14 +020059static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
Guido van Rossumd624f182006-04-24 13:47:05 +000061{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +020062 PyBytesObject *op;
Victor Stinnerdb067af2014-05-02 22:31:14 +020063 assert(size >= 0);
Victor Stinner049e5092014-08-17 22:20:00 +020064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +000068#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +000072
Victor Stinner049e5092014-08-17 22:20:00 +020073 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyErr_SetString(PyExc_OverflowError,
75 "byte string is too large");
76 return NULL;
77 }
Neal Norwitz3ce5d922008-08-24 07:08:55 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 /* Inline PyObject_NewVar */
Victor Stinnerdb067af2014-05-02 22:31:14 +020080 if (use_calloc)
81 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82 else
83 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (op == NULL)
85 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +010086 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 op->ob_shash = -1;
Victor Stinnerdb067af2014-05-02 22:31:14 +020088 if (!use_calloc)
89 op->ob_sval[size] = '\0';
90 /* empty byte string singleton */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 if (size == 0) {
92 nullstring = op;
93 Py_INCREF(op);
Victor Stinnerdb067af2014-05-02 22:31:14 +020094 }
95 return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101 PyBytesObject *op;
102 if (size < 0) {
103 PyErr_SetString(PyExc_SystemError,
104 "Negative size passed to PyBytes_FromStringAndSize");
105 return NULL;
106 }
107 if (size == 1 && str != NULL &&
108 (op = characters[*str & UCHAR_MAX]) != NULL)
109 {
110#ifdef COUNT_ALLOCS
111 one_strings++;
112#endif
113 Py_INCREF(op);
114 return (PyObject *)op;
115 }
116
117 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118 if (op == NULL)
119 return NULL;
120 if (str == NULL)
121 return (PyObject *) op;
122
123 Py_MEMCPY(op->ob_sval, str, size);
124 /* share short strings */
125 if (size == 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 characters[*str & UCHAR_MAX] = op;
127 Py_INCREF(op);
128 }
129 return (PyObject *) op;
Guido van Rossumd624f182006-04-24 13:47:05 +0000130}
131
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000132PyObject *
133PyBytes_FromString(const char *str)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000134{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +0200135 size_t size;
136 PyBytesObject *op;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 assert(str != NULL);
139 size = strlen(str);
140 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141 PyErr_SetString(PyExc_OverflowError,
142 "byte string is too long");
143 return NULL;
144 }
145 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000146#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 null_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000148#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_INCREF(op);
150 return (PyObject *)op;
151 }
152 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000153#ifdef COUNT_ALLOCS
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 one_strings++;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000155#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 Py_INCREF(op);
157 return (PyObject *)op;
158 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 /* Inline PyObject_NewVar */
161 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162 if (op == NULL)
163 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +0100164 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 op->ob_shash = -1;
166 Py_MEMCPY(op->ob_sval, str, size+1);
167 /* share short strings */
168 if (size == 0) {
169 nullstring = op;
170 Py_INCREF(op);
171 } else if (size == 1) {
172 characters[*str & UCHAR_MAX] = op;
173 Py_INCREF(op);
174 }
175 return (PyObject *) op;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000176}
Guido van Rossumebea9be2007-04-09 00:49:13 +0000177
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 char *s;
Victor Stinner03dab782015-10-14 00:21:35 +0200182 const char *f;
183 const char *p;
184 Py_ssize_t prec;
185 int longflag;
186 int size_tflag;
187 /* Longest 64-bit formatted numbers:
188 - "18446744073709551615\0" (21 bytes)
189 - "-9223372036854775808\0" (21 bytes)
190 Decimal takes the most space (it isn't enough for octal.)
Guido van Rossum343e97f2007-04-09 00:43:24 +0000191
Victor Stinner03dab782015-10-14 00:21:35 +0200192 Longest 64-bit pointer representation:
193 "0xffffffffffffffff\0" (19 bytes). */
194 char buffer[21];
195 _PyBytesWriter writer;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000196
Victor Stinner03dab782015-10-14 00:21:35 +0200197 _PyBytesWriter_Init(&writer);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000198
Victor Stinner03dab782015-10-14 00:21:35 +0200199 s = _PyBytesWriter_Alloc(&writer, strlen(format));
200 if (s == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return NULL;
Victor Stinner03dab782015-10-14 00:21:35 +0200202 writer.overallocate = 1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000203
Victor Stinner03dab782015-10-14 00:21:35 +0200204#define WRITE_BYTES(str) \
205 do { \
206 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207 if (s == NULL) \
208 goto error; \
209 } while (0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 for (f = format; *f; f++) {
Victor Stinner03dab782015-10-14 00:21:35 +0200212 if (*f != '%') {
213 *s++ = *f;
214 continue;
215 }
216
217 p = f++;
218
219 /* ignore the width (ex: 10 in "%10s") */
220 while (Py_ISDIGIT(*f))
221 f++;
222
223 /* parse the precision (ex: 10 in "%.10s") */
224 prec = 0;
225 if (*f == '.') {
226 f++;
227 for (; Py_ISDIGIT(*f); f++) {
228 prec = (prec * 10) + (*f - '0');
229 }
230 }
231
232 while (*f && *f != '%' && !Py_ISALPHA(*f))
233 f++;
234
235 /* handle the long flag ('l'), but only for %ld and %lu.
236 others can be added when necessary. */
237 longflag = 0;
238 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239 longflag = 1;
240 ++f;
241 }
242
243 /* handle the size_t flag ('z'). */
244 size_tflag = 0;
245 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246 size_tflag = 1;
247 ++f;
248 }
249
250 /* substract bytes preallocated for the format string
251 (ex: 2 for "%s") */
252 writer.min_size -= (f - p + 1);
253
254 switch (*f) {
255 case 'c':
256 {
257 int c = va_arg(vargs, int);
258 if (c < 0 || c > 255) {
259 PyErr_SetString(PyExc_OverflowError,
260 "PyBytes_FromFormatV(): %c format "
261 "expects an integer in range [0; 255]");
262 goto error;
263 }
264 writer.min_size++;
265 *s++ = (unsigned char)c;
266 break;
267 }
268
269 case 'd':
270 if (longflag)
271 sprintf(buffer, "%ld", va_arg(vargs, long));
272 else if (size_tflag)
273 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274 va_arg(vargs, Py_ssize_t));
275 else
276 sprintf(buffer, "%d", va_arg(vargs, int));
277 assert(strlen(buffer) < sizeof(buffer));
278 WRITE_BYTES(buffer);
279 break;
280
281 case 'u':
282 if (longflag)
283 sprintf(buffer, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(buffer, "%u",
290 va_arg(vargs, unsigned int));
291 assert(strlen(buffer) < sizeof(buffer));
292 WRITE_BYTES(buffer);
293 break;
294
295 case 'i':
296 sprintf(buffer, "%i", va_arg(vargs, int));
297 assert(strlen(buffer) < sizeof(buffer));
298 WRITE_BYTES(buffer);
299 break;
300
301 case 'x':
302 sprintf(buffer, "%x", va_arg(vargs, int));
303 assert(strlen(buffer) < sizeof(buffer));
304 WRITE_BYTES(buffer);
305 break;
306
307 case 's':
308 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 Py_ssize_t i;
Victor Stinner03dab782015-10-14 00:21:35 +0200310
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311 p = va_arg(vargs, const char*);
Victor Stinner03dab782015-10-14 00:21:35 +0200312 i = strlen(p);
313 if (prec > 0 && i > prec)
314 i = prec;
315 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316 if (s == NULL)
317 goto error;
318 break;
319 }
320
321 case 'p':
322 sprintf(buffer, "%p", va_arg(vargs, void*));
323 assert(strlen(buffer) < sizeof(buffer));
324 /* %p is ill-defined: ensure leading 0x. */
325 if (buffer[1] == 'X')
326 buffer[1] = 'x';
327 else if (buffer[1] != 'x') {
328 memmove(buffer+2, buffer, strlen(buffer)+1);
329 buffer[0] = '0';
330 buffer[1] = 'x';
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 }
Victor Stinner03dab782015-10-14 00:21:35 +0200332 WRITE_BYTES(buffer);
333 break;
334
335 case '%':
336 writer.min_size++;
337 *s++ = '%';
338 break;
339
340 default:
341 if (*f == 0) {
342 /* fix min_size if we reached the end of the format string */
343 writer.min_size++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345
Victor Stinner03dab782015-10-14 00:21:35 +0200346 /* invalid format string: copy unformatted string and exit */
347 WRITE_BYTES(p);
348 return _PyBytesWriter_Finish(&writer, s);
349 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000351
Victor Stinner03dab782015-10-14 00:21:35 +0200352#undef WRITE_BYTES
353
354 return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357 _PyBytesWriter_Dealloc(&writer);
358 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 PyObject* ret;
365 va_list vargs;
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000366
367#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 va_start(vargs, format);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000369#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 va_start(vargs);
Christian Heimes2c9c7a52008-05-26 13:42:13 +0000371#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 ret = PyBytes_FromFormatV(format, vargs);
373 va_end(vargs);
374 return ret;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000375}
376
Ethan Furmanb95b5612015-01-23 20:05:18 -0800377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382 Py_ssize_t argidx = *p_argidx;
383 if (argidx < arglen) {
384 (*p_argidx)++;
385 if (arglen < 0)
386 return args;
387 else
388 return PyTuple_GetItem(args, argidx);
389 }
390 PyErr_SetString(PyExc_TypeError,
391 "not enough arguments for format string");
392 return NULL;
393}
394
/* Bit flags recording which flag characters appeared in a format
   specifier; they are OR-ed together into a single int. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
Victor Stinnerad771582015-10-09 12:38:53 +0200412 PyObject **p_result, _PyBytesWriter *writer, char *str)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800413{
414 char *p;
415 PyObject *result;
416 double x;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200417 size_t len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800418
419 x = PyFloat_AsDouble(v);
420 if (x == -1.0 && PyErr_Occurred()) {
421 PyErr_Format(PyExc_TypeError, "float argument required, "
422 "not %.200s", Py_TYPE(v)->tp_name);
423 return NULL;
424 }
425
426 if (prec < 0)
427 prec = 6;
428
429 p = PyOS_double_to_string(x, type, prec,
430 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432 if (p == NULL)
433 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200434
435 len = strlen(p);
436 if (writer != NULL) {
Victor Stinnerad771582015-10-09 12:38:53 +0200437 str = _PyBytesWriter_Prepare(writer, str, len);
438 if (str == NULL)
439 return NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200440 Py_MEMCPY(str, p, len);
441 str += len;
442 return str;
443 }
444
445 result = PyBytes_FromStringAndSize(p, len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800446 PyMem_Free(p);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200447 *p_result = result;
448 return str;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800449}
450
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300451static PyObject *
452formatlong(PyObject *v, int flags, int prec, int type)
453{
454 PyObject *result, *iobj;
455 if (type == 'i')
456 type = 'd';
457 if (PyLong_Check(v))
458 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
459 if (PyNumber_Check(v)) {
460 /* make sure number is a type of integer for o, x, and X */
461 if (type == 'o' || type == 'x' || type == 'X')
462 iobj = PyNumber_Index(v);
463 else
464 iobj = PyNumber_Long(v);
465 if (iobj == NULL) {
466 if (!PyErr_ExceptionMatches(PyExc_TypeError))
467 return NULL;
468 }
469 else if (!PyLong_Check(iobj))
470 Py_CLEAR(iobj);
471 if (iobj != NULL) {
472 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
473 Py_DECREF(iobj);
474 return result;
475 }
476 }
477 PyErr_Format(PyExc_TypeError,
478 "%%%c format: %s is required, not %.200s", type,
479 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
480 : "a number",
481 Py_TYPE(v)->tp_name);
482 return NULL;
483}
484
485static int
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200486byte_converter(PyObject *arg, char *p)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800487{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200488 if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) {
489 *p = PyBytes_AS_STRING(arg)[0];
490 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800491 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200492 else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) {
493 *p = PyByteArray_AS_STRING(arg)[0];
494 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800495 }
496 else {
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300497 PyObject *iobj;
498 long ival;
499 int overflow;
500 /* make sure number is a type of integer */
501 if (PyLong_Check(arg)) {
502 ival = PyLong_AsLongAndOverflow(arg, &overflow);
503 }
504 else {
505 iobj = PyNumber_Index(arg);
506 if (iobj == NULL) {
507 if (!PyErr_ExceptionMatches(PyExc_TypeError))
508 return 0;
509 goto onError;
510 }
511 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
512 Py_DECREF(iobj);
513 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300514 if (!overflow && ival == -1 && PyErr_Occurred())
515 goto onError;
516 if (overflow || !(0 <= ival && ival <= 255)) {
517 PyErr_SetString(PyExc_OverflowError,
518 "%c arg not in range(256)");
519 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800520 }
Serhiy Storchaka41525e32015-04-03 20:53:46 +0300521 *p = (char)ival;
522 return 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800523 }
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300524 onError:
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200525 PyErr_SetString(PyExc_TypeError,
526 "%c requires an integer in range(256) or a single byte");
527 return 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800528}
529
530static PyObject *
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200531format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800532{
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200533 PyObject *func, *result;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800534 _Py_IDENTIFIER(__bytes__);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800535 /* is it a bytes object? */
536 if (PyBytes_Check(v)) {
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200537 *pbuf = PyBytes_AS_STRING(v);
538 *plen = PyBytes_GET_SIZE(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800539 Py_INCREF(v);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200540 return v;
541 }
542 if (PyByteArray_Check(v)) {
543 *pbuf = PyByteArray_AS_STRING(v);
544 *plen = PyByteArray_GET_SIZE(v);
545 Py_INCREF(v);
546 return v;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800547 }
548 /* does it support __bytes__? */
549 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
550 if (func != NULL) {
551 result = PyObject_CallFunctionObjArgs(func, NULL);
552 Py_DECREF(func);
553 if (result == NULL)
554 return NULL;
555 if (!PyBytes_Check(result)) {
556 PyErr_Format(PyExc_TypeError,
557 "__bytes__ returned non-bytes (type %.200s)",
558 Py_TYPE(result)->tp_name);
559 Py_DECREF(result);
560 return NULL;
561 }
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200562 *pbuf = PyBytes_AS_STRING(result);
563 *plen = PyBytes_GET_SIZE(result);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800564 return result;
565 }
566 PyErr_Format(PyExc_TypeError,
567 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
568 Py_TYPE(v)->tp_name);
569 return NULL;
570}
571
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200572/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800573
574PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +0200575_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
576 PyObject *args, int use_bytearray)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800577{
Victor Stinner772b2b02015-10-14 09:56:53 +0200578 const char *fmt;
579 char *res;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800580 Py_ssize_t arglen, argidx;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200581 Py_ssize_t fmtcnt;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800582 int args_owned = 0;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800583 PyObject *dict = NULL;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200584 _PyBytesWriter writer;
585
Victor Stinner772b2b02015-10-14 09:56:53 +0200586 if (args == NULL) {
Ethan Furmanb95b5612015-01-23 20:05:18 -0800587 PyErr_BadInternalCall();
588 return NULL;
589 }
Victor Stinner772b2b02015-10-14 09:56:53 +0200590 fmt = format;
591 fmtcnt = format_len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200592
593 _PyBytesWriter_Init(&writer);
Victor Stinner772b2b02015-10-14 09:56:53 +0200594 writer.use_bytearray = use_bytearray;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200595
596 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
597 if (res == NULL)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800598 return NULL;
Victor Stinner772b2b02015-10-14 09:56:53 +0200599 if (!use_bytearray)
600 writer.overallocate = 1;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200601
Ethan Furmanb95b5612015-01-23 20:05:18 -0800602 if (PyTuple_Check(args)) {
603 arglen = PyTuple_GET_SIZE(args);
604 argidx = 0;
605 }
606 else {
607 arglen = -1;
608 argidx = -2;
609 }
610 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
611 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
612 !PyByteArray_Check(args)) {
613 dict = args;
614 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200615
Ethan Furmanb95b5612015-01-23 20:05:18 -0800616 while (--fmtcnt >= 0) {
617 if (*fmt != '%') {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200618 Py_ssize_t len;
619 char *pos;
620
621 pos = strchr(fmt + 1, '%');
622 if (pos != NULL)
623 len = pos - fmt;
Victor Stinner772b2b02015-10-14 09:56:53 +0200624 else
625 len = format_len - (fmt - format);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200626 assert(len != 0);
627
628 Py_MEMCPY(res, fmt, len);
629 res += len;
630 fmt += len;
631 fmtcnt -= (len - 1);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800632 }
633 else {
634 /* Got a format specifier */
635 int flags = 0;
636 Py_ssize_t width = -1;
637 int prec = -1;
638 int c = '\0';
639 int fill;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800640 PyObject *v = NULL;
641 PyObject *temp = NULL;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200642 const char *pbuf = NULL;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800643 int sign;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200644 Py_ssize_t len = 0;
645 char onechar; /* For byte_converter() */
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200646 Py_ssize_t alloc;
647#ifdef Py_DEBUG
648 char *before;
649#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -0800650
Ethan Furmanb95b5612015-01-23 20:05:18 -0800651 fmt++;
652 if (*fmt == '(') {
Victor Stinner772b2b02015-10-14 09:56:53 +0200653 const char *keystart;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800654 Py_ssize_t keylen;
655 PyObject *key;
656 int pcount = 1;
657
658 if (dict == NULL) {
659 PyErr_SetString(PyExc_TypeError,
660 "format requires a mapping");
661 goto error;
662 }
663 ++fmt;
664 --fmtcnt;
665 keystart = fmt;
666 /* Skip over balanced parentheses */
667 while (pcount > 0 && --fmtcnt >= 0) {
668 if (*fmt == ')')
669 --pcount;
670 else if (*fmt == '(')
671 ++pcount;
672 fmt++;
673 }
674 keylen = fmt - keystart - 1;
675 if (fmtcnt < 0 || pcount > 0) {
676 PyErr_SetString(PyExc_ValueError,
677 "incomplete format key");
678 goto error;
679 }
680 key = PyBytes_FromStringAndSize(keystart,
681 keylen);
682 if (key == NULL)
683 goto error;
684 if (args_owned) {
685 Py_DECREF(args);
686 args_owned = 0;
687 }
688 args = PyObject_GetItem(dict, key);
689 Py_DECREF(key);
690 if (args == NULL) {
691 goto error;
692 }
693 args_owned = 1;
694 arglen = -1;
695 argidx = -2;
696 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200697
698 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800699 while (--fmtcnt >= 0) {
700 switch (c = *fmt++) {
701 case '-': flags |= F_LJUST; continue;
702 case '+': flags |= F_SIGN; continue;
703 case ' ': flags |= F_BLANK; continue;
704 case '#': flags |= F_ALT; continue;
705 case '0': flags |= F_ZERO; continue;
706 }
707 break;
708 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200709
710 /* Parse width. Example: "%10s" => width=10 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800711 if (c == '*') {
712 v = getnextarg(args, arglen, &argidx);
713 if (v == NULL)
714 goto error;
715 if (!PyLong_Check(v)) {
716 PyErr_SetString(PyExc_TypeError,
717 "* wants int");
718 goto error;
719 }
720 width = PyLong_AsSsize_t(v);
721 if (width == -1 && PyErr_Occurred())
722 goto error;
723 if (width < 0) {
724 flags |= F_LJUST;
725 width = -width;
726 }
727 if (--fmtcnt >= 0)
728 c = *fmt++;
729 }
730 else if (c >= 0 && isdigit(c)) {
731 width = c - '0';
732 while (--fmtcnt >= 0) {
733 c = Py_CHARMASK(*fmt++);
734 if (!isdigit(c))
735 break;
736 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
737 PyErr_SetString(
738 PyExc_ValueError,
739 "width too big");
740 goto error;
741 }
742 width = width*10 + (c - '0');
743 }
744 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200745
746 /* Parse precision. Example: "%.3f" => prec=3 */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800747 if (c == '.') {
748 prec = 0;
749 if (--fmtcnt >= 0)
750 c = *fmt++;
751 if (c == '*') {
752 v = getnextarg(args, arglen, &argidx);
753 if (v == NULL)
754 goto error;
755 if (!PyLong_Check(v)) {
756 PyErr_SetString(
757 PyExc_TypeError,
758 "* wants int");
759 goto error;
760 }
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200761 prec = _PyLong_AsInt(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800762 if (prec == -1 && PyErr_Occurred())
763 goto error;
764 if (prec < 0)
765 prec = 0;
766 if (--fmtcnt >= 0)
767 c = *fmt++;
768 }
769 else if (c >= 0 && isdigit(c)) {
770 prec = c - '0';
771 while (--fmtcnt >= 0) {
772 c = Py_CHARMASK(*fmt++);
773 if (!isdigit(c))
774 break;
775 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
776 PyErr_SetString(
777 PyExc_ValueError,
778 "prec too big");
779 goto error;
780 }
781 prec = prec*10 + (c - '0');
782 }
783 }
784 } /* prec */
785 if (fmtcnt >= 0) {
786 if (c == 'h' || c == 'l' || c == 'L') {
787 if (--fmtcnt >= 0)
788 c = *fmt++;
789 }
790 }
791 if (fmtcnt < 0) {
792 PyErr_SetString(PyExc_ValueError,
793 "incomplete format");
794 goto error;
795 }
796 if (c != '%') {
797 v = getnextarg(args, arglen, &argidx);
798 if (v == NULL)
799 goto error;
800 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200801
802 if (fmtcnt < 0) {
803 /* last writer: disable writer overallocation */
804 writer.overallocate = 0;
805 }
806
Ethan Furmanb95b5612015-01-23 20:05:18 -0800807 sign = 0;
808 fill = ' ';
809 switch (c) {
810 case '%':
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200811 *res++ = '%';
812 continue;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200813
Ethan Furman62e977f2015-03-11 08:17:00 -0700814 case 'r':
815 // %r is only for 2/3 code; 3 only code should use %a
Ethan Furmanb95b5612015-01-23 20:05:18 -0800816 case 'a':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200817 temp = PyObject_ASCII(v);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800818 if (temp == NULL)
819 goto error;
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200820 assert(PyUnicode_IS_ASCII(temp));
821 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
822 len = PyUnicode_GET_LENGTH(temp);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800823 if (prec >= 0 && len > prec)
824 len = prec;
825 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200826
Ethan Furmanb95b5612015-01-23 20:05:18 -0800827 case 's':
828 // %s is only for 2/3 code; 3 only code should use %b
829 case 'b':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200830 temp = format_obj(v, &pbuf, &len);
Ethan Furmanb95b5612015-01-23 20:05:18 -0800831 if (temp == NULL)
832 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800833 if (prec >= 0 && len > prec)
834 len = prec;
835 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200836
Ethan Furmanb95b5612015-01-23 20:05:18 -0800837 case 'i':
838 case 'd':
839 case 'u':
840 case 'o':
841 case 'x':
842 case 'X':
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200843 if (PyLong_CheckExact(v)
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200844 && width == -1 && prec == -1
845 && !(flags & (F_SIGN | F_BLANK))
846 && c != 'X')
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200847 {
848 /* Fast path */
849 int alternate = flags & F_ALT;
850 int base;
851
852 switch(c)
853 {
854 default:
855 assert(0 && "'type' not in [diuoxX]");
856 case 'd':
857 case 'i':
858 case 'u':
859 base = 10;
860 break;
861 case 'o':
862 base = 8;
863 break;
864 case 'x':
865 case 'X':
866 base = 16;
867 break;
868 }
869
870 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200871 writer.min_size -= 2; /* size preallocated for "%d" */
Victor Stinnerbe75b8c2015-10-09 22:43:24 +0200872 res = _PyLong_FormatBytesWriter(&writer, res,
873 v, base, alternate);
874 if (res == NULL)
875 goto error;
876 continue;
877 }
878
Serhiy Storchaka2c7b5a92015-03-30 09:19:08 +0300879 temp = formatlong(v, flags, prec, c);
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200880 if (!temp)
881 goto error;
882 assert(PyUnicode_IS_ASCII(temp));
883 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
884 len = PyUnicode_GET_LENGTH(temp);
885 sign = 1;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800886 if (flags & F_ZERO)
887 fill = '0';
888 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200889
Ethan Furmanb95b5612015-01-23 20:05:18 -0800890 case 'e':
891 case 'E':
892 case 'f':
893 case 'F':
894 case 'g':
895 case 'G':
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200896 if (width == -1 && prec == -1
897 && !(flags & (F_SIGN | F_BLANK)))
898 {
899 /* Fast path */
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200900 writer.min_size -= 2; /* size preallocated for "%f" */
Victor Stinnerad771582015-10-09 12:38:53 +0200901 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200902 if (res == NULL)
903 goto error;
904 continue;
905 }
906
Victor Stinnerad771582015-10-09 12:38:53 +0200907 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
Ethan Furmanb95b5612015-01-23 20:05:18 -0800908 goto error;
909 pbuf = PyBytes_AS_STRING(temp);
910 len = PyBytes_GET_SIZE(temp);
911 sign = 1;
912 if (flags & F_ZERO)
913 fill = '0';
914 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200915
Ethan Furmanb95b5612015-01-23 20:05:18 -0800916 case 'c':
Serhiy Storchakaea5ce5a2015-02-10 23:23:12 +0200917 pbuf = &onechar;
918 len = byte_converter(v, &onechar);
919 if (!len)
Ethan Furmanb95b5612015-01-23 20:05:18 -0800920 goto error;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200921 if (width == -1) {
922 /* Fast path */
923 *res++ = onechar;
924 continue;
925 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800926 break;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200927
Ethan Furmanb95b5612015-01-23 20:05:18 -0800928 default:
929 PyErr_Format(PyExc_ValueError,
930 "unsupported format character '%c' (0x%x) "
931 "at index %zd",
932 c, c,
Victor Stinner772b2b02015-10-14 09:56:53 +0200933 (Py_ssize_t)(fmt - 1 - format));
Ethan Furmanb95b5612015-01-23 20:05:18 -0800934 goto error;
935 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200936
Ethan Furmanb95b5612015-01-23 20:05:18 -0800937 if (sign) {
938 if (*pbuf == '-' || *pbuf == '+') {
939 sign = *pbuf++;
940 len--;
941 }
942 else if (flags & F_SIGN)
943 sign = '+';
944 else if (flags & F_BLANK)
945 sign = ' ';
946 else
947 sign = 0;
948 }
949 if (width < len)
950 width = len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200951
952 alloc = width;
953 if (sign != 0 && len == width)
954 alloc++;
Victor Stinner0cdad1e2015-10-09 22:50:36 +0200955 /* 2: size preallocated for %s */
956 if (alloc > 2) {
957 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200958 if (res == NULL)
959 goto error;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800960 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200961#ifdef Py_DEBUG
962 before = res;
963#endif
964
965 /* Write the sign if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800966 if (sign) {
967 if (fill != ' ')
968 *res++ = sign;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800969 if (width > len)
970 width--;
971 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200972
973 /* Write the numeric prefix for "x", "X" and "o" formats
974 if the alternate form is used.
975 For example, write "0x" for the "%#x" format. */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800976 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
977 assert(pbuf[0] == '0');
978 assert(pbuf[1] == c);
979 if (fill != ' ') {
980 *res++ = *pbuf++;
981 *res++ = *pbuf++;
982 }
Ethan Furmanb95b5612015-01-23 20:05:18 -0800983 width -= 2;
984 if (width < 0)
985 width = 0;
986 len -= 2;
987 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200988
989 /* Pad left with the fill character if needed */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800990 if (width > len && !(flags & F_LJUST)) {
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200991 memset(res, fill, width - len);
992 res += (width - len);
993 width = len;
Ethan Furmanb95b5612015-01-23 20:05:18 -0800994 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +0200995
996 /* If padding with spaces: write sign if needed and/or numeric
997 prefix if the alternate form is used */
Ethan Furmanb95b5612015-01-23 20:05:18 -0800998 if (fill == ' ') {
999 if (sign)
1000 *res++ = sign;
1001 if ((flags & F_ALT) &&
1002 (c == 'x' || c == 'X')) {
1003 assert(pbuf[0] == '0');
1004 assert(pbuf[1] == c);
1005 *res++ = *pbuf++;
1006 *res++ = *pbuf++;
1007 }
1008 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001009
1010 /* Copy bytes */
Ethan Furmanb95b5612015-01-23 20:05:18 -08001011 Py_MEMCPY(res, pbuf, len);
1012 res += len;
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001013
1014 /* Pad right with the fill character if needed */
1015 if (width > len) {
1016 memset(res, ' ', width - len);
1017 res += (width - len);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001018 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001019
Ethan Furmanb95b5612015-01-23 20:05:18 -08001020 if (dict && (argidx < arglen) && c != '%') {
1021 PyErr_SetString(PyExc_TypeError,
1022 "not all arguments converted during bytes formatting");
Ethan Furmanb95b5612015-01-23 20:05:18 -08001023 Py_XDECREF(temp);
1024 goto error;
1025 }
Ethan Furmanb95b5612015-01-23 20:05:18 -08001026 Py_XDECREF(temp);
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001027
1028#ifdef Py_DEBUG
1029 /* check that we computed the exact size for this write */
1030 assert((res - before) == alloc);
1031#endif
Ethan Furmanb95b5612015-01-23 20:05:18 -08001032 } /* '%' */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001033
1034 /* If overallocation was disabled, ensure that it was the last
1035 write. Otherwise, we missed an optimization */
Victor Stinner772b2b02015-10-14 09:56:53 +02001036 assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001037 } /* until end */
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001038
Ethan Furmanb95b5612015-01-23 20:05:18 -08001039 if (argidx < arglen && !dict) {
1040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
1042 goto error;
1043 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001044
Ethan Furmanb95b5612015-01-23 20:05:18 -08001045 if (args_owned) {
1046 Py_DECREF(args);
1047 }
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001048 return _PyBytesWriter_Finish(&writer, res);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001049
1050 error:
Victor Stinnerfa7762e2015-10-09 11:48:06 +02001051 _PyBytesWriter_Dealloc(&writer);
Ethan Furmanb95b5612015-01-23 20:05:18 -08001052 if (args_owned) {
1053 Py_DECREF(args);
1054 }
1055 return NULL;
1056}
1057
1058/* =-= */
1059
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001060static void
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001061bytes_dealloc(PyObject *op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001062{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 Py_TYPE(op)->tp_free(op);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001064}
1065
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001066/* Unescape a backslash-escaped string. If unicode is non-zero,
1067 the string is a u-literal. If recode_encoding is non-zero,
1068 the string is UTF-8 encoded and should be re-encoded in the
1069 specified encoding. */
1070
Victor Stinner2ec80632015-10-14 13:32:13 +02001071static char *
1072_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073 const char *errors, const char *recode_encoding,
1074 _PyBytesWriter *writer, char *p)
1075{
1076 PyObject *u, *w;
1077 const char* t;
1078
1079 t = *s;
1080 /* Decode non-ASCII bytes as UTF-8. */
1081 while (t < end && (*t & 0x80))
1082 t++;
1083 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084 if (u == NULL)
1085 return NULL;
1086
1087 /* Recode them in target encoding. */
1088 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089 Py_DECREF(u);
1090 if (w == NULL)
1091 return NULL;
1092 assert(PyBytes_Check(w));
1093
1094 /* Append bytes to output buffer. */
1095 writer->min_size--; /* substract 1 preallocated byte */
1096 p = _PyBytesWriter_WriteBytes(writer, p,
1097 PyBytes_AS_STRING(w),
1098 PyBytes_GET_SIZE(w));
1099 Py_DECREF(w);
1100 if (p == NULL)
1101 return NULL;
1102
1103 *s = t;
1104 return p;
1105}
1106
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001107PyObject *PyBytes_DecodeEscape(const char *s,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 Py_ssize_t len,
1109 const char *errors,
1110 Py_ssize_t unicode,
1111 const char *recode_encoding)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001112{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 int c;
Victor Stinner2ec80632015-10-14 13:32:13 +02001114 char *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 const char *end;
Victor Stinner2ec80632015-10-14 13:32:13 +02001116 _PyBytesWriter writer;
1117
1118 _PyBytesWriter_Init(&writer);
1119
1120 p = _PyBytesWriter_Alloc(&writer, len);
1121 if (p == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 return NULL;
Victor Stinner2ec80632015-10-14 13:32:13 +02001123 writer.overallocate = 1;
1124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 end = s + len;
1126 while (s < end) {
1127 if (*s != '\\') {
1128 non_esc:
Victor Stinner2ec80632015-10-14 13:32:13 +02001129 if (!(recode_encoding && (*s & 0x80))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 *p++ = *s++;
1131 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001132 else {
1133 /* non-ASCII character and need to recode */
1134 p = _PyBytes_DecodeEscapeRecode(&s, end,
1135 errors, recode_encoding,
1136 &writer, p);
1137 if (p == NULL)
1138 goto failed;
1139 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 continue;
1141 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 s++;
Victor Stinner2ec80632015-10-14 13:32:13 +02001144 if (s == end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 PyErr_SetString(PyExc_ValueError,
1146 "Trailing \\ in string");
1147 goto failed;
1148 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 switch (*s++) {
1151 /* XXX This assumes ASCII! */
1152 case '\n': break;
1153 case '\\': *p++ = '\\'; break;
1154 case '\'': *p++ = '\''; break;
1155 case '\"': *p++ = '\"'; break;
1156 case 'b': *p++ = '\b'; break;
1157 case 'f': *p++ = '\014'; break; /* FF */
1158 case 't': *p++ = '\t'; break;
1159 case 'n': *p++ = '\n'; break;
1160 case 'r': *p++ = '\r'; break;
1161 case 'v': *p++ = '\013'; break; /* VT */
1162 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1163 case '0': case '1': case '2': case '3':
1164 case '4': case '5': case '6': case '7':
1165 c = s[-1] - '0';
1166 if (s < end && '0' <= *s && *s <= '7') {
1167 c = (c<<3) + *s++ - '0';
1168 if (s < end && '0' <= *s && *s <= '7')
1169 c = (c<<3) + *s++ - '0';
1170 }
1171 *p++ = c;
1172 break;
1173 case 'x':
Victor Stinner2ec80632015-10-14 13:32:13 +02001174 if (s+1 < end) {
1175 int digit1, digit2;
1176 digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1177 digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1178 if (digit1 < 16 && digit2 < 16) {
1179 *p++ = (unsigned char)((digit1 << 4) + digit2);
1180 s += 2;
1181 break;
1182 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001184 /* invalid hexadecimal digits */
1185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 if (!errors || strcmp(errors, "strict") == 0) {
Serhiy Storchaka5e61f142013-02-10 17:36:00 +02001187 PyErr_Format(PyExc_ValueError,
1188 "invalid \\x escape at position %d",
Serhiy Storchaka801d9552013-02-10 17:42:01 +02001189 s - 2 - (end - len));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 goto failed;
1191 }
1192 if (strcmp(errors, "replace") == 0) {
1193 *p++ = '?';
1194 } else if (strcmp(errors, "ignore") == 0)
1195 /* do nothing */;
1196 else {
1197 PyErr_Format(PyExc_ValueError,
1198 "decoding error; unknown "
1199 "error handling code: %.400s",
1200 errors);
1201 goto failed;
1202 }
Serhiy Storchakaace3ad32013-01-25 23:31:43 +02001203 /* skip \x */
1204 if (s < end && Py_ISXDIGIT(s[0]))
1205 s++; /* and a hexdigit */
1206 break;
Victor Stinner2ec80632015-10-14 13:32:13 +02001207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 default:
1209 *p++ = '\\';
1210 s--;
Ezio Melotti42da6632011-03-15 05:18:48 +02001211 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 UTF-8 bytes may follow. */
1213 }
1214 }
Victor Stinner2ec80632015-10-14 13:32:13 +02001215
1216 return _PyBytesWriter_Finish(&writer, p);
1217
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001218 failed:
Victor Stinner2ec80632015-10-14 13:32:13 +02001219 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001221}
1222
1223/* -------------------------------------------------------------------- */
1224/* object api */
1225
1226Py_ssize_t
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001227PyBytes_Size(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 if (!PyBytes_Check(op)) {
1230 PyErr_Format(PyExc_TypeError,
1231 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1232 return -1;
1233 }
1234 return Py_SIZE(op);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001235}
1236
1237char *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001238PyBytes_AsString(PyObject *op)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001239{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 if (!PyBytes_Check(op)) {
1241 PyErr_Format(PyExc_TypeError,
1242 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1243 return NULL;
1244 }
1245 return ((PyBytesObject *)op)->ob_sval;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001246}
1247
1248int
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001249PyBytes_AsStringAndSize(PyObject *obj,
1250 char **s,
1251 Py_ssize_t *len)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001252{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 if (s == NULL) {
1254 PyErr_BadInternalCall();
1255 return -1;
1256 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 if (!PyBytes_Check(obj)) {
1259 PyErr_Format(PyExc_TypeError,
1260 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1261 return -1;
1262 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 *s = PyBytes_AS_STRING(obj);
1265 if (len != NULL)
1266 *len = PyBytes_GET_SIZE(obj);
1267 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
Serhiy Storchakad8a14472014-09-06 20:07:17 +03001268 PyErr_SetString(PyExc_ValueError,
1269 "embedded null byte");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 return -1;
1271 }
1272 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001273}
Neal Norwitz6968b052007-02-27 19:02:19 +00001274
1275/* -------------------------------------------------------------------- */
1276/* Methods */
1277
Eric Smith0923d1d2009-04-16 20:16:10 +00001278#include "stringlib/stringdefs.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001279
1280#include "stringlib/fastsearch.h"
1281#include "stringlib/count.h"
1282#include "stringlib/find.h"
Antoine Pitroucfc22b42012-10-16 21:07:23 +02001283#include "stringlib/join.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001284#include "stringlib/partition.h"
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001285#include "stringlib/split.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001286#include "stringlib/ctype.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001287
Eric Smith0f78bff2009-11-30 01:01:42 +00001288#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001289
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001290PyObject *
1291PyBytes_Repr(PyObject *obj, int smartquotes)
Neal Norwitz6968b052007-02-27 19:02:19 +00001292{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001293 PyBytesObject* op = (PyBytesObject*) obj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 Py_ssize_t i, length = Py_SIZE(op);
Benjamin Petersond48bc942014-09-29 19:12:26 -04001295 Py_ssize_t newsize, squotes, dquotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 PyObject *v;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 unsigned char quote, *s, *p;
1298
1299 /* Compute size of output string */
1300 squotes = dquotes = 0;
1301 newsize = 3; /* b'' */
1302 s = (unsigned char*)op->ob_sval;
1303 for (i = 0; i < length; i++) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001304 Py_ssize_t incr = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001305 switch(s[i]) {
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001306 case '\'': squotes++; break;
1307 case '"': dquotes++; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 case '\\': case '\t': case '\n': case '\r':
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001309 incr = 2; break; /* \C */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 default:
1311 if (s[i] < ' ' || s[i] >= 0x7f)
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001312 incr = 4; /* \xHH */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001313 }
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001314 if (newsize > PY_SSIZE_T_MAX - incr)
1315 goto overflow;
1316 newsize += incr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 }
1318 quote = '\'';
1319 if (smartquotes && squotes && !dquotes)
1320 quote = '"';
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001321 if (squotes && quote == '\'') {
1322 if (newsize > PY_SSIZE_T_MAX - squotes)
1323 goto overflow;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001324 newsize += squotes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326
1327 v = PyUnicode_New(newsize, 127);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 if (v == NULL) {
1329 return NULL;
1330 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 p = PyUnicode_1BYTE_DATA(v);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001332
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001333 *p++ = 'b', *p++ = quote;
1334 for (i = 0; i < length; i++) {
1335 unsigned char c = op->ob_sval[i];
1336 if (c == quote || c == '\\')
1337 *p++ = '\\', *p++ = c;
1338 else if (c == '\t')
1339 *p++ = '\\', *p++ = 't';
1340 else if (c == '\n')
1341 *p++ = '\\', *p++ = 'n';
1342 else if (c == '\r')
1343 *p++ = '\\', *p++ = 'r';
1344 else if (c < ' ' || c >= 0x7f) {
1345 *p++ = '\\';
1346 *p++ = 'x';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001347 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1348 *p++ = Py_hexdigits[c & 0xf];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001350 else
1351 *p++ = c;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001353 *p++ = quote;
Victor Stinner8f825062012-04-27 13:55:39 +02001354 assert(_PyUnicode_CheckConsistency(v, 1));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001355 return v;
Benjamin Peterson42ff1052014-09-29 19:01:18 -04001356
1357 overflow:
1358 PyErr_SetString(PyExc_OverflowError,
1359 "bytes object is too large to make repr");
1360 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001361}
1362
Neal Norwitz6968b052007-02-27 19:02:19 +00001363static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001364bytes_repr(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 return PyBytes_Repr(op, 1);
Neal Norwitz6968b052007-02-27 19:02:19 +00001367}
1368
Neal Norwitz6968b052007-02-27 19:02:19 +00001369static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001370bytes_str(PyObject *op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 if (Py_BytesWarningFlag) {
1373 if (PyErr_WarnEx(PyExc_BytesWarning,
1374 "str() on a bytes instance", 1))
1375 return NULL;
1376 }
1377 return bytes_repr(op);
Neal Norwitz6968b052007-02-27 19:02:19 +00001378}
1379
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001380static Py_ssize_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001381bytes_length(PyBytesObject *a)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 return Py_SIZE(a);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001384}
Neal Norwitz6968b052007-02-27 19:02:19 +00001385
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001386/* This is also used by PyBytes_Concat() */
1387static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001388bytes_concat(PyObject *a, PyObject *b)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 Py_ssize_t size;
1391 Py_buffer va, vb;
1392 PyObject *result = NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 va.len = -1;
1395 vb.len = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001396 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1397 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1399 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1400 goto done;
1401 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 /* Optimize end cases */
1404 if (va.len == 0 && PyBytes_CheckExact(b)) {
1405 result = b;
1406 Py_INCREF(result);
1407 goto done;
1408 }
1409 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1410 result = a;
1411 Py_INCREF(result);
1412 goto done;
1413 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 size = va.len + vb.len;
1416 if (size < 0) {
1417 PyErr_NoMemory();
1418 goto done;
1419 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001421 result = PyBytes_FromStringAndSize(NULL, size);
1422 if (result != NULL) {
1423 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1424 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1425 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001426
1427 done:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (va.len != -1)
1429 PyBuffer_Release(&va);
1430 if (vb.len != -1)
1431 PyBuffer_Release(&vb);
1432 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001433}
Neal Norwitz6968b052007-02-27 19:02:19 +00001434
1435static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001436bytes_repeat(PyBytesObject *a, Py_ssize_t n)
Neal Norwitz6968b052007-02-27 19:02:19 +00001437{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001438 Py_ssize_t i;
1439 Py_ssize_t j;
1440 Py_ssize_t size;
1441 PyBytesObject *op;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 size_t nbytes;
1443 if (n < 0)
1444 n = 0;
1445 /* watch out for overflows: the size can overflow int,
1446 * and the # of bytes needed can overflow size_t
1447 */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001448 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 PyErr_SetString(PyExc_OverflowError,
1450 "repeated bytes are too long");
1451 return NULL;
1452 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00001453 size = Py_SIZE(a) * n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1455 Py_INCREF(a);
1456 return (PyObject *)a;
1457 }
1458 nbytes = (size_t)size;
1459 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1460 PyErr_SetString(PyExc_OverflowError,
1461 "repeated bytes are too long");
1462 return NULL;
1463 }
1464 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1465 if (op == NULL)
1466 return PyErr_NoMemory();
Christian Heimesd3afe782013-12-04 09:27:47 +01001467 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 op->ob_shash = -1;
1469 op->ob_sval[size] = '\0';
1470 if (Py_SIZE(a) == 1 && n > 0) {
1471 memset(op->ob_sval, a->ob_sval[0] , n);
1472 return (PyObject *) op;
1473 }
1474 i = 0;
1475 if (i < size) {
1476 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1477 i = Py_SIZE(a);
1478 }
1479 while (i < size) {
1480 j = (i <= size-i) ? i : size-i;
1481 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1482 i += j;
1483 }
1484 return (PyObject *) op;
Neal Norwitz6968b052007-02-27 19:02:19 +00001485}
1486
Guido van Rossum98297ee2007-11-06 21:34:58 +00001487static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001488bytes_contains(PyObject *self, PyObject *arg)
Guido van Rossum98297ee2007-11-06 21:34:58 +00001489{
1490 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1491 if (ival == -1 && PyErr_Occurred()) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001492 Py_buffer varg;
Antoine Pitrou0010d372010-08-15 17:12:55 +00001493 Py_ssize_t pos;
Antoine Pitroud1188562010-06-09 16:38:55 +00001494 PyErr_Clear();
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001495 if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
Antoine Pitroud1188562010-06-09 16:38:55 +00001496 return -1;
1497 pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
1498 varg.buf, varg.len, 0);
1499 PyBuffer_Release(&varg);
1500 return pos >= 0;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001501 }
1502 if (ival < 0 || ival >= 256) {
Antoine Pitroud1188562010-06-09 16:38:55 +00001503 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1504 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001505 }
1506
Antoine Pitrou0010d372010-08-15 17:12:55 +00001507 return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001508}
1509
Neal Norwitz6968b052007-02-27 19:02:19 +00001510static PyObject *
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02001511bytes_item(PyBytesObject *a, Py_ssize_t i)
Neal Norwitz6968b052007-02-27 19:02:19 +00001512{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 if (i < 0 || i >= Py_SIZE(a)) {
1514 PyErr_SetString(PyExc_IndexError, "index out of range");
1515 return NULL;
1516 }
1517 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
Neal Norwitz6968b052007-02-27 19:02:19 +00001518}
1519
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001520Py_LOCAL(int)
1521bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1522{
1523 int cmp;
1524 Py_ssize_t len;
1525
1526 len = Py_SIZE(a);
1527 if (Py_SIZE(b) != len)
1528 return 0;
1529
1530 if (a->ob_sval[0] != b->ob_sval[0])
1531 return 0;
1532
1533 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1534 return (cmp == 0);
1535}
1536
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001537static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001538bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
Neal Norwitz6968b052007-02-27 19:02:19 +00001539{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 int c;
1541 Py_ssize_t len_a, len_b;
1542 Py_ssize_t min_len;
1543 PyObject *result;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001544 int rc;
Neal Norwitz6968b052007-02-27 19:02:19 +00001545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 /* Make sure both arguments are strings. */
1547 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001548 if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001549 rc = PyObject_IsInstance((PyObject*)a,
1550 (PyObject*)&PyUnicode_Type);
1551 if (!rc)
1552 rc = PyObject_IsInstance((PyObject*)b,
1553 (PyObject*)&PyUnicode_Type);
1554 if (rc < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001555 return NULL;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001556 if (rc) {
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001557 if (PyErr_WarnEx(PyExc_BytesWarning,
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001558 "Comparison between bytes and string", 1))
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001559 return NULL;
1560 }
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001561 else {
1562 rc = PyObject_IsInstance((PyObject*)a,
1563 (PyObject*)&PyLong_Type);
1564 if (!rc)
1565 rc = PyObject_IsInstance((PyObject*)b,
1566 (PyObject*)&PyLong_Type);
1567 if (rc < 0)
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001568 return NULL;
Serhiy Storchakaac5569b2015-05-30 17:48:19 +03001569 if (rc) {
1570 if (PyErr_WarnEx(PyExc_BytesWarning,
1571 "Comparison between bytes and int", 1))
1572 return NULL;
1573 }
Serhiy Storchaka1dd49822015-03-20 16:54:57 +02001574 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 }
1576 result = Py_NotImplemented;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001578 else if (a == b) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 switch (op) {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001580 case Py_EQ:
1581 case Py_LE:
1582 case Py_GE:
1583 /* a string is equal to itself */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 result = Py_True;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001585 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001586 case Py_NE:
1587 case Py_LT:
1588 case Py_GT:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 result = Py_False;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001590 break;
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001591 default:
1592 PyErr_BadArgument();
1593 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 }
1595 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001596 else if (op == Py_EQ || op == Py_NE) {
1597 int eq = bytes_compare_eq(a, b);
1598 eq ^= (op == Py_NE);
1599 result = eq ? Py_True : Py_False;
1600 }
1601 else {
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001602 len_a = Py_SIZE(a);
1603 len_b = Py_SIZE(b);
1604 min_len = Py_MIN(len_a, len_b);
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001605 if (min_len > 0) {
1606 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001607 if (c == 0)
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001608 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001610 else
1611 c = 0;
1612 if (c == 0)
1613 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1614 switch (op) {
1615 case Py_LT: c = c < 0; break;
1616 case Py_LE: c = c <= 0; break;
1617 case Py_GT: c = c > 0; break;
1618 case Py_GE: c = c >= 0; break;
1619 default:
Victor Stinnerfd9e44d2013-11-04 11:23:05 +01001620 PyErr_BadArgument();
1621 return NULL;
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001622 }
1623 result = c ? Py_True : Py_False;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 }
Victor Stinnerc8bc5372013-11-04 11:08:10 +01001625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 Py_INCREF(result);
1627 return result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001628}
1629
Benjamin Peterson8f67d082010-10-17 20:54:53 +00001630static Py_hash_t
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001631bytes_hash(PyBytesObject *a)
Neal Norwitz6968b052007-02-27 19:02:19 +00001632{
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001633 if (a->ob_shash == -1) {
1634 /* Can't fail */
Christian Heimes985ecdc2013-11-20 11:46:18 +01001635 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
Antoine Pitrouce4a9da2011-11-21 20:46:33 +01001636 }
1637 return a->ob_shash;
Neal Norwitz6968b052007-02-27 19:02:19 +00001638}
1639
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001640static PyObject*
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001641bytes_subscript(PyBytesObject* self, PyObject* item)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001642{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 if (PyIndex_Check(item)) {
1644 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1645 if (i == -1 && PyErr_Occurred())
1646 return NULL;
1647 if (i < 0)
1648 i += PyBytes_GET_SIZE(self);
1649 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1650 PyErr_SetString(PyExc_IndexError,
1651 "index out of range");
1652 return NULL;
1653 }
1654 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1655 }
1656 else if (PySlice_Check(item)) {
1657 Py_ssize_t start, stop, step, slicelength, cur, i;
1658 char* source_buf;
1659 char* result_buf;
1660 PyObject* result;
Neal Norwitz6968b052007-02-27 19:02:19 +00001661
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001662 if (PySlice_GetIndicesEx(item,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 PyBytes_GET_SIZE(self),
1664 &start, &stop, &step, &slicelength) < 0) {
1665 return NULL;
1666 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 if (slicelength <= 0) {
1669 return PyBytes_FromStringAndSize("", 0);
1670 }
1671 else if (start == 0 && step == 1 &&
1672 slicelength == PyBytes_GET_SIZE(self) &&
1673 PyBytes_CheckExact(self)) {
1674 Py_INCREF(self);
1675 return (PyObject *)self;
1676 }
1677 else if (step == 1) {
1678 return PyBytes_FromStringAndSize(
1679 PyBytes_AS_STRING(self) + start,
1680 slicelength);
1681 }
1682 else {
1683 source_buf = PyBytes_AS_STRING(self);
1684 result = PyBytes_FromStringAndSize(NULL, slicelength);
1685 if (result == NULL)
1686 return NULL;
Neal Norwitz6968b052007-02-27 19:02:19 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 result_buf = PyBytes_AS_STRING(result);
1689 for (cur = start, i = 0; i < slicelength;
1690 cur += step, i++) {
1691 result_buf[i] = source_buf[cur];
1692 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 return result;
1695 }
1696 }
1697 else {
1698 PyErr_Format(PyExc_TypeError,
Terry Jan Reedyffff1442014-08-02 01:30:37 -04001699 "byte indices must be integers or slices, not %.200s",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 Py_TYPE(item)->tp_name);
1701 return NULL;
1702 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001703}
1704
1705static int
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001706bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1709 1, flags);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001710}
1711
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001712static PySequenceMethods bytes_as_sequence = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 (lenfunc)bytes_length, /*sq_length*/
1714 (binaryfunc)bytes_concat, /*sq_concat*/
1715 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1716 (ssizeargfunc)bytes_item, /*sq_item*/
1717 0, /*sq_slice*/
1718 0, /*sq_ass_item*/
1719 0, /*sq_ass_slice*/
1720 (objobjproc)bytes_contains /*sq_contains*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001721};
1722
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001723static PyMappingMethods bytes_as_mapping = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 (lenfunc)bytes_length,
1725 (binaryfunc)bytes_subscript,
1726 0,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001727};
1728
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001729static PyBufferProcs bytes_as_buffer = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 (getbufferproc)bytes_buffer_getbuffer,
1731 NULL,
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001732};
1733
1734
/* Values for the striptype argument of do_strip(), do_xstrip() and
   do_argstrip(). */
#define LEFTSTRIP 0     /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP 2     /* strip both ends */
1738
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001739/*[clinic input]
1740bytes.split
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001741
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001742 sep: object = None
1743 The delimiter according to which to split the bytes.
1744 None (the default value) means split on ASCII whitespace characters
1745 (space, tab, return, newline, formfeed, vertical tab).
1746 maxsplit: Py_ssize_t = -1
1747 Maximum number of splits to do.
1748 -1 (the default value) means no limit.
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001749
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001750Return a list of the sections in the bytes, using sep as the delimiter.
1751[clinic start generated code]*/
1752
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001753static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001754bytes_split_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001755/*[clinic end generated code: output=8bde44dacb36ef2e input=8b809b39074abbfa]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001756{
1757 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001758 const char *s = PyBytes_AS_STRING(self), *sub;
1759 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001760 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 if (maxsplit < 0)
1763 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001764 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001765 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001766 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 return NULL;
1768 sub = vsub.buf;
1769 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001771 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1772 PyBuffer_Release(&vsub);
1773 return list;
Guido van Rossum98297ee2007-11-06 21:34:58 +00001774}
1775
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001776/*[clinic input]
1777bytes.partition
1778
1779 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001780 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001781 /
1782
1783Partition the bytes into three parts using the given separator.
1784
1785This will search for the separator sep in the bytes. If the separator is found,
1786returns a 3-tuple containing the part before the separator, the separator
1787itself, and the part after it.
1788
1789If the separator is not found, returns a 3-tuple containing the original bytes
1790object and two empty bytes objects.
1791[clinic start generated code]*/
1792
Neal Norwitz6968b052007-02-27 19:02:19 +00001793static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001794bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001795/*[clinic end generated code: output=f532b392a17ff695 input=bc855dc63ca949de]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001796{
Neal Norwitz6968b052007-02-27 19:02:19 +00001797 return stringlib_partition(
1798 (PyObject*) self,
1799 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001800 sep->obj, (const char *)sep->buf, sep->len
Neal Norwitz6968b052007-02-27 19:02:19 +00001801 );
1802}
1803
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001804/*[clinic input]
1805bytes.rpartition
1806
1807 self: self(type="PyBytesObject *")
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001808 sep: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001809 /
1810
1811Partition the bytes into three parts using the given separator.
1812
1813This will search for the separator sep in the bytes, starting at the end. If
1814the separator is found, returns a 3-tuple containing the part before the
1815separator, the separator itself, and the part after it.
1816
1817If the separator is not found, returns a 3-tuple containing two empty bytes
1818objects and the original bytes object.
1819[clinic start generated code]*/
1820
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001821static PyObject *
1822bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001823/*[clinic end generated code: output=191b114cbb028e50 input=6588fff262a9170e]*/
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001824{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001825 return stringlib_rpartition(
1826 (PyObject*) self,
1827 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001828 sep->obj, (const char *)sep->buf, sep->len
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001829 );
Neal Norwitz6968b052007-02-27 19:02:19 +00001830}
1831
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001832/*[clinic input]
1833bytes.rsplit = bytes.split
Neal Norwitz6968b052007-02-27 19:02:19 +00001834
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001835Return a list of the sections in the bytes, using sep as the delimiter.
1836
1837Splitting is done starting at the end of the bytes and working to the front.
1838[clinic start generated code]*/
1839
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001840static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001841bytes_rsplit_impl(PyBytesObject*self, PyObject *sep, Py_ssize_t maxsplit)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001842/*[clinic end generated code: output=0b6570b977911d88 input=0f86c9f28f7d7b7b]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001843{
1844 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 const char *s = PyBytes_AS_STRING(self), *sub;
1846 Py_buffer vsub;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001847 PyObject *list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001849 if (maxsplit < 0)
1850 maxsplit = PY_SSIZE_T_MAX;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001851 if (sep == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02001853 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001854 return NULL;
1855 sub = vsub.buf;
1856 n = vsub.len;
Guido van Rossum8f950672007-09-10 16:53:45 +00001857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1859 PyBuffer_Release(&vsub);
1860 return list;
Neal Norwitz6968b052007-02-27 19:02:19 +00001861}
1862
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001863
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02001864/*[clinic input]
1865bytes.join
1866
1867 iterable_of_bytes: object
1868 /
1869
1870Concatenate any number of bytes objects.
1871
1872The bytes whose method is called is inserted in between each pair.
1873
1874The result is returned as a new bytes object.
1875
1876Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1877[clinic start generated code]*/
1878
Neal Norwitz6968b052007-02-27 19:02:19 +00001879static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02001880bytes_join(PyBytesObject*self, PyObject *iterable_of_bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03001881/*[clinic end generated code: output=634aff14764ff997 input=7fe377b95bd549d2]*/
Neal Norwitz6968b052007-02-27 19:02:19 +00001882{
Martin v. Löwis0efea322014-07-27 17:29:17 +02001883 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
Neal Norwitz6968b052007-02-27 19:02:19 +00001884}
1885
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001886PyObject *
1887_PyBytes_Join(PyObject *sep, PyObject *x)
1888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001889 assert(sep != NULL && PyBytes_Check(sep));
1890 assert(x != NULL);
Martin v. Löwis0efea322014-07-27 17:29:17 +02001891 return bytes_join((PyBytesObject*)sep, x);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001892}
1893
/* Helper macro to fix up start/end slice values in place.

   - end greater than len is clamped to len;
   - a negative end has len added to it and is then clamped to 0;
   - a negative start has len added to it and is then clamped to 0.

   A start larger than len is left untouched, so callers must still cope
   with start > end.

   start and end must be modifiable lvalues.  Every argument is evaluated
   more than once, so none of them may have side effects.

   The body is wrapped in do { ... } while (0) so that an invocation followed
   by a semicolon is exactly one statement and can be used safely as the
   body of an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001908
1909Py_LOCAL_INLINE(Py_ssize_t)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001910bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001912 PyObject *subobj;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001913 char byte;
1914 Py_buffer subbuf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001915 const char *sub;
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001916 Py_ssize_t len, sub_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001917 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Antoine Pitrouac65d962011-10-20 23:54:17 +02001918 Py_ssize_t res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001919
Antoine Pitrouac65d962011-10-20 23:54:17 +02001920 if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1921 args, &subobj, &byte, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001922 return -2;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001923
Antoine Pitrouac65d962011-10-20 23:54:17 +02001924 if (subobj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02001925 if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02001926 return -2;
1927
1928 sub = subbuf.buf;
1929 sub_len = subbuf.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001930 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001931 else {
1932 sub = &byte;
1933 sub_len = 1;
1934 }
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001935 len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001936
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001937 ADJUST_INDICES(start, end, len);
1938 if (end - start < sub_len)
1939 res = -1;
Serhiy Storchaka413fdce2015-11-14 15:42:17 +02001940 else if (sub_len == 1) {
1941 if (dir > 0)
1942 res = stringlib_find_char(
1943 PyBytes_AS_STRING(self) + start, end - start,
1944 *sub);
1945 else
1946 res = stringlib_rfind_char(
1947 PyBytes_AS_STRING(self) + start, end - start,
1948 *sub);
Serhiy Storchakad9d769f2015-03-24 21:55:47 +02001949 if (res >= 0)
1950 res += start;
1951 }
1952 else {
1953 if (dir > 0)
1954 res = stringlib_find_slice(
1955 PyBytes_AS_STRING(self), len,
1956 sub, sub_len, start, end);
1957 else
1958 res = stringlib_rfind_slice(
1959 PyBytes_AS_STRING(self), len,
1960 sub, sub_len, start, end);
1961 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02001962
1963 if (subobj)
1964 PyBuffer_Release(&subbuf);
1965
1966 return res;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001967}
1968
1969
1970PyDoc_STRVAR(find__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001971"B.find(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001972\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001973Return the lowest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08001974such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001975arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001976\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001977Return -1 on failure.");
1978
Neal Norwitz6968b052007-02-27 19:02:19 +00001979static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001980bytes_find(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00001981{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001982 Py_ssize_t result = bytes_find_internal(self, args, +1);
1983 if (result == -2)
1984 return NULL;
1985 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001986}
1987
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001988
1989PyDoc_STRVAR(index__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00001990"B.index(sub[, start[, end]]) -> int\n\
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001991\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00001992Like B.find() but raise ValueError when the substring is not found.");
1993
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001994static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00001995bytes_index(PyBytesObject *self, PyObject *args)
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00001996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001997 Py_ssize_t result = bytes_find_internal(self, args, +1);
1998 if (result == -2)
1999 return NULL;
2000 if (result == -1) {
2001 PyErr_SetString(PyExc_ValueError,
2002 "substring not found");
2003 return NULL;
2004 }
2005 return PyLong_FromSsize_t(result);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002006}
2007
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002008
2009PyDoc_STRVAR(rfind__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002010"B.rfind(sub[, start[, end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002011\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002012Return the highest index in B where substring sub is found,\n\
Senthil Kumaran53516a82011-07-27 23:33:54 +08002013such that sub is contained within B[start:end]. Optional\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002014arguments start and end are interpreted as in slice notation.\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002015\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002016Return -1 on failure.");
2017
Neal Norwitz6968b052007-02-27 19:02:19 +00002018static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002019bytes_rfind(PyBytesObject *self, PyObject *args)
Neal Norwitz6968b052007-02-27 19:02:19 +00002020{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002021 Py_ssize_t result = bytes_find_internal(self, args, -1);
2022 if (result == -2)
2023 return NULL;
2024 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002025}
2026
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002027
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002028PyDoc_STRVAR(rindex__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002029"B.rindex(sub[, start[, end]]) -> int\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002030\n\
2031Like B.rfind() but raise ValueError when the substring is not found.");
2032
2033static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002034bytes_rindex(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002035{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002036 Py_ssize_t result = bytes_find_internal(self, args, -1);
2037 if (result == -2)
2038 return NULL;
2039 if (result == -1) {
2040 PyErr_SetString(PyExc_ValueError,
2041 "substring not found");
2042 return NULL;
2043 }
2044 return PyLong_FromSsize_t(result);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002045}
2046
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002047
2048Py_LOCAL_INLINE(PyObject *)
2049do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 Py_buffer vsep;
2052 char *s = PyBytes_AS_STRING(self);
2053 Py_ssize_t len = PyBytes_GET_SIZE(self);
2054 char *sep;
2055 Py_ssize_t seplen;
2056 Py_ssize_t i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002057
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002058 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 return NULL;
2060 sep = vsep.buf;
2061 seplen = vsep.len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002063 i = 0;
2064 if (striptype != RIGHTSTRIP) {
2065 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2066 i++;
2067 }
2068 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 j = len;
2071 if (striptype != LEFTSTRIP) {
2072 do {
2073 j--;
2074 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2075 j++;
2076 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 PyBuffer_Release(&vsep);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002080 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2081 Py_INCREF(self);
2082 return (PyObject*)self;
2083 }
2084 else
2085 return PyBytes_FromStringAndSize(s+i, j-i);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002086}
2087
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002088
2089Py_LOCAL_INLINE(PyObject *)
2090do_strip(PyBytesObject *self, int striptype)
2091{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002092 char *s = PyBytes_AS_STRING(self);
2093 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 i = 0;
2096 if (striptype != RIGHTSTRIP) {
David Malcolm96960882010-11-05 17:23:41 +00002097 while (i < len && Py_ISSPACE(s[i])) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002098 i++;
2099 }
2100 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 j = len;
2103 if (striptype != LEFTSTRIP) {
2104 do {
2105 j--;
David Malcolm96960882010-11-05 17:23:41 +00002106 } while (j >= i && Py_ISSPACE(s[j]));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 j++;
2108 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002110 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
2111 Py_INCREF(self);
2112 return (PyObject*)self;
2113 }
2114 else
2115 return PyBytes_FromStringAndSize(s+i, j-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002116}
2117
2118
2119Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002120do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002121{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002122 if (bytes != NULL && bytes != Py_None) {
2123 return do_xstrip(self, striptype, bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 }
2125 return do_strip(self, striptype);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002126}
2127
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002128/*[clinic input]
2129bytes.strip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002130
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002131 self: self(type="PyBytesObject *")
2132 bytes: object = None
2133 /
2134
2135Strip leading and trailing bytes contained in the argument.
2136
2137If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2138[clinic start generated code]*/
2139
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002140static PyObject *
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002141bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002142/*[clinic end generated code: output=c7c228d3bd104a1b input=37daa5fad1395d95]*/
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002143{
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002144 return do_argstrip(self, BOTHSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002145}
2146
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002147/*[clinic input]
2148bytes.lstrip
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002149
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002150 self: self(type="PyBytesObject *")
2151 bytes: object = None
2152 /
2153
2154Strip leading bytes contained in the argument.
2155
2156If the argument is omitted or None, strip leading ASCII whitespace.
2157[clinic start generated code]*/
2158
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002159static PyObject *
2160bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002161/*[clinic end generated code: output=28602e586f524e82 input=88811b09dfbc2988]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002162{
2163 return do_argstrip(self, LEFTSTRIP, bytes);
2164}
2165
2166/*[clinic input]
2167bytes.rstrip
2168
2169 self: self(type="PyBytesObject *")
2170 bytes: object = None
2171 /
2172
2173Strip trailing bytes contained in the argument.
2174
2175If the argument is omitted or None, strip trailing ASCII whitespace.
2176[clinic start generated code]*/
2177
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002178static PyObject *
2179bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002180/*[clinic end generated code: output=547e3815c95447da input=8f93c9cd361f0140]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002181{
2182 return do_argstrip(self, RIGHTSTRIP, bytes);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002183}
Neal Norwitz6968b052007-02-27 19:02:19 +00002184
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002185
2186PyDoc_STRVAR(count__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002187"B.count(sub[, start[, end]]) -> int\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002188\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002189Return the number of non-overlapping occurrences of substring sub in\n\
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002190string B[start:end]. Optional arguments start and end are interpreted\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002191as in slice notation.");
2192
2193static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002194bytes_count(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002195{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002196 PyObject *sub_obj;
2197 const char *str = PyBytes_AS_STRING(self), *sub;
2198 Py_ssize_t sub_len;
Antoine Pitrouac65d962011-10-20 23:54:17 +02002199 char byte;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002200 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002201
Antoine Pitrouac65d962011-10-20 23:54:17 +02002202 Py_buffer vsub;
2203 PyObject *count_obj;
2204
2205 if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
2206 &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002207 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002208
Antoine Pitrouac65d962011-10-20 23:54:17 +02002209 if (sub_obj) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002210 if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Antoine Pitrouac65d962011-10-20 23:54:17 +02002211 return NULL;
2212
2213 sub = vsub.buf;
2214 sub_len = vsub.len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002215 }
Antoine Pitrouac65d962011-10-20 23:54:17 +02002216 else {
2217 sub = &byte;
2218 sub_len = 1;
2219 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002221 ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002222
Antoine Pitrouac65d962011-10-20 23:54:17 +02002223 count_obj = PyLong_FromSsize_t(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002224 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2225 );
Antoine Pitrouac65d962011-10-20 23:54:17 +02002226
2227 if (sub_obj)
2228 PyBuffer_Release(&vsub);
2229
2230 return count_obj;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002231}
2232
2233
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002234/*[clinic input]
2235bytes.translate
2236
2237 self: self(type="PyBytesObject *")
Victor Stinner049e5092014-08-17 22:20:00 +02002238 table: object
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002239 Translation table, which must be a bytes object of length 256.
2240 [
2241 deletechars: object
2242 ]
2243 /
2244
2245Return a copy with each character mapped by the given translation table.
2246
2247All characters occurring in the optional argument deletechars are removed.
2248The remaining characters are mapped through the given translation table.
2249[clinic start generated code]*/
2250
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002251static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002252bytes_translate_impl(PyBytesObject *self, PyObject *table, int group_right_1,
2253 PyObject *deletechars)
2254/*[clinic end generated code: output=233df850eb50bf8d input=d8fa5519d7cc4be7]*/
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002255{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002256 char *input, *output;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002257 Py_buffer table_view = {NULL, NULL};
2258 Py_buffer del_table_view = {NULL, NULL};
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002259 const char *table_chars;
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02002260 Py_ssize_t i, c, changed = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002261 PyObject *input_obj = (PyObject*)self;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002262 const char *output_start, *del_table_chars=NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002263 Py_ssize_t inlen, tablen, dellen = 0;
2264 PyObject *result;
2265 int trans_table[256];
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002266
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002267 if (PyBytes_Check(table)) {
2268 table_chars = PyBytes_AS_STRING(table);
2269 tablen = PyBytes_GET_SIZE(table);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002270 }
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002271 else if (table == Py_None) {
2272 table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002273 tablen = 256;
2274 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002275 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002276 if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002277 return NULL;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002278 table_chars = table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002279 tablen = table_view.len;
2280 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002282 if (tablen != 256) {
2283 PyErr_SetString(PyExc_ValueError,
2284 "translation table must be 256 characters long");
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002285 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002286 return NULL;
2287 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002288
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002289 if (deletechars != NULL) {
2290 if (PyBytes_Check(deletechars)) {
2291 del_table_chars = PyBytes_AS_STRING(deletechars);
2292 dellen = PyBytes_GET_SIZE(deletechars);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002293 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002294 else {
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002295 if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002296 PyBuffer_Release(&table_view);
2297 return NULL;
2298 }
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002299 del_table_chars = del_table_view.buf;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002300 dellen = del_table_view.len;
2301 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002302 }
2303 else {
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002304 del_table_chars = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002305 dellen = 0;
2306 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002308 inlen = PyBytes_GET_SIZE(input_obj);
2309 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002310 if (result == NULL) {
2311 PyBuffer_Release(&del_table_view);
2312 PyBuffer_Release(&table_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002313 return NULL;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002314 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002315 output_start = output = PyBytes_AsString(result);
2316 input = PyBytes_AS_STRING(input_obj);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002317
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002318 if (dellen == 0 && table_chars != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002319 /* If no deletions are required, use faster code */
2320 for (i = inlen; --i >= 0; ) {
2321 c = Py_CHARMASK(*input++);
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002322 if (Py_CHARMASK((*output++ = table_chars[c])) != c)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002323 changed = 1;
2324 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002325 if (!changed && PyBytes_CheckExact(input_obj)) {
2326 Py_INCREF(input_obj);
2327 Py_DECREF(result);
2328 result = input_obj;
2329 }
2330 PyBuffer_Release(&del_table_view);
2331 PyBuffer_Release(&table_view);
2332 return result;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002333 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002334
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002335 if (table_chars == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002336 for (i = 0; i < 256; i++)
2337 trans_table[i] = Py_CHARMASK(i);
2338 } else {
2339 for (i = 0; i < 256; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002340 trans_table[i] = Py_CHARMASK(table_chars[i]);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002341 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002342 PyBuffer_Release(&table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002344 for (i = 0; i < dellen; i++)
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002345 trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002346 PyBuffer_Release(&del_table_view);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002348 for (i = inlen; --i >= 0; ) {
2349 c = Py_CHARMASK(*input++);
2350 if (trans_table[c] != -1)
2351 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2352 continue;
2353 changed = 1;
2354 }
2355 if (!changed && PyBytes_CheckExact(input_obj)) {
2356 Py_DECREF(result);
2357 Py_INCREF(input_obj);
2358 return input_obj;
2359 }
2360 /* Fix the size of the resulting string */
2361 if (inlen > 0)
2362 _PyBytes_Resize(&result, output - output_start);
2363 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002364}
2365
2366
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002367/*[clinic input]
2368
2369@staticmethod
2370bytes.maketrans
2371
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002372 frm: Py_buffer
2373 to: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002374 /
2375
2376Return a translation table useable for the bytes or bytearray translate method.
2377
2378The returned table will be one where each byte in frm is mapped to the byte at
2379the same position in to.
2380
2381The bytes objects frm and to must be of the same length.
2382[clinic start generated code]*/
2383
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002384static PyObject *
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002385bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03002386/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002387{
2388 return _Py_bytes_maketrans(frm, to);
Georg Brandlabc38772009-04-12 15:51:51 +00002389}
2390
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002391/* find and count characters and substrings */
2392
/* Evaluates to a char * addressing the first byte equal to c among the first
   target_len bytes of target, or to NULL when no such byte exists. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2395
2396/* String ops must return a string. */
2397/* If the object is subclass of string, create a copy */
2398Py_LOCAL(PyBytesObject *)
2399return_self(PyBytesObject *self)
2400{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002401 if (PyBytes_CheckExact(self)) {
2402 Py_INCREF(self);
2403 return self;
2404 }
2405 return (PyBytesObject *)PyBytes_FromStringAndSize(
2406 PyBytes_AS_STRING(self),
2407 PyBytes_GET_SIZE(self));
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002408}
2409
2410Py_LOCAL_INLINE(Py_ssize_t)
Antoine Pitrou0010d372010-08-15 17:12:55 +00002411countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002413 Py_ssize_t count=0;
2414 const char *start=target;
2415 const char *end=target+target_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002417 while ( (start=findchar(start, end-start, c)) != NULL ) {
2418 count++;
2419 if (count >= maxcount)
2420 break;
2421 start += 1;
2422 }
2423 return count;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002424}
2425
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002426
2427/* Algorithms for different cases of string replacement */
2428
2429/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2430Py_LOCAL(PyBytesObject *)
2431replace_interleave(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002432 const char *to_s, Py_ssize_t to_len,
2433 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002435 char *self_s, *result_s;
2436 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002437 Py_ssize_t count, i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002438 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002440 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002441
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002442 /* 1 at the end plus 1 after every character;
2443 count = min(maxcount, self_len + 1) */
2444 if (maxcount <= self_len)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002445 count = maxcount;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002446 else
2447 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2448 count = self_len + 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002450 /* Check for overflow */
2451 /* result_len = count * to_len + self_len; */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002452 assert(count > 0);
2453 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002454 PyErr_SetString(PyExc_OverflowError,
2455 "replacement bytes are too long");
2456 return NULL;
2457 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002458 result_len = count * to_len + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002460 if (! (result = (PyBytesObject *)
2461 PyBytes_FromStringAndSize(NULL, result_len)) )
2462 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002464 self_s = PyBytes_AS_STRING(self);
2465 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002467 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002469 /* Lay the first one down (guaranteed this will occur) */
2470 Py_MEMCPY(result_s, to_s, to_len);
2471 result_s += to_len;
2472 count -= 1;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002474 for (i=0; i<count; i++) {
2475 *result_s++ = *self_s++;
2476 Py_MEMCPY(result_s, to_s, to_len);
2477 result_s += to_len;
2478 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002480 /* Copy the rest of the original string */
2481 Py_MEMCPY(result_s, self_s, self_len-i);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002483 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002484}
2485
2486/* Special case for deleting a single character */
2487/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2488Py_LOCAL(PyBytesObject *)
2489replace_delete_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002490 char from_c, Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002491{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002492 char *self_s, *result_s;
2493 char *start, *next, *end;
2494 Py_ssize_t self_len, result_len;
2495 Py_ssize_t count;
2496 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002498 self_len = PyBytes_GET_SIZE(self);
2499 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002501 count = countchar(self_s, self_len, from_c, maxcount);
2502 if (count == 0) {
2503 return return_self(self);
2504 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002506 result_len = self_len - count; /* from_len == 1 */
2507 assert(result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002509 if ( (result = (PyBytesObject *)
2510 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2511 return NULL;
2512 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002514 start = self_s;
2515 end = self_s + self_len;
2516 while (count-- > 0) {
2517 next = findchar(start, end-start, from_c);
2518 if (next == NULL)
2519 break;
2520 Py_MEMCPY(result_s, start, next-start);
2521 result_s += (next-start);
2522 start = next+1;
2523 }
2524 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002525
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002526 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002527}
2528
2529/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2530
2531Py_LOCAL(PyBytesObject *)
2532replace_delete_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002533 const char *from_s, Py_ssize_t from_len,
2534 Py_ssize_t maxcount) {
2535 char *self_s, *result_s;
2536 char *start, *next, *end;
2537 Py_ssize_t self_len, result_len;
2538 Py_ssize_t count, offset;
2539 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002541 self_len = PyBytes_GET_SIZE(self);
2542 self_s = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002544 count = stringlib_count(self_s, self_len,
2545 from_s, from_len,
2546 maxcount);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002548 if (count == 0) {
2549 /* no matches */
2550 return return_self(self);
2551 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002553 result_len = self_len - (count * from_len);
2554 assert (result_len>=0);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002556 if ( (result = (PyBytesObject *)
2557 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
2558 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002559
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002560 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002562 start = self_s;
2563 end = self_s + self_len;
2564 while (count-- > 0) {
2565 offset = stringlib_find(start, end-start,
2566 from_s, from_len,
2567 0);
2568 if (offset == -1)
2569 break;
2570 next = start + offset;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002572 Py_MEMCPY(result_s, start, next-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002574 result_s += (next-start);
2575 start = next+from_len;
2576 }
2577 Py_MEMCPY(result_s, start, end-start);
2578 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002579}
2580
2581/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2582Py_LOCAL(PyBytesObject *)
2583replace_single_character_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002584 char from_c, char to_c,
2585 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002586{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002587 char *self_s, *result_s, *start, *end, *next;
2588 Py_ssize_t self_len;
2589 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002591 /* The result string will be the same size */
2592 self_s = PyBytes_AS_STRING(self);
2593 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002594
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002595 next = findchar(self_s, self_len, from_c);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002597 if (next == NULL) {
2598 /* No matches; return the original string */
2599 return return_self(self);
2600 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002602 /* Need to make a new string */
2603 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2604 if (result == NULL)
2605 return NULL;
2606 result_s = PyBytes_AS_STRING(result);
2607 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002609 /* change everything in-place, starting with this one */
2610 start = result_s + (next-self_s);
2611 *start = to_c;
2612 start++;
2613 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002614
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002615 while (--maxcount > 0) {
2616 next = findchar(start, end-start, from_c);
2617 if (next == NULL)
2618 break;
2619 *next = to_c;
2620 start = next+1;
2621 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002623 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002624}
2625
2626/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2627Py_LOCAL(PyBytesObject *)
2628replace_substring_in_place(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002629 const char *from_s, Py_ssize_t from_len,
2630 const char *to_s, Py_ssize_t to_len,
2631 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002632{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002633 char *result_s, *start, *end;
2634 char *self_s;
2635 Py_ssize_t self_len, offset;
2636 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002638 /* The result string will be the same size */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002640 self_s = PyBytes_AS_STRING(self);
2641 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002643 offset = stringlib_find(self_s, self_len,
2644 from_s, from_len,
2645 0);
2646 if (offset == -1) {
2647 /* No matches; return the original string */
2648 return return_self(self);
2649 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002651 /* Need to make a new string */
2652 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
2653 if (result == NULL)
2654 return NULL;
2655 result_s = PyBytes_AS_STRING(result);
2656 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002658 /* change everything in-place, starting with this one */
2659 start = result_s + offset;
2660 Py_MEMCPY(start, to_s, from_len);
2661 start += from_len;
2662 end = result_s + self_len;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002664 while ( --maxcount > 0) {
2665 offset = stringlib_find(start, end-start,
2666 from_s, from_len,
2667 0);
2668 if (offset==-1)
2669 break;
2670 Py_MEMCPY(start+offset, to_s, from_len);
2671 start += offset+from_len;
2672 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002674 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002675}
2676
2677/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2678Py_LOCAL(PyBytesObject *)
2679replace_single_character(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002680 char from_c,
2681 const char *to_s, Py_ssize_t to_len,
2682 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002683{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002684 char *self_s, *result_s;
2685 char *start, *next, *end;
2686 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002687 Py_ssize_t count;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002688 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002690 self_s = PyBytes_AS_STRING(self);
2691 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002693 count = countchar(self_s, self_len, from_c, maxcount);
2694 if (count == 0) {
2695 /* no matches, return unchanged */
2696 return return_self(self);
2697 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002699 /* use the difference between current and new, hence the "-1" */
2700 /* result_len = self_len + count * (to_len-1) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002701 assert(count > 0);
2702 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002703 PyErr_SetString(PyExc_OverflowError,
2704 "replacement bytes are too long");
2705 return NULL;
2706 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002707 result_len = self_len + count * (to_len - 1);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002709 if ( (result = (PyBytesObject *)
2710 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2711 return NULL;
2712 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002714 start = self_s;
2715 end = self_s + self_len;
2716 while (count-- > 0) {
2717 next = findchar(start, end-start, from_c);
2718 if (next == NULL)
2719 break;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002721 if (next == start) {
2722 /* replace with the 'to' */
2723 Py_MEMCPY(result_s, to_s, to_len);
2724 result_s += to_len;
2725 start += 1;
2726 } else {
2727 /* copy the unchanged old then the 'to' */
2728 Py_MEMCPY(result_s, start, next-start);
2729 result_s += (next-start);
2730 Py_MEMCPY(result_s, to_s, to_len);
2731 result_s += to_len;
2732 start = next+1;
2733 }
2734 }
2735 /* Copy the remainder of the remaining string */
2736 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002737
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002738 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002739}
2740
2741/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2742Py_LOCAL(PyBytesObject *)
2743replace_substring(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002744 const char *from_s, Py_ssize_t from_len,
2745 const char *to_s, Py_ssize_t to_len,
2746 Py_ssize_t maxcount) {
2747 char *self_s, *result_s;
2748 char *start, *next, *end;
2749 Py_ssize_t self_len, result_len;
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002750 Py_ssize_t count, offset;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002751 PyBytesObject *result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002753 self_s = PyBytes_AS_STRING(self);
2754 self_len = PyBytes_GET_SIZE(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002756 count = stringlib_count(self_s, self_len,
2757 from_s, from_len,
2758 maxcount);
Antoine Pitrouf2c54842010-01-13 08:07:53 +00002759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002760 if (count == 0) {
2761 /* no matches, return unchanged */
2762 return return_self(self);
2763 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002765 /* Check for overflow */
2766 /* result_len = self_len + count * (to_len-from_len) */
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002767 assert(count > 0);
2768 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002769 PyErr_SetString(PyExc_OverflowError,
2770 "replacement bytes are too long");
2771 return NULL;
2772 }
Mark Dickinsoncf940c72010-08-10 18:35:01 +00002773 result_len = self_len + count * (to_len-from_len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002775 if ( (result = (PyBytesObject *)
2776 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2777 return NULL;
2778 result_s = PyBytes_AS_STRING(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002780 start = self_s;
2781 end = self_s + self_len;
2782 while (count-- > 0) {
2783 offset = stringlib_find(start, end-start,
2784 from_s, from_len,
2785 0);
2786 if (offset == -1)
2787 break;
2788 next = start+offset;
2789 if (next == start) {
2790 /* replace with the 'to' */
2791 Py_MEMCPY(result_s, to_s, to_len);
2792 result_s += to_len;
2793 start += from_len;
2794 } else {
2795 /* copy the unchanged old then the 'to' */
2796 Py_MEMCPY(result_s, start, next-start);
2797 result_s += (next-start);
2798 Py_MEMCPY(result_s, to_s, to_len);
2799 result_s += to_len;
2800 start = next+from_len;
2801 }
2802 }
2803 /* Copy the remainder of the remaining string */
2804 Py_MEMCPY(result_s, start, end-start);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002806 return result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002807}
2808
2809
2810Py_LOCAL(PyBytesObject *)
2811replace(PyBytesObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002812 const char *from_s, Py_ssize_t from_len,
2813 const char *to_s, Py_ssize_t to_len,
2814 Py_ssize_t maxcount)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002815{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002816 if (maxcount < 0) {
2817 maxcount = PY_SSIZE_T_MAX;
2818 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2819 /* nothing to do; return the original string */
2820 return return_self(self);
2821 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002823 if (maxcount == 0 ||
2824 (from_len == 0 && to_len == 0)) {
2825 /* nothing to do; return the original string */
2826 return return_self(self);
2827 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002828
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002829 /* Handle zero-length special cases */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002831 if (from_len == 0) {
2832 /* insert the 'to' string everywhere. */
2833 /* >>> "Python".replace("", ".") */
2834 /* '.P.y.t.h.o.n.' */
2835 return replace_interleave(self, to_s, to_len, maxcount);
2836 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002838 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2839 /* point for an empty self string to generate a non-empty string */
2840 /* Special case so the remaining code always gets a non-empty string */
2841 if (PyBytes_GET_SIZE(self) == 0) {
2842 return return_self(self);
2843 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002845 if (to_len == 0) {
2846 /* delete all occurrences of 'from' string */
2847 if (from_len == 1) {
2848 return replace_delete_single_character(
2849 self, from_s[0], maxcount);
2850 } else {
2851 return replace_delete_substring(self, from_s,
2852 from_len, maxcount);
2853 }
2854 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002855
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002856 /* Handle special case where both strings have the same length */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002857
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002858 if (from_len == to_len) {
2859 if (from_len == 1) {
2860 return replace_single_character_in_place(
2861 self,
2862 from_s[0],
2863 to_s[0],
2864 maxcount);
2865 } else {
2866 return replace_substring_in_place(
2867 self, from_s, from_len, to_s, to_len,
2868 maxcount);
2869 }
2870 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002871
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002872 /* Otherwise use the more generic algorithms */
2873 if (from_len == 1) {
2874 return replace_single_character(self, from_s[0],
2875 to_s, to_len, maxcount);
2876 } else {
2877 /* len('from')>=2, len('to')>=1 */
2878 return replace_substring(self, from_s, from_len, to_s, to_len,
2879 maxcount);
2880 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002881}
2882
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002883
2884/*[clinic input]
2885bytes.replace
2886
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002887 old: Py_buffer
2888 new: Py_buffer
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002889 count: Py_ssize_t = -1
2890 Maximum number of occurrences to replace.
2891 -1 (the default value) means replace all occurrences.
2892 /
2893
2894Return a copy with all occurrences of substring old replaced by new.
2895
2896If the optional argument count is given, only the first count occurrences are
2897replaced.
2898[clinic start generated code]*/
2899
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002900static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04002901bytes_replace_impl(PyBytesObject*self, Py_buffer *old, Py_buffer *new,
2902 Py_ssize_t count)
2903/*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02002904{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002905 return (PyObject *)replace((PyBytesObject *) self,
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02002906 (const char *)old->buf, old->len,
2907 (const char *)new->buf, new->len, count);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002908}
2909
2910/** End DALKE **/
2911
2912/* Matches the end (direction >= 0) or start (direction < 0) of self
2913 * against substr, using the start and end arguments. Returns
2914 * -1 on error, 0 if not found and 1 if found.
2915 */
2916Py_LOCAL(int)
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002917_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002918 Py_ssize_t end, int direction)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002919{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002920 Py_ssize_t len = PyBytes_GET_SIZE(self);
2921 Py_ssize_t slen;
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002922 Py_buffer sub_view = {NULL, NULL};
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002923 const char* sub;
2924 const char* str;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002926 if (PyBytes_Check(substr)) {
2927 sub = PyBytes_AS_STRING(substr);
2928 slen = PyBytes_GET_SIZE(substr);
2929 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002930 else {
2931 if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
2932 return -1;
2933 sub = sub_view.buf;
2934 slen = sub_view.len;
2935 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002936 str = PyBytes_AS_STRING(self);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002938 ADJUST_INDICES(start, end, len);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002940 if (direction < 0) {
2941 /* startswith */
2942 if (start+slen > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002943 goto notfound;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002944 } else {
2945 /* endswith */
2946 if (end-start < slen || start > len)
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002947 goto notfound;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002949 if (end-slen > start)
2950 start = end - slen;
2951 }
Serhiy Storchaka4fdb6842015-02-03 01:21:08 +02002952 if (end-start < slen)
2953 goto notfound;
2954 if (memcmp(str+start, sub, slen) != 0)
2955 goto notfound;
2956
2957 PyBuffer_Release(&sub_view);
2958 return 1;
2959
2960notfound:
2961 PyBuffer_Release(&sub_view);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002962 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002963}
2964
2965
2966PyDoc_STRVAR(startswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00002967"B.startswith(prefix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002968\n\
2969Return True if B starts with the specified prefix, False otherwise.\n\
2970With optional start, test B beginning at that position.\n\
2971With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00002972prefix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002973
2974static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00002975bytes_startswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002976{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002977 Py_ssize_t start = 0;
2978 Py_ssize_t end = PY_SSIZE_T_MAX;
2979 PyObject *subobj;
2980 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00002981
Jesus Ceaac451502011-04-20 17:09:23 +02002982 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002983 return NULL;
2984 if (PyTuple_Check(subobj)) {
2985 Py_ssize_t i;
2986 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2987 result = _bytes_tailmatch(self,
2988 PyTuple_GET_ITEM(subobj, i),
2989 start, end, -1);
2990 if (result == -1)
2991 return NULL;
2992 else if (result) {
2993 Py_RETURN_TRUE;
2994 }
2995 }
2996 Py_RETURN_FALSE;
2997 }
2998 result = _bytes_tailmatch(self, subobj, start, end, -1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03002999 if (result == -1) {
3000 if (PyErr_ExceptionMatches(PyExc_TypeError))
3001 PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
3002 "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003003 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003004 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003005 else
3006 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003007}
3008
3009
3010PyDoc_STRVAR(endswith__doc__,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003011"B.endswith(suffix[, start[, end]]) -> bool\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003012\n\
3013Return True if B ends with the specified suffix, False otherwise.\n\
3014With optional start, test B beginning at that position.\n\
3015With optional end, stop comparing B at that position.\n\
Benjamin Peterson4116f362008-05-27 00:36:20 +00003016suffix can also be a tuple of bytes to try.");
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003017
3018static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003019bytes_endswith(PyBytesObject *self, PyObject *args)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003020{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003021 Py_ssize_t start = 0;
3022 Py_ssize_t end = PY_SSIZE_T_MAX;
3023 PyObject *subobj;
3024 int result;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003025
Jesus Ceaac451502011-04-20 17:09:23 +02003026 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003027 return NULL;
3028 if (PyTuple_Check(subobj)) {
3029 Py_ssize_t i;
3030 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3031 result = _bytes_tailmatch(self,
3032 PyTuple_GET_ITEM(subobj, i),
3033 start, end, +1);
3034 if (result == -1)
3035 return NULL;
3036 else if (result) {
3037 Py_RETURN_TRUE;
3038 }
3039 }
3040 Py_RETURN_FALSE;
3041 }
3042 result = _bytes_tailmatch(self, subobj, start, end, +1);
Ezio Melottiba42fd52011-04-26 06:09:45 +03003043 if (result == -1) {
3044 if (PyErr_ExceptionMatches(PyExc_TypeError))
3045 PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
3046 "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003047 return NULL;
Ezio Melottiba42fd52011-04-26 06:09:45 +03003048 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003049 else
3050 return PyBool_FromLong(result);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003051}
3052
3053
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003054/*[clinic input]
3055bytes.decode
3056
3057 encoding: str(c_default="NULL") = 'utf-8'
3058 The encoding with which to decode the bytes.
3059 errors: str(c_default="NULL") = 'strict'
3060 The error handling scheme to use for the handling of decoding errors.
3061 The default is 'strict' meaning that decoding errors raise a
3062 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
3063 as well as any other name registered with codecs.register_error that
3064 can handle UnicodeDecodeErrors.
3065
3066Decode the bytes using the codec registered for encoding.
3067[clinic start generated code]*/
3068
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003069static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04003070bytes_decode_impl(PyBytesObject*self, const char *encoding,
3071 const char *errors)
3072/*[clinic end generated code: output=2d2016ff8e0bb176 input=958174769d2a40ca]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003073{
Martin v. Löwis0efea322014-07-27 17:29:17 +02003074 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
Guido van Rossumd624f182006-04-24 13:47:05 +00003075}
3076
Guido van Rossum20188312006-05-05 15:15:40 +00003077
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003078/*[clinic input]
3079bytes.splitlines
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003080
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003081 keepends: int(c_default="0") = False
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003082
3083Return a list of the lines in the bytes, breaking at line boundaries.
3084
3085Line breaks are not included in the resulting list unless keepends is given and
3086true.
3087[clinic start generated code]*/
3088
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003089static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003090bytes_splitlines_impl(PyBytesObject*self, int keepends)
Serhiy Storchaka8b2e8b62015-05-30 11:30:39 +03003091/*[clinic end generated code: output=995c3598f7833cad input=7f4aac67144f9944]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003092{
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003093 return stringlib_splitlines(
Antoine Pitroud1188562010-06-09 16:38:55 +00003094 (PyObject*) self, PyBytes_AS_STRING(self),
3095 PyBytes_GET_SIZE(self), keepends
3096 );
Antoine Pitrouf2c54842010-01-13 08:07:53 +00003097}
3098
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003099/*[clinic input]
3100@classmethod
3101bytes.fromhex
3102
3103 string: unicode
3104 /
3105
3106Create a bytes object from a string of hexadecimal numbers.
3107
3108Spaces between two numbers are accepted.
3109Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
3110[clinic start generated code]*/
3111
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003112static PyObject *
Martin v. Löwis0efea322014-07-27 17:29:17 +02003113bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
Serhiy Storchaka1009bf12015-04-03 23:53:51 +03003114/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003115{
Victor Stinner2bf89932015-10-14 11:25:33 +02003116 return _PyBytes_FromHex(string, 0);
3117}
3118
3119PyObject*
3120_PyBytes_FromHex(PyObject *string, int use_bytearray)
3121{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003122 char *buf;
Victor Stinner2bf89932015-10-14 11:25:33 +02003123 Py_ssize_t hexlen, invalid_char;
3124 unsigned int top, bot;
3125 Py_UCS1 *str, *end;
3126 _PyBytesWriter writer;
3127
3128 _PyBytesWriter_Init(&writer);
3129 writer.use_bytearray = use_bytearray;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003130
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003131 assert(PyUnicode_Check(string));
3132 if (PyUnicode_READY(string))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003133 return NULL;
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003134 hexlen = PyUnicode_GET_LENGTH(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003135
Victor Stinner2bf89932015-10-14 11:25:33 +02003136 if (!PyUnicode_IS_ASCII(string)) {
3137 void *data = PyUnicode_DATA(string);
3138 unsigned int kind = PyUnicode_KIND(string);
3139 Py_ssize_t i;
3140
3141 /* search for the first non-ASCII character */
3142 for (i = 0; i < hexlen; i++) {
3143 if (PyUnicode_READ(kind, data, i) >= 128)
3144 break;
3145 }
3146 invalid_char = i;
3147 goto error;
3148 }
3149
3150 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
3151 str = PyUnicode_1BYTE_DATA(string);
3152
3153 /* This overestimates if there are spaces */
3154 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
3155 if (buf == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003156 return NULL;
Victor Stinner2bf89932015-10-14 11:25:33 +02003157
3158 end = str + hexlen;
3159 while (str < end) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003160 /* skip over spaces in the input */
Victor Stinner2bf89932015-10-14 11:25:33 +02003161 if (*str == ' ') {
3162 do {
3163 str++;
3164 } while (*str == ' ');
3165 if (str >= end)
3166 break;
3167 }
3168
3169 top = _PyLong_DigitValue[*str];
3170 if (top >= 16) {
3171 invalid_char = str - PyUnicode_1BYTE_DATA(string);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003172 goto error;
3173 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003174 str++;
3175
3176 bot = _PyLong_DigitValue[*str];
3177 if (bot >= 16) {
3178 invalid_char = str - PyUnicode_1BYTE_DATA(string);
3179 goto error;
3180 }
3181 str++;
3182
3183 *buf++ = (unsigned char)((top << 4) + bot);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003184 }
Victor Stinner2bf89932015-10-14 11:25:33 +02003185
3186 return _PyBytesWriter_Finish(&writer, buf);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003187
3188 error:
Victor Stinner2bf89932015-10-14 11:25:33 +02003189 PyErr_Format(PyExc_ValueError,
3190 "non-hexadecimal number found in "
3191 "fromhex() arg at position %zd", invalid_char);
3192 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003193 return NULL;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003194}
3195
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003196PyDoc_STRVAR(hex__doc__,
3197"B.hex() -> string\n\
3198\n\
3199Create a string of hexadecimal numbers from a bytes object.\n\
3200Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
3201
3202static PyObject *
3203bytes_hex(PyBytesObject *self)
3204{
3205 char* argbuf = PyBytes_AS_STRING(self);
3206 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
3207 return _Py_strhex(argbuf, arglen);
3208}
3209
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003210static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003211bytes_getnewargs(PyBytesObject *v)
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003213 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003214}
3215
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003216
3217static PyMethodDef
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003218bytes_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003219 {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
3220 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3221 _Py_capitalize__doc__},
3222 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3223 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003224 BYTES_DECODE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003225 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
3226 endswith__doc__},
Ezio Melotti745d54d2013-11-16 19:10:57 +02003227 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003228 expandtabs__doc__},
3229 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003230 BYTES_FROMHEX_METHODDEF
Gregory P. Smith8cb65692015-04-25 23:22:26 +00003231 {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003232 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3233 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3234 _Py_isalnum__doc__},
3235 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3236 _Py_isalpha__doc__},
3237 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3238 _Py_isdigit__doc__},
3239 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3240 _Py_islower__doc__},
3241 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3242 _Py_isspace__doc__},
3243 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3244 _Py_istitle__doc__},
3245 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3246 _Py_isupper__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003247 BYTES_JOIN_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003248 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3249 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003250 BYTES_LSTRIP_METHODDEF
3251 BYTES_MAKETRANS_METHODDEF
3252 BYTES_PARTITION_METHODDEF
3253 BYTES_REPLACE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003254 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3255 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3256 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003257 BYTES_RPARTITION_METHODDEF
3258 BYTES_RSPLIT_METHODDEF
3259 BYTES_RSTRIP_METHODDEF
3260 BYTES_SPLIT_METHODDEF
3261 BYTES_SPLITLINES_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003262 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
3263 startswith__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003264 BYTES_STRIP_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003265 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3266 _Py_swapcase__doc__},
3267 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
Martin v. Löwis7252a6e2014-07-27 16:25:09 +02003268 BYTES_TRANSLATE_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003269 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3270 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003271 {NULL, NULL} /* sentinel */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003272};
3273
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003274static PyObject *
Victor Stinner772b2b02015-10-14 09:56:53 +02003275bytes_mod(PyObject *self, PyObject *args)
Ethan Furmanb95b5612015-01-23 20:05:18 -08003276{
Victor Stinner772b2b02015-10-14 09:56:53 +02003277 if (self == NULL || !PyBytes_Check(self)) {
3278 PyErr_BadInternalCall();
3279 return NULL;
3280 }
3281
3282 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
3283 args, 0);
Ethan Furmanb95b5612015-01-23 20:05:18 -08003284}
3285
3286static PyNumberMethods bytes_as_number = {
3287 0, /*nb_add*/
3288 0, /*nb_subtract*/
3289 0, /*nb_multiply*/
3290 bytes_mod, /*nb_remainder*/
3291};
3292
3293static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003294bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003295
3296static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003297bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003298{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003299 PyObject *x = NULL;
3300 const char *encoding = NULL;
3301 const char *errors = NULL;
3302 PyObject *new = NULL;
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003303 PyObject *func;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003304 Py_ssize_t size;
3305 static char *kwlist[] = {"source", "encoding", "errors", 0};
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003306 _Py_IDENTIFIER(__bytes__);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003308 if (type != &PyBytes_Type)
Serhiy Storchaka15095802015-11-25 15:47:01 +02003309 return bytes_subtype_new(type, args, kwds);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003310 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
3311 &encoding, &errors))
3312 return NULL;
3313 if (x == NULL) {
3314 if (encoding != NULL || errors != NULL) {
3315 PyErr_SetString(PyExc_TypeError,
3316 "encoding or errors without sequence "
3317 "argument");
3318 return NULL;
3319 }
Victor Stinnerdb067af2014-05-02 22:31:14 +02003320 return PyBytes_FromStringAndSize(NULL, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003321 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003322
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003323 if (encoding != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003324 /* Encode via the codec registry */
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003325 if (!PyUnicode_Check(x)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003326 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003327 "encoding without a string argument");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003328 return NULL;
3329 }
3330 new = PyUnicode_AsEncodedString(x, encoding, errors);
3331 if (new == NULL)
3332 return NULL;
3333 assert(PyBytes_Check(new));
3334 return new;
3335 }
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003336
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003337 if (errors != NULL) {
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003338 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003339 PyUnicode_Check(x) ?
3340 "string argument without an encoding" :
3341 "errors without a string argument");
Serhiy Storchaka83cf99d2014-12-02 09:24:06 +02003342 return NULL;
3343 }
3344
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003345 /* We'd like to call PyObject_Bytes here, but we need to check for an
3346 integer argument before deferring to PyBytes_FromObject, something
3347 PyObject_Bytes doesn't do. */
3348 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
3349 if (func != NULL) {
3350 new = PyObject_CallFunctionObjArgs(func, NULL);
3351 Py_DECREF(func);
3352 if (new == NULL)
3353 return NULL;
3354 if (!PyBytes_Check(new)) {
3355 PyErr_Format(PyExc_TypeError,
3356 "__bytes__ returned non-bytes (type %.200s)",
3357 Py_TYPE(new)->tp_name);
3358 Py_DECREF(new);
3359 return NULL;
3360 }
3361 return new;
3362 }
3363 else if (PyErr_Occurred())
3364 return NULL;
3365
Serhiy Storchaka5aac3ed2015-12-20 16:36:34 +02003366 if (PyUnicode_Check(x)) {
3367 PyErr_SetString(PyExc_TypeError,
3368 "string argument without an encoding");
3369 return NULL;
3370 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003371 /* Is it an integer? */
3372 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
3373 if (size == -1 && PyErr_Occurred()) {
3374 if (PyErr_ExceptionMatches(PyExc_OverflowError))
3375 return NULL;
3376 PyErr_Clear();
3377 }
3378 else if (size < 0) {
3379 PyErr_SetString(PyExc_ValueError, "negative count");
3380 return NULL;
3381 }
3382 else {
Victor Stinnerdb067af2014-05-02 22:31:14 +02003383 new = _PyBytes_FromSize(size, 1);
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003384 if (new == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003385 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003386 return new;
3387 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003388
Benjamin Peterson5ff3f732012-12-19 15:27:41 -06003389 return PyBytes_FromObject(x);
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003390}
3391
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003392static PyObject*
3393_PyBytes_FromBuffer(PyObject *x)
3394{
3395 PyObject *new;
3396 Py_buffer view;
3397
3398 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
3399 return NULL;
3400
3401 new = PyBytes_FromStringAndSize(NULL, view.len);
3402 if (!new)
3403 goto fail;
3404 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
3405 &view, view.len, 'C') < 0)
3406 goto fail;
3407 PyBuffer_Release(&view);
3408 return new;
3409
3410fail:
3411 Py_XDECREF(new);
3412 PyBuffer_Release(&view);
3413 return NULL;
3414}
3415
Victor Stinner3c50ce32015-10-14 13:50:40 +02003416#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
3417 do { \
3418 PyObject *bytes; \
3419 Py_ssize_t i; \
3420 Py_ssize_t value; \
3421 char *str; \
3422 PyObject *item; \
3423 \
3424 bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
3425 if (bytes == NULL) \
3426 return NULL; \
3427 str = ((PyBytesObject *)bytes)->ob_sval; \
3428 \
3429 for (i = 0; i < Py_SIZE(x); i++) { \
3430 item = GET_ITEM((x), i); \
3431 value = PyNumber_AsSsize_t(item, PyExc_ValueError); \
3432 if (value == -1 && PyErr_Occurred()) \
3433 goto error; \
3434 \
3435 if (value < 0 || value >= 256) { \
3436 PyErr_SetString(PyExc_ValueError, \
3437 "bytes must be in range(0, 256)"); \
3438 goto error; \
3439 } \
3440 *str++ = (char) value; \
3441 } \
3442 return bytes; \
3443 \
3444 error: \
3445 Py_DECREF(bytes); \
3446 return NULL; \
3447 } while (0)
3448
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003449static PyObject*
3450_PyBytes_FromList(PyObject *x)
3451{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003452 _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003453}
3454
3455static PyObject*
3456_PyBytes_FromTuple(PyObject *x)
3457{
Victor Stinner3c50ce32015-10-14 13:50:40 +02003458 _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003459}
3460
3461static PyObject *
3462_PyBytes_FromIterator(PyObject *x)
Benjamin Petersonc15a0732008-08-26 16:46:47 +00003463{
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003464 char *str;
3465 PyObject *it;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003466 Py_ssize_t i, size;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003467 _PyBytesWriter writer;
3468
3469 _PyBytesWriter_Init(&writer);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003471 /* For iterator version, create a string object and resize as needed */
Armin Ronacheraa9a79d2012-10-06 14:03:24 +02003472 size = PyObject_LengthHint(x, 64);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003473 if (size == -1 && PyErr_Occurred())
3474 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003475
3476 str = _PyBytesWriter_Alloc(&writer, size);
3477 if (str == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003478 return NULL;
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003479 writer.overallocate = 1;
3480 size = writer.allocated;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003481
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003482 /* Get the iterator */
3483 it = PyObject_GetIter(x);
3484 if (it == NULL)
3485 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003487 /* Run the iterator to exhaustion */
3488 for (i = 0; ; i++) {
3489 PyObject *item;
3490 Py_ssize_t value;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003492 /* Get the next item */
3493 item = PyIter_Next(it);
3494 if (item == NULL) {
3495 if (PyErr_Occurred())
3496 goto error;
3497 break;
3498 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003500 /* Interpret it as an int (__index__) */
3501 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
3502 Py_DECREF(item);
3503 if (value == -1 && PyErr_Occurred())
3504 goto error;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003505
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003506 /* Range check */
3507 if (value < 0 || value >= 256) {
3508 PyErr_SetString(PyExc_ValueError,
3509 "bytes must be in range(0, 256)");
3510 goto error;
3511 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003513 /* Append the byte */
3514 if (i >= size) {
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003515 str = _PyBytesWriter_Resize(&writer, str, size+1);
3516 if (str == NULL)
3517 return NULL;
3518 size = writer.allocated;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003519 }
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003520 *str++ = (char) value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003521 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003522 Py_DECREF(it);
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003523
3524 return _PyBytesWriter_Finish(&writer, str);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003525
3526 error:
Victor Stinnerc3d2bc12015-10-14 14:15:49 +02003527 _PyBytesWriter_Dealloc(&writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003528 Py_XDECREF(it);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003529 return NULL;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003530}
3531
Victor Stinnerf2eafa32015-10-14 13:44:29 +02003532PyObject *
3533PyBytes_FromObject(PyObject *x)
3534{
3535 if (x == NULL) {
3536 PyErr_BadInternalCall();
3537 return NULL;
3538 }
3539
3540 if (PyBytes_CheckExact(x)) {
3541 Py_INCREF(x);
3542 return x;
3543 }
3544
3545 /* Use the modern buffer interface */
3546 if (PyObject_CheckBuffer(x))
3547 return _PyBytes_FromBuffer(x);
3548
3549 if (PyList_CheckExact(x))
3550 return _PyBytes_FromList(x);
3551
3552 if (PyTuple_CheckExact(x))
3553 return _PyBytes_FromTuple(x);
3554
3555 if (PyUnicode_Check(x)) {
3556 PyErr_SetString(PyExc_TypeError,
3557 "cannot convert unicode object to bytes");
3558 return NULL;
3559 }
3560
3561 return _PyBytes_FromIterator(x);
3562}
3563
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003564static PyObject *
Serhiy Storchaka15095802015-11-25 15:47:01 +02003565bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003567 PyObject *tmp, *pnew;
3568 Py_ssize_t n;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003570 assert(PyType_IsSubtype(type, &PyBytes_Type));
3571 tmp = bytes_new(&PyBytes_Type, args, kwds);
3572 if (tmp == NULL)
3573 return NULL;
Serhiy Storchaka15095802015-11-25 15:47:01 +02003574 assert(PyBytes_Check(tmp));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003575 n = PyBytes_GET_SIZE(tmp);
3576 pnew = type->tp_alloc(type, n);
3577 if (pnew != NULL) {
3578 Py_MEMCPY(PyBytes_AS_STRING(pnew),
3579 PyBytes_AS_STRING(tmp), n+1);
3580 ((PyBytesObject *)pnew)->ob_shash =
3581 ((PyBytesObject *)tmp)->ob_shash;
3582 }
3583 Py_DECREF(tmp);
3584 return pnew;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003585}
3586
/* Docstring of the bytes type; used as tp_doc in PyBytes_Type.  The text
   lists the accepted constructor call forms followed by the kinds of
   source object they take. */
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003587PyDoc_STRVAR(bytes_doc,
Georg Brandl17cb8a82008-05-30 08:20:09 +00003588"bytes(iterable_of_ints) -> bytes\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003589bytes(string, encoding[, errors]) -> bytes\n\
Georg Brandl17cb8a82008-05-30 08:20:09 +00003590bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003591bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
3592bytes() -> empty bytes object\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003593\n\
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003594Construct an immutable array of bytes from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003595 - an iterable yielding integers in range(256)\n\
3596 - a text string encoded using the specified encoding\n\
Victor Stinnerbb2e9c42011-12-17 23:18:07 +01003597 - any object implementing the buffer API.\n\
3598 - an integer");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003599
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003600static PyObject *bytes_iter(PyObject *seq);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003601
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003602PyTypeObject PyBytes_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003603 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3604 "bytes",
3605 PyBytesObject_SIZE,
3606 sizeof(char),
3607 bytes_dealloc, /* tp_dealloc */
3608 0, /* tp_print */
3609 0, /* tp_getattr */
3610 0, /* tp_setattr */
3611 0, /* tp_reserved */
3612 (reprfunc)bytes_repr, /* tp_repr */
Ethan Furmanb95b5612015-01-23 20:05:18 -08003613 &bytes_as_number, /* tp_as_number */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003614 &bytes_as_sequence, /* tp_as_sequence */
3615 &bytes_as_mapping, /* tp_as_mapping */
3616 (hashfunc)bytes_hash, /* tp_hash */
3617 0, /* tp_call */
3618 bytes_str, /* tp_str */
3619 PyObject_GenericGetAttr, /* tp_getattro */
3620 0, /* tp_setattro */
3621 &bytes_as_buffer, /* tp_as_buffer */
3622 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3623 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
3624 bytes_doc, /* tp_doc */
3625 0, /* tp_traverse */
3626 0, /* tp_clear */
3627 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3628 0, /* tp_weaklistoffset */
3629 bytes_iter, /* tp_iter */
3630 0, /* tp_iternext */
3631 bytes_methods, /* tp_methods */
3632 0, /* tp_members */
3633 0, /* tp_getset */
3634 &PyBaseObject_Type, /* tp_base */
3635 0, /* tp_dict */
3636 0, /* tp_descr_get */
3637 0, /* tp_descr_set */
3638 0, /* tp_dictoffset */
3639 0, /* tp_init */
3640 0, /* tp_alloc */
3641 bytes_new, /* tp_new */
3642 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003643};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003644
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003645void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003646PyBytes_Concat(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003648 assert(pv != NULL);
3649 if (*pv == NULL)
3650 return;
3651 if (w == NULL) {
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003652 Py_CLEAR(*pv);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003653 return;
3654 }
Antoine Pitrou161d6952014-05-01 14:36:20 +02003655
3656 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
3657 /* Only one reference, so we can resize in place */
Zachary Warebca96942014-05-06 11:42:37 -05003658 Py_ssize_t oldsize;
Antoine Pitrou161d6952014-05-01 14:36:20 +02003659 Py_buffer wb;
Victor Stinner049e5092014-08-17 22:20:00 +02003660
Antoine Pitrou161d6952014-05-01 14:36:20 +02003661 wb.len = -1;
Serhiy Storchaka3dd3e262015-02-03 01:25:42 +02003662 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
Antoine Pitrou161d6952014-05-01 14:36:20 +02003663 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
3664 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
3665 Py_CLEAR(*pv);
3666 return;
3667 }
3668
3669 oldsize = PyBytes_GET_SIZE(*pv);
3670 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3671 PyErr_NoMemory();
3672 goto error;
3673 }
3674 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3675 goto error;
3676
3677 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3678 PyBuffer_Release(&wb);
3679 return;
3680
3681 error:
3682 PyBuffer_Release(&wb);
3683 Py_CLEAR(*pv);
3684 return;
3685 }
3686
3687 else {
3688 /* Multiple references, need to create new object */
3689 PyObject *v;
3690 v = bytes_concat(*pv, w);
Serhiy Storchaka5a57ade2015-12-24 10:35:59 +02003691 Py_SETREF(*pv, v);
Antoine Pitrou161d6952014-05-01 14:36:20 +02003692 }
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003693}
3694
3695void
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003696PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003697{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003698 PyBytes_Concat(pv, w);
3699 Py_XDECREF(w);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003700}
3701
3702
Ethan Furmanb95b5612015-01-23 20:05:18 -08003703/* The following function breaks the notion that bytes are immutable:
3704 it changes the size of a bytes object. We get away with this only if there
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003705 is only one module referencing the object. You can also think of it
Ethan Furmanb95b5612015-01-23 20:05:18 -08003706 as creating a new bytes object and destroying the old one, only
3707 more efficiently. In any case, don't use this if the bytes object may
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003708 already be known to some other part of the code...
Ethan Furmanb95b5612015-01-23 20:05:18 -08003709 Note that if there's not enough memory to resize the bytes object, the
3710 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003711 memory" exception is set, and -1 is returned. Else (on success) 0 is
3712 returned, and the value in *pv may or may not be the same as on input.
3713 As always, an extra byte is allocated for a trailing \0 byte (newsize
3714 does *not* include that), and a trailing \0 byte is stored.
3715*/
3716
3717int
3718_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3719{
Antoine Pitrou9ed5f272013-08-13 20:18:52 +02003720 PyObject *v;
3721 PyBytesObject *sv;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003722 v = *pv;
3723 if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
3724 *pv = 0;
3725 Py_DECREF(v);
3726 PyErr_BadInternalCall();
3727 return -1;
3728 }
3729 /* XXX UNREF/NEWREF interface should be more symmetrical */
3730 _Py_DEC_REFTOTAL;
3731 _Py_ForgetReference(v);
3732 *pv = (PyObject *)
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03003733 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003734 if (*pv == NULL) {
3735 PyObject_Del(v);
3736 PyErr_NoMemory();
3737 return -1;
3738 }
3739 _Py_NewReference(*pv);
3740 sv = (PyBytesObject *) *pv;
3741 Py_SIZE(sv) = newsize;
3742 sv->ob_sval[newsize] = '\0';
3743 sv->ob_shash = -1; /* invalidate cached hash value */
3744 return 0;
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003745}
3746
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003747void
3748PyBytes_Fini(void)
3749{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003750 int i;
Serhiy Storchakaf458a032013-02-02 18:45:22 +02003751 for (i = 0; i < UCHAR_MAX + 1; i++)
3752 Py_CLEAR(characters[i]);
3753 Py_CLEAR(nullstring);
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003754}
3755
Benjamin Peterson4116f362008-05-27 00:36:20 +00003756/*********************** Bytes Iterator ****************************/
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003757
3758typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003759 PyObject_HEAD
3760 Py_ssize_t it_index;
3761 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003762} striterobject;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003763
3764static void
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003765striter_dealloc(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003766{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003767 _PyObject_GC_UNTRACK(it);
3768 Py_XDECREF(it->it_seq);
3769 PyObject_GC_Del(it);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003770}
3771
3772static int
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003773striter_traverse(striterobject *it, visitproc visit, void *arg)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003774{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003775 Py_VISIT(it->it_seq);
3776 return 0;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003777}
3778
3779static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003780striter_next(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003782 PyBytesObject *seq;
3783 PyObject *item;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003785 assert(it != NULL);
3786 seq = it->it_seq;
3787 if (seq == NULL)
3788 return NULL;
3789 assert(PyBytes_Check(seq));
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003791 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3792 item = PyLong_FromLong(
3793 (unsigned char)seq->ob_sval[it->it_index]);
3794 if (item != NULL)
3795 ++it->it_index;
3796 return item;
3797 }
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003798
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003799 Py_DECREF(seq);
3800 it->it_seq = NULL;
3801 return NULL;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003802}
3803
3804static PyObject *
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003805striter_len(striterobject *it)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003806{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003807 Py_ssize_t len = 0;
3808 if (it->it_seq)
3809 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3810 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003811}
3812
3813PyDoc_STRVAR(length_hint_doc,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003814 "Private method returning an estimate of len(list(it)).");
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003815
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003816static PyObject *
3817striter_reduce(striterobject *it)
3818{
3819 if (it->it_seq != NULL) {
Antoine Pitroua7013882012-04-05 00:04:20 +02003820 return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003821 it->it_seq, it->it_index);
3822 } else {
3823 PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3824 if (u == NULL)
3825 return NULL;
Antoine Pitroua7013882012-04-05 00:04:20 +02003826 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003827 }
3828}
3829
3830PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3831
3832static PyObject *
3833striter_setstate(striterobject *it, PyObject *state)
3834{
3835 Py_ssize_t index = PyLong_AsSsize_t(state);
3836 if (index == -1 && PyErr_Occurred())
3837 return NULL;
Kristján Valur Jónsson25dded02014-03-05 13:47:57 +00003838 if (it->it_seq != NULL) {
3839 if (index < 0)
3840 index = 0;
3841 else if (index > PyBytes_GET_SIZE(it->it_seq))
3842 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3843 it->it_index = index;
3844 }
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003845 Py_RETURN_NONE;
3846}
3847
3848PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3849
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003850static PyMethodDef striter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003851 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3852 length_hint_doc},
Kristján Valur Jónsson31668b82012-04-03 10:49:41 +00003853 {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3854 reduce_doc},
3855 {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3856 setstate_doc},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003857 {NULL, NULL} /* sentinel */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003858};
3859
Christian Heimes2c9c7a52008-05-26 13:42:13 +00003860PyTypeObject PyBytesIter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003861 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3862 "bytes_iterator", /* tp_name */
3863 sizeof(striterobject), /* tp_basicsize */
3864 0, /* tp_itemsize */
3865 /* methods */
3866 (destructor)striter_dealloc, /* tp_dealloc */
3867 0, /* tp_print */
3868 0, /* tp_getattr */
3869 0, /* tp_setattr */
3870 0, /* tp_reserved */
3871 0, /* tp_repr */
3872 0, /* tp_as_number */
3873 0, /* tp_as_sequence */
3874 0, /* tp_as_mapping */
3875 0, /* tp_hash */
3876 0, /* tp_call */
3877 0, /* tp_str */
3878 PyObject_GenericGetAttr, /* tp_getattro */
3879 0, /* tp_setattro */
3880 0, /* tp_as_buffer */
3881 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3882 0, /* tp_doc */
3883 (traverseproc)striter_traverse, /* tp_traverse */
3884 0, /* tp_clear */
3885 0, /* tp_richcompare */
3886 0, /* tp_weaklistoffset */
3887 PyObject_SelfIter, /* tp_iter */
3888 (iternextfunc)striter_next, /* tp_iternext */
3889 striter_methods, /* tp_methods */
3890 0,
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003891};
3892
3893static PyObject *
Benjamin Peterson80688ef2009-04-18 15:17:02 +00003894bytes_iter(PyObject *seq)
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003896 striterobject *it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00003898 if (!PyBytes_Check(seq)) {
3899 PyErr_BadInternalCall();
3900 return NULL;
3901 }
3902 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3903 if (it == NULL)
3904 return NULL;
3905 it->it_index = 0;
3906 Py_INCREF(seq);
3907 it->it_seq = (PyBytesObject *)seq;
3908 _PyObject_GC_TRACK(it);
3909 return (PyObject *)it;
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003910}
Victor Stinner00165072015-10-09 01:53:21 +02003911
3912
3913/* _PyBytesWriter API */
3914
/* When overallocation is enabled, a buffer resized to N bytes is given
   an extra N / OVERALLOCATE_FACTOR bytes of headroom. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3922
3923void
3924_PyBytesWriter_Init(_PyBytesWriter *writer)
3925{
Victor Stinner661aacc2015-10-14 09:41:48 +02003926 /* Set all attributes before small_buffer to 0 */
3927 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003928#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02003929 memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02003930#endif
3931}
3932
3933void
3934_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3935{
3936 Py_CLEAR(writer->buffer);
3937}
3938
3939Py_LOCAL_INLINE(char*)
3940_PyBytesWriter_AsString(_PyBytesWriter *writer)
3941{
Victor Stinner661aacc2015-10-14 09:41:48 +02003942 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003943 assert(writer->buffer == NULL);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003944 return writer->small_buffer;
Victor Stinner00165072015-10-09 01:53:21 +02003945 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003946 else if (writer->use_bytearray) {
3947 assert(writer->buffer != NULL);
3948 return PyByteArray_AS_STRING(writer->buffer);
3949 }
3950 else {
3951 assert(writer->buffer != NULL);
3952 return PyBytes_AS_STRING(writer->buffer);
3953 }
Victor Stinner00165072015-10-09 01:53:21 +02003954}
3955
3956Py_LOCAL_INLINE(Py_ssize_t)
Victor Stinner2bf89932015-10-14 11:25:33 +02003957_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
Victor Stinner00165072015-10-09 01:53:21 +02003958{
3959 char *start = _PyBytesWriter_AsString(writer);
3960 assert(str != NULL);
3961 assert(str >= start);
Victor Stinnerb3653a32015-10-09 03:38:24 +02003962 assert(str - start <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003963 return str - start;
3964}
3965
3966Py_LOCAL_INLINE(void)
3967_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3968{
3969#ifdef Py_DEBUG
3970 char *start, *end;
3971
Victor Stinner661aacc2015-10-14 09:41:48 +02003972 if (writer->use_small_buffer) {
Victor Stinner00165072015-10-09 01:53:21 +02003973 assert(writer->buffer == NULL);
3974 }
Victor Stinner661aacc2015-10-14 09:41:48 +02003975 else {
3976 assert(writer->buffer != NULL);
3977 if (writer->use_bytearray)
3978 assert(PyByteArray_CheckExact(writer->buffer));
3979 else
3980 assert(PyBytes_CheckExact(writer->buffer));
3981 assert(Py_REFCNT(writer->buffer) == 1);
3982 }
Victor Stinner00165072015-10-09 01:53:21 +02003983
Victor Stinner661aacc2015-10-14 09:41:48 +02003984 if (writer->use_bytearray) {
3985 /* bytearray has its own overallocation algorithm,
3986 writer overallocation must be disabled */
3987 assert(!writer->overallocate);
3988 }
3989
3990 assert(0 <= writer->allocated);
Victor Stinner53926a12015-10-09 12:37:03 +02003991 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
Victor Stinner00165072015-10-09 01:53:21 +02003992 /* the last byte must always be null */
Victor Stinner661aacc2015-10-14 09:41:48 +02003993 start = _PyBytesWriter_AsString(writer);
Victor Stinner00165072015-10-09 01:53:21 +02003994 assert(start[writer->allocated] == 0);
3995
3996 end = start + writer->allocated;
3997 assert(str != NULL);
3998 assert(start <= str && str <= end);
3999#endif
4000}
4001
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004002void*
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004003_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
Victor Stinner00165072015-10-09 01:53:21 +02004004{
4005 Py_ssize_t allocated, pos;
4006
4007 _PyBytesWriter_CheckConsistency(writer, str);
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004008 assert(writer->allocated < size);
Victor Stinner00165072015-10-09 01:53:21 +02004009
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004010 allocated = size;
Victor Stinner00165072015-10-09 01:53:21 +02004011 if (writer->overallocate
4012 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
4013 /* overallocate to limit the number of realloc() */
4014 allocated += allocated / OVERALLOCATE_FACTOR;
4015 }
4016
Victor Stinner2bf89932015-10-14 11:25:33 +02004017 pos = _PyBytesWriter_GetSize(writer, str);
Victor Stinnerb3653a32015-10-09 03:38:24 +02004018 if (!writer->use_small_buffer) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004019 if (writer->use_bytearray) {
4020 if (PyByteArray_Resize(writer->buffer, allocated))
4021 goto error;
4022 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
4023 but we cannot use ob_alloc because bytes may need to be moved
4024 to use the whole buffer. bytearray uses an internal optimization
4025 to avoid moving or copying bytes when bytes are removed at the
4026 beginning (ex: del bytearray[:1]). */
4027 }
4028 else {
4029 if (_PyBytes_Resize(&writer->buffer, allocated))
4030 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004031 }
4032 }
4033 else {
4034 /* convert from stack buffer to bytes object buffer */
4035 assert(writer->buffer == NULL);
4036
Victor Stinner661aacc2015-10-14 09:41:48 +02004037 if (writer->use_bytearray)
4038 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
4039 else
4040 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
Victor Stinner00165072015-10-09 01:53:21 +02004041 if (writer->buffer == NULL)
Victor Stinner661aacc2015-10-14 09:41:48 +02004042 goto error;
Victor Stinner00165072015-10-09 01:53:21 +02004043
4044 if (pos != 0) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004045 char *dest;
4046 if (writer->use_bytearray)
4047 dest = PyByteArray_AS_STRING(writer->buffer);
4048 else
4049 dest = PyBytes_AS_STRING(writer->buffer);
4050 Py_MEMCPY(dest,
Victor Stinnerb3653a32015-10-09 03:38:24 +02004051 writer->small_buffer,
Victor Stinner00165072015-10-09 01:53:21 +02004052 pos);
4053 }
4054
Victor Stinnerb3653a32015-10-09 03:38:24 +02004055 writer->use_small_buffer = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004056#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004057 memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
Victor Stinner00165072015-10-09 01:53:21 +02004058#endif
Victor Stinner00165072015-10-09 01:53:21 +02004059 }
4060 writer->allocated = allocated;
4061
4062 str = _PyBytesWriter_AsString(writer) + pos;
4063 _PyBytesWriter_CheckConsistency(writer, str);
4064 return str;
Victor Stinner661aacc2015-10-14 09:41:48 +02004065
4066error:
4067 _PyBytesWriter_Dealloc(writer);
4068 return NULL;
Victor Stinner00165072015-10-09 01:53:21 +02004069}
4070
Victor Stinnerc5c3ba42015-10-14 13:56:47 +02004071void*
4072_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
4073{
4074 Py_ssize_t new_min_size;
4075
4076 _PyBytesWriter_CheckConsistency(writer, str);
4077 assert(size >= 0);
4078
4079 if (size == 0) {
4080 /* nothing to do */
4081 return str;
4082 }
4083
4084 if (writer->min_size > PY_SSIZE_T_MAX - size) {
4085 PyErr_NoMemory();
4086 _PyBytesWriter_Dealloc(writer);
4087 return NULL;
4088 }
4089 new_min_size = writer->min_size + size;
4090
4091 if (new_min_size > writer->allocated)
4092 str = _PyBytesWriter_Resize(writer, str, new_min_size);
4093
4094 writer->min_size = new_min_size;
4095 return str;
4096}
4097
Victor Stinner00165072015-10-09 01:53:21 +02004098/* Allocate the buffer to write size bytes.
4099 Return the pointer to the beginning of buffer data.
4100 Raise an exception and return NULL on error. */
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004101void*
Victor Stinner00165072015-10-09 01:53:21 +02004102_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
4103{
4104 /* ensure that _PyBytesWriter_Alloc() is only called once */
Victor Stinner53926a12015-10-09 12:37:03 +02004105 assert(writer->min_size == 0 && writer->buffer == NULL);
Victor Stinner00165072015-10-09 01:53:21 +02004106 assert(size >= 0);
4107
Victor Stinnerb3653a32015-10-09 03:38:24 +02004108 writer->use_small_buffer = 1;
Victor Stinnerb13b97d2015-10-09 02:52:16 +02004109#ifdef Py_DEBUG
Victor Stinnerb3653a32015-10-09 03:38:24 +02004110 writer->allocated = sizeof(writer->small_buffer) - 1;
Victor Stinnerf6358a72015-10-14 12:02:39 +02004111 /* In debug mode, don't use the full small buffer because it is less
4112 efficient than bytes and bytearray objects to detect buffer underflow
4113 and buffer overflow. Use 10 bytes of the small buffer to test also
4114 code using the smaller buffer in debug mode.
4115
4116 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
4117 in debug mode to also be able to detect stack overflow when running
4118 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
4119 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
4120 stack overflow. */
4121 writer->allocated = Py_MIN(writer->allocated, 10);
4122 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
4123 to detect buffer overflow */
Victor Stinnerb3653a32015-10-09 03:38:24 +02004124 writer->small_buffer[writer->allocated] = 0;
Victor Stinner00165072015-10-09 01:53:21 +02004125#else
Victor Stinnerb3653a32015-10-09 03:38:24 +02004126 writer->allocated = sizeof(writer->small_buffer);
Victor Stinner00165072015-10-09 01:53:21 +02004127#endif
Victor Stinnerb3653a32015-10-09 03:38:24 +02004128 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
Victor Stinner00165072015-10-09 01:53:21 +02004129}
4130
4131PyObject *
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004132_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
Victor Stinner00165072015-10-09 01:53:21 +02004133{
Victor Stinner2bf89932015-10-14 11:25:33 +02004134 Py_ssize_t size;
Victor Stinner00165072015-10-09 01:53:21 +02004135 PyObject *result;
4136
4137 _PyBytesWriter_CheckConsistency(writer, str);
4138
Victor Stinner2bf89932015-10-14 11:25:33 +02004139 size = _PyBytesWriter_GetSize(writer, str);
4140 if (size == 0 && !writer->use_bytearray) {
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004141 Py_CLEAR(writer->buffer);
4142 /* Get the empty byte string singleton */
4143 result = PyBytes_FromStringAndSize(NULL, 0);
4144 }
4145 else if (writer->use_small_buffer) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004146 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
Victor Stinner6c2cdae2015-10-12 13:29:43 +02004147 }
4148 else {
4149 result = writer->buffer;
4150 writer->buffer = NULL;
4151
Victor Stinner2bf89932015-10-14 11:25:33 +02004152 if (size != writer->allocated) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004153 if (writer->use_bytearray) {
Victor Stinner2bf89932015-10-14 11:25:33 +02004154 if (PyByteArray_Resize(result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004155 Py_DECREF(result);
4156 return NULL;
4157 }
4158 }
4159 else {
Victor Stinner2bf89932015-10-14 11:25:33 +02004160 if (_PyBytes_Resize(&result, size)) {
Victor Stinner661aacc2015-10-14 09:41:48 +02004161 assert(result == NULL);
4162 return NULL;
4163 }
Victor Stinner00165072015-10-09 01:53:21 +02004164 }
4165 }
Victor Stinner00165072015-10-09 01:53:21 +02004166 }
Victor Stinner00165072015-10-09 01:53:21 +02004167 return result;
4168}
Victor Stinnerce179bf2015-10-09 12:57:22 +02004169
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004170void*
Victor Stinnere9aa5952015-10-12 13:57:47 +02004171_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
Victor Stinnerc29e29b2015-10-12 13:12:54 +02004172 const void *bytes, Py_ssize_t size)
Victor Stinnerce179bf2015-10-09 12:57:22 +02004173{
Victor Stinnere9aa5952015-10-12 13:57:47 +02004174 char *str = (char *)ptr;
4175
Victor Stinnerce179bf2015-10-09 12:57:22 +02004176 str = _PyBytesWriter_Prepare(writer, str, size);
4177 if (str == NULL)
4178 return NULL;
4179
4180 Py_MEMCPY(str, bytes, size);
4181 str += size;
4182
4183 return str;
4184}